Module package
Version: 4.1.0+incompatible
Repository: https://github.com/andrewyang17/goeagi.git
Documentation: pkg.go.dev
# README

GoEAGI
A Go library designed to seamlessly integrate with Asterisk's EAGI, offering essential functionalities for enhanced interaction and communication.
Report Bug · Request Feature
Features
- Audio Streaming
- Google's Text to Speech
- Google's Speech to Text
- Microsoft Azure's Speech to Text
- Vosk server Speech to Text
- Voice Activity Detection
- Speech File Generation
- Commands to Asterisk
Example Usage
Google Text to Speech
- Render text to speech and play it back to the user.
- You may refer to the language code and voice name here.
- Example dialplan code:
;GoogleTTS, playback message to the user
exten => 1234,1,Answer
exten => 1234,n,AGI(<build-script>, "What's up my buddy? how are you?", "en-GB", "en-GB-Neural2-A")
exten => 1234,n,Hangup
- Example Go code:
// Example: render text to speech with Google TTS and play it back to the caller.
package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	// AGI arguments passed from the dialplan (see the example dialplan above).
	content := strings.TrimSpace(eagi.Env["arg_1"])
	languageCode := strings.TrimSpace(eagi.Env["arg_2"])
	voiceName := strings.TrimSpace(eagi.Env["arg_3"])

	tts, err := goEagi.NewGoogleTTS(
		"<GoogleSpeechToTextPrivateKey>",
		"/tmp/tts",
		languageCode,
		voiceName)
	if err != nil {
		// tts is unusable on error; exit instead of dereferencing it below.
		eagi.Verbose(err.Error())
		os.Exit(1)
	}

	audioPath, err := tts.GenerateAudio(content)
	if err != nil {
		eagi.Verbose(err.Error())
		os.Exit(1)
	}

	if _, err = eagi.StreamFile(audioPath, ""); err != nil {
		eagi.Verbose(err.Error())
	}
}
Google Speech to Text
// Example: stream call audio to Google Speech-to-Text and log transcriptions.
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	googleService, err := goEagi.NewGoogleService("<GoogleSpeechToTextPrivateKey>", "<languageCode>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer googleService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream forwards raw audio from the EAGI stream into the Google service.
	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := googleService.StartStreaming(ctx, bridgeStream)
	googleResponseCh := googleService.SpeechToTextResponse(ctx)

	// Pump audio frames from Asterisk (fd 3) into the bridge until cancelled.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	// Main loop: surface streaming errors and log every transcription result.
	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Google speech to text response: error: %v", err))
			cancel()
			return
		case response := <-googleResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Google speech to text response: error: %v", response.Error))
				cancel()
				return
			}
			transcription := response.Result.Alternatives[0].Transcript
			isFinal := response.Result.IsFinal
			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", isFinal, transcription))
		}
	}
}
Microsoft Azure Speech to Text
- Prerequisite - install the Speech SDK
- Carefully read the Speech SDK documentation and verify the platform requirements to ensure compatibility with your Asterisk server.
- If it is not possible to install the Speech SDK on your Asterisk server, you can install it on a different machine and stream the audio from your Asterisk server to the Speech SDK.
- For Azure Speech to Text, you need to enable "CGO_ENABLED" flag and build the project with the tag "azure", as shown below:
CGO_ENABLED=1 go build -tags azure main.go
// Example: stream call audio to Microsoft Azure Speech-to-Text and log transcriptions.
// Build with: CGO_ENABLED=1 go build -tags azure main.go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	azureService, err := goEagi.NewAzureService("<subscriptionKey>", "<serviceRegion>", "", []string{"...<language_code>"})
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer azureService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream forwards raw audio from the EAGI stream into the Azure service.
	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := azureService.StartStreaming(ctx, bridgeStream)
	azureResponseCh := azureService.SpeechToTextResponse(ctx)

	// Pump audio frames from Asterisk (fd 3) into the bridge until cancelled.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	// Main loop: surface streaming errors and log every transcription result.
	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Azure speech to text response: error: %v", err))
			cancel()
			return
		case response := <-azureResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Azure speech to text response: error: %v", response.Error))
				cancel()
				return
			}
			if response.Info != "" {
				// Informational events (e.g. session notices) are logged but not fatal.
				eagi.Verbose(fmt.Sprintf("Info: %v", response.Info))
				continue
			}
			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", response.IsFinal, response.Transcription))
		}
	}
}
Vosk
- Prerequisite - run the Vosk server:
docker run -d -p 2700:2700 alphacep/kaldi-en:latest
// Example: stream call audio to a Vosk server and log transcriptions.
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	// Use a phrase list to restrict recognition to specific phrases/words.
	// Notes:
	//  * with a phrase list, Vosk only detects those words and ignores all others
	//  * some Vosk models don't support phrase lists (observed with Spanish)
	//  * pass nil (or an empty list) to disable the phrase list
	voskService, err := goEagi.NewVoskService("<voskHost>", "<voskPort>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer voskService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream forwards raw audio from the EAGI stream into the Vosk service.
	bridgeStream := make(chan []byte)
	defer close(bridgeStream)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := voskService.StartStreaming(ctx, bridgeStream)
	voskResponseCh := voskService.SpeechToTextResponse(ctx)

	// Pump audio frames from Asterisk (fd 3) into the bridge until cancelled.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	// Main loop: surface streaming errors and log every transcription result.
	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Vosk speech to text response: error: %v", err))
			cancel()
			return
		case response := <-voskResponseCh:
			// Partial results arrive in response.Partial; once a full utterance
			// is recognized, the complete text arrives in response.Text.
			eagi.Verbose(fmt.Sprintf("Transcription: %v\n", response.Text))
		}
	}
}
Contributing
Made with contrib.rocks
Contributions are always welcome!
License
MIT License, see LICENSE.
Contact
Andrew Yang - [email protected]
Project Link: https://github.com/andrewyang17/goEagi
Acknowledgements
We would like to express our gratitude to the authors and contributors of the following open-source libraries, which were used in this project:
- cloud.google.com/go/speech: Developed by Google
- github.com/Microsoft/cognitive-services-speech-sdk-go: Developed by Microsoft
- github.com/cryptix/wav: Developed by Henry Cryptix
- github.com/zaf/agi: Developed by Lefteris Zafiris
- github.com/gorilla/websocket: Developed by Gorilla
# Functions
ComputeAmplitude analyzes the amplitude of a sample slice of bytes.
GenerateAudio writes a sample slice of bytes into an audio file.
No description provided by the author
NewGoogleService creates a new GoogleService instance, it takes a privateKeyPath and set it in environment with key GOOGLE_APPLICATION_CREDENTIALS, a languageCode, example ["en-GB", "en-US", "ch", ...], see (https://cloud.google.com/speech-to-text/docs/languages), and a speech context, see (https://cloud.google.com/speech-to-text/docs/speech-adaptation).
No description provided by the author
NewVad is a constructor of Vad.
NewVoskService creates a new VoskService.
StreamAudio launches a new goroutine for audio streaming via file descriptor 3.
# Structs
No description provided by the author
No description provided by the author
GoogleResult is a struct that contains transcription result from Google Speech to Text service.
GoogleService is used to stream audio data to Google Speech to Text service.
No description provided by the author
No description provided by the author
No description provided by the author
VoskResult is the response from Vosk Speech Recognizer.
VoskService is the client for Vosk Speech Recognizer.