Module package
Version: 4.1.0+incompatible
Repository: https://github.com/andrewyang17/goeagi.git
Documentation: pkg.go.dev
# README

GoEAGI
A Go library designed to seamlessly integrate with Asterisk's EAGI, offering essential functionalities for enhanced interaction and communication.
Report Bug · Request Feature
Features
- Audio Streaming
- Google's Text to Speech
- Google's Speech to Text
- Microsoft Azure's Speech to Text
- Vosk server Speech to Text
- Voice Activity Detection
- Speech File Generation
- Commands to Asterisk
Example Usage
Google Text to Speech
- Render text to speech and play it back to the user.
- You may refer to the language code and voice name here.
- Example dialplan code:
;GoogleTTS, playback message to the user
exten => 1234,1,Answer
exten => 1234,n,AGI(<build-script>, "What's up my buddy? how are you?", "en-GB", "en-GB-Neural2-A")
exten => 1234,n,Hangup
- Example Go code:
// Example: render text to speech with Google TTS and play it back to the caller.
package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	// AGI arguments passed from the dialplan (see the example dialplan above).
	content := strings.TrimSpace(eagi.Env["arg_1"])
	languageCode := strings.TrimSpace(eagi.Env["arg_2"])
	voiceName := strings.TrimSpace(eagi.Env["arg_3"])

	tts, err := goEagi.NewGoogleTTS(
		"<GoogleSpeechToTextPrivateKey>",
		"/tmp/tts",
		languageCode,
		voiceName)
	if err != nil {
		// tts is unusable on error; exit instead of dereferencing it below.
		eagi.Verbose(err.Error())
		os.Exit(1)
	}

	audioPath, err := tts.GenerateAudio(content)
	if err != nil {
		eagi.Verbose(err.Error())
		os.Exit(1)
	}

	if _, err = eagi.StreamFile(audioPath, ""); err != nil {
		eagi.Verbose(err.Error())
	}
}
Google Speech to Text
// Example: stream call audio to Google Speech-to-Text and log transcriptions.
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	googleService, err := goEagi.NewGoogleService("<GoogleSpeechToTextPrivateKey>", "<languageCode>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer googleService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream forwards raw audio from the EAGI stream into the Google service.
	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := googleService.StartStreaming(ctx, bridgeStream)
	googleResponseCh := googleService.SpeechToTextResponse(ctx)

	// Pump audio frames from Asterisk (fd 3) into the bridge until cancelled.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	// Main loop: surface streaming errors and log every transcription result.
	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Google speech to text response: error: %v", err))
			cancel()
			return
		case response := <-googleResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Google speech to text response: error: %v", response.Error))
				cancel()
				return
			}
			transcription := response.Result.Alternatives[0].Transcript
			isFinal := response.Result.IsFinal
			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", isFinal, transcription))
		}
	}
}
Microsoft Azure Speech to Text
- Prerequisite - install the Speech SDK
- Carefully read the Speech SDK documentation and verify the platform requirements to ensure compatibility with your Asterisk server.
- If it is not possible to install the Speech SDK on your Asterisk server, you can install it on a different machine and stream the audio from your Asterisk server to the Speech SDK.
- For Azure Speech to Text, you need to enable "CGO_ENABLED" flag and build the project with the tag "azure", as shown below:
CGO_ENABLED=1 go build -tags azure main.go
// Example: stream call audio to Microsoft Azure Speech-to-Text and log transcriptions.
// Build with: CGO_ENABLED=1 go build -tags azure main.go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	azureService, err := goEagi.NewAzureService("<subscriptionKey>", "<serviceRegion>", "", []string{"...<language_code>"})
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer azureService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream forwards raw audio from the EAGI stream into the Azure service.
	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := azureService.StartStreaming(ctx, bridgeStream)
	azureResponseCh := azureService.SpeechToTextResponse(ctx)

	// Pump audio frames from Asterisk (fd 3) into the bridge until cancelled.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	// Main loop: surface streaming errors and log every transcription result.
	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Azure speech to text response: error: %v", err))
			cancel()
			return
		case response := <-azureResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Azure speech to text response: error: %v", response.Error))
				cancel()
				return
			}
			if response.Info != "" {
				// Informational events (e.g. session notices) are logged but not fatal.
				eagi.Verbose(fmt.Sprintf("Info: %v", response.Info))
				continue
			}
			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", response.IsFinal, response.Transcription))
		}
	}
}
Vosk
- Prerequisite - run the Vosk server:
docker run -d -p 2700:2700 alphacep/kaldi-en:latest
// Example: stream call audio to a Vosk server and log transcriptions.
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No EAGI session yet, so report on stdout and bail out.
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	// Use a phrase list to restrict recognition to specific phrases/words.
	// Notes:
	//  * with a phrase list, Vosk only detects those words and ignores all others
	//  * some Vosk models don't support phrase lists (observed with Spanish)
	//  * pass nil (or an empty list) to disable the phrase list
	voskService, err := goEagi.NewVoskService("<voskHost>", "<voskPort>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer voskService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream forwards raw audio from the EAGI stream into the Vosk service.
	bridgeStream := make(chan []byte)
	defer close(bridgeStream)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := voskService.StartStreaming(ctx, bridgeStream)
	voskResponseCh := voskService.SpeechToTextResponse(ctx)

	// Pump audio frames from Asterisk (fd 3) into the bridge until cancelled.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	// Main loop: surface streaming errors and log every transcription result.
	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Vosk speech to text response: error: %v", err))
			cancel()
			return
		case response := <-voskResponseCh:
			// Partial results arrive in response.Partial; once a full utterance
			// is recognized, the complete text arrives in response.Text.
			eagi.Verbose(fmt.Sprintf("Transcription: %v\n", response.Text))
		}
	}
}
Contributing
Made with contrib.rocks
Contributions are always welcome!
License
MIT License, see LICENSE.
Contact
Andrew Yang - [email protected]
Project Link: https://github.com/andrewyang17/goEagi
Acknowledgements
We would like to express our gratitude to the authors and contributors of the following open-source libraries, which were used in this project:
- cloud.google.com/go/speech: Developed by Google
- github.com/Microsoft/cognitive-services-speech-sdk-go: Developed by Microsoft
- github.com/cryptix/wav: Developed by Henry Cryptix
- github.com/zaf/agi: Developed by Lefteris Zafiris
- github.com/gorilla/websocket: Developed by Gorilla
# Functions
ComputeAmplitude analyzes the amplitude of a sample slice of bytes.
GenerateAudio writes a sample slice of bytes into an audio file.
No description provided by the author
NewGoogleService creates a new GoogleService instance, it takes a privateKeyPath and set it in environment with key GOOGLE_APPLICATION_CREDENTIALS, a languageCode, example ["en-GB", "en-US", "ch", ...], see (https://cloud.google.com/speech-to-text/docs/languages), and a speech context, see (https://cloud.google.com/speech-to-text/docs/speech-adaptation).
No description provided by the author
NewVad is a constructor of Vad.
NewVoskService creates a new VoskService.
StreamAudio launches a new goroutine for audio streaming via file descriptor 3.
# Structs
No description provided by the author
No description provided by the author
GoogleResult is a struct that contains transcription result from Google Speech to Text service.
GoogleService is used to stream audio data to Google Speech to Text service.
No description provided by the author
No description provided by the author
No description provided by the author
VoskResult is the response from Vosk Speech Recognizer.
VoskService is the client for Vosk Speech Recognizer.