diff --git a/community/agent-speech-processing.yaml b/community/agent-speech-processing.yaml index d2ea14d..942253a 100644 --- a/community/agent-speech-processing.yaml +++ b/community/agent-speech-processing.yaml @@ -1,5 +1,5 @@ # Docker image of the agent. -docker_image: docker.io/openvidu/agent-speech-processing:main +docker_image: docker.io/openvidu/agent-speech-processing-cloud:main # Whether to run the agent or not. enabled: false @@ -66,7 +66,7 @@ live_captions: # List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition. phrase_list: # Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior. - explicit_punctuation: + explicit_punctuation: azure_openai: # Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai @@ -318,4 +318,28 @@ live_captions: # - "en" # - "es" # Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context - context: \ No newline at end of file + context: + + vosk: + # Vosk language model. 
This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk" + # Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models): + # - vosk-model-en-us-0.22-lgraph (English US) + # - vosk-model-small-cn-0.22 (Chinese) + # - vosk-model-small-de-0.15 (German) + # - vosk-model-small-en-in-0.4 (English India) + # - vosk-model-small-es-0.42 (Spanish) + # - vosk-model-small-fr-0.22 (French) + # - vosk-model-small-hi-0.22 (Hindi) + # - vosk-model-small-it-0.22 (Italian) + # - vosk-model-small-ja-0.22 (Japanese) + # - vosk-model-small-nl-0.22 (Dutch) + # - vosk-model-small-pt-0.3 (Portuguese) + # - vosk-model-small-ru-0.22 (Russian) + model: vosk-model-en-us-0.22-lgraph + # Language code for reference. It is used for observability only and has no other effect. + # If left empty and a pre-installed "model" is declared, it is set automatically. + language: + # Audio sample rate in Hz. Default is 16000. + sample_rate: + # Whether to return interim/partial results during recognition. Default is true. + partial_results: diff --git a/pro/agent-speech-processing.yaml b/pro/agent-speech-processing.yaml index d2ea14d..942253a 100644 --- a/pro/agent-speech-processing.yaml +++ b/pro/agent-speech-processing.yaml @@ -1,5 +1,5 @@ # Docker image of the agent. -docker_image: docker.io/openvidu/agent-speech-processing:main +docker_image: docker.io/openvidu/agent-speech-processing-cloud:main # Whether to run the agent or not. enabled: false @@ -66,7 +66,7 @@ live_captions: # List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition. phrase_list: # Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior. - explicit_punctuation: + explicit_punctuation: azure_openai: # Credentials for Azure OpenAI APIs.
See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai @@ -318,4 +318,28 @@ live_captions: # - "en" # - "es" # Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context - context: \ No newline at end of file + context: + + vosk: + # Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk" + # Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models): + # - vosk-model-en-us-0.22-lgraph (English US) + # - vosk-model-small-cn-0.22 (Chinese) + # - vosk-model-small-de-0.15 (German) + # - vosk-model-small-en-in-0.4 (English India) + # - vosk-model-small-es-0.42 (Spanish) + # - vosk-model-small-fr-0.22 (French) + # - vosk-model-small-hi-0.22 (Hindi) + # - vosk-model-small-it-0.22 (Italian) + # - vosk-model-small-ja-0.22 (Japanese) + # - vosk-model-small-nl-0.22 (Dutch) + # - vosk-model-small-pt-0.3 (Portuguese) + # - vosk-model-small-ru-0.22 (Russian) + model: vosk-model-en-us-0.22-lgraph + # Language code for reference. It is used for observability only and has no other effect. + # If left empty and a pre-installed "model" is declared, it is set automatically. + language: + # Audio sample rate in Hz. Default is 16000. + sample_rate: + # Whether to return interim/partial results during recognition. Default is true. + partial_results: