Update agent-speech-processing.yaml

2026-01-19 14:07:35 +01:00 · 2026-01-19 14:07:35 +01:00 · d51a1b2cdf
commit d51a1b2cdf
parent 7c8908707b
2 changed files with 54 additions and 6 deletions
--- a/community/agent-speech-processing.yaml
+++ b/community/agent-speech-processing.yaml
@ -1,5 +1,5 @@
 # Docker image of the agent.
-docker_image: docker.io/openvidu/agent-speech-processing:main
+docker_image: docker.io/openvidu/agent-speech-processing-cloud:main

 # Whether to run the agent or not.
 enabled: false
@ -66,7 +66,7 @@ live_captions:
    # List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
    phrase_list:
    # Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
-    explicit_punctuation: 
+    explicit_punctuation:

  azure_openai:
    # Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -318,4 +318,28 @@ live_captions:
      # - "en"
      # - "es"
    # Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
-    context:
+    context:
+
+  vosk:
+    # Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
+    # Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
+    # - vosk-model-en-us-0.22-lgraph (English US)
+    # - vosk-model-small-cn-0.22 (Chinese)
+    # - vosk-model-small-de-0.15 (German)
+    # - vosk-model-small-en-in-0.4 (English India)
+    # - vosk-model-small-es-0.42 (Spanish)
+    # - vosk-model-small-fr-0.22 (French)
+    # - vosk-model-small-hi-0.22 (Hindi)
+    # - vosk-model-small-it-0.22 (Italian)
+    # - vosk-model-small-ja-0.22 (Japanese)
+    # - vosk-model-small-nl-0.22 (Dutch)
+    # - vosk-model-small-pt-0.3 (Portuguese)
+    # - vosk-model-small-ru-0.22 (Russian)
+    model: vosk-model-en-us-0.22-lgraph
+    # Language code for reference. It has no effect other than observability purposes.
+    # If a pre-installed "model" is declared, this will be set automatically if empty.
+    language:
+    # Audio sample rate in Hz. Default is 16000.
+    sample_rate:
+    # Whether to return interim/partial results during recognition. Default is true.
+    partial_results:
--- a/pro/agent-speech-processing.yaml
+++ b/pro/agent-speech-processing.yaml
@ -1,5 +1,5 @@
 # Docker image of the agent.
-docker_image: docker.io/openvidu/agent-speech-processing:main
+docker_image: docker.io/openvidu/agent-speech-processing-cloud:main

 # Whether to run the agent or not.
 enabled: false
@ -66,7 +66,7 @@ live_captions:
    # List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
    phrase_list:
    # Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
-    explicit_punctuation: 
+    explicit_punctuation:

  azure_openai:
    # Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -318,4 +318,28 @@ live_captions:
      # - "en"
      # - "es"
    # Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
-    context:
+    context:
+
+  vosk:
+    # Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
+    # Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
+    # - vosk-model-en-us-0.22-lgraph (English US)
+    # - vosk-model-small-cn-0.22 (Chinese)
+    # - vosk-model-small-de-0.15 (German)
+    # - vosk-model-small-en-in-0.4 (English India)
+    # - vosk-model-small-es-0.42 (Spanish)
+    # - vosk-model-small-fr-0.22 (French)
+    # - vosk-model-small-hi-0.22 (Hindi)
+    # - vosk-model-small-it-0.22 (Italian)
+    # - vosk-model-small-ja-0.22 (Japanese)
+    # - vosk-model-small-nl-0.22 (Dutch)
+    # - vosk-model-small-pt-0.3 (Portuguese)
+    # - vosk-model-small-ru-0.22 (Russian)
+    model: vosk-model-en-us-0.22-lgraph
+    # Language code for reference. It has no effect other than observability purposes.
+    # If a pre-installed "model" is declared, this will be set automatically if empty.
+    language:
+    # Audio sample rate in Hz. Default is 16000.
+    sample_rate:
+    # Whether to return interim/partial results during recognition. Default is true.
+    partial_results: