Add nvidia and vosk live_captions providers to YAML
This commit is contained in:
parent
d51a1b2cdf
commit
2ea399dc42
@ -16,7 +16,7 @@ live_captions:
|
||||
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
|
||||
processing: automatic
|
||||
|
||||
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox]
|
||||
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, vosk]
|
||||
# The custom configuration for the selected provider must be set below
|
||||
provider:
|
||||
|
||||
@ -320,6 +320,28 @@ live_captions:
|
||||
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
|
||||
context:
|
||||
|
||||
nvidia:
|
||||
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
|
||||
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set up "server" and "use_ssl" instead.
|
||||
api_key:
|
||||
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
|
||||
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
|
||||
model:
|
||||
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
|
||||
function_id:
|
||||
# Whether to add punctuation to transcription results. Default is true.
|
||||
punctuate:
|
||||
# The language code for transcription. Default is "en-US"
|
||||
language_code:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
|
||||
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
|
||||
server:
|
||||
# Whether to use SSL for the connection. Default is true.
|
||||
# Set to false for locally hosted Riva NIM services without SSL.
|
||||
use_ssl:
|
||||
|
||||
vosk:
|
||||
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
|
||||
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
|
||||
|
||||
@ -16,7 +16,7 @@ live_captions:
|
||||
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
|
||||
processing: automatic
|
||||
|
||||
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox]
|
||||
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, vosk]
|
||||
# The custom configuration for the selected provider must be set below
|
||||
provider:
|
||||
|
||||
@ -320,6 +320,28 @@ live_captions:
|
||||
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
|
||||
context:
|
||||
|
||||
nvidia:
|
||||
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
|
||||
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set up "server" and "use_ssl" instead.
|
||||
api_key:
|
||||
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
|
||||
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
|
||||
model:
|
||||
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
|
||||
function_id:
|
||||
# Whether to add punctuation to transcription results. Default is true.
|
||||
punctuate:
|
||||
# The language code for transcription. Default is "en-US"
|
||||
language_code:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
|
||||
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
|
||||
server:
|
||||
# Whether to use SSL for the connection. Default is true.
|
||||
# Set to false for locally hosted Riva NIM services without SSL.
|
||||
use_ssl:
|
||||
|
||||
vosk:
|
||||
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
|
||||
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user