Compare commits

...

18 Commits
3.5.0 ... main

Author SHA1 Message Date
GitHub Actions
3425341e81 Bump to version 3.6.0 2026-03-06 12:05:23 +00:00
pabloFuente
8a4923e4a8 Add sherpa.provider YAML property to agent-speech-processing.yaml 2026-03-04 13:09:53 +01:00
pabloFuente
ea27ec1f96 Minor update of YAML comment 2026-02-25 18:30:18 +01:00
cruizba
c145494bbd Change speech processing default from automatic to manual
The agent will now only connect to Rooms when explicitly requested
via the Agent Dispatch API instead of auto-connecting to all new Rooms.
2026-02-25 18:22:19 +01:00
cruizba
9bff284b8f Bump Docker image versions for Redis, Minio and MongoDB
- Redis: 8.2.2-alpine -> 8.6.1-alpine
- Minio: 2025.9.7-debian-12-r3 -> 2025.10.15-debian-12-r9
- MongoDB: 8.0.15-r0 -> 8.0.19-r1
2026-02-24 15:29:10 +01:00
pabloFuente
1724fa5c18 Add use_global_cpu_monitoring and min_disk_space_mb to egress config 2026-02-20 14:10:57 +01:00
pabloFuente
3d06d98ea0 Beautify docker-compose.yaml 2026-02-18 13:19:34 +01:00
pabloFuente
ff54026aad Default live_captions provider to vosk 2026-02-12 12:33:31 +01:00
cruizba
c778720ba5 Add meet.env file to caddy-proxy and ready-check which needs MEET_INITIAL_* env variables. 2026-02-09 20:03:23 +01:00
cruizba
0422cbd8c2 Refactor OpenVidu Meet configuration: consolidate environment variables into meet.env and update docker-compose to use new configuration structure 2026-02-09 19:49:59 +01:00
pabloFuente
7c22e68ab5 Update agent-speech-processing.yaml files 2026-02-06 10:57:12 +01:00
cruizba
1fd49f308c Add MEET_BASE_PATH environment variable to configuration files 2026-02-05 19:12:34 +01:00
pabloFuente
8fc6edaa87 Fix agent-speech-processing.yaml files sherpa model names 2026-02-04 17:04:24 +01:00
pabloFuente
803dfbbfa8 Updated agent-speech-processing.yaml files with new providers 2026-02-04 16:39:15 +01:00
cruizba
0ee45ec06f Route 9080 through caddy to redir to /meet 2026-02-02 22:00:22 +01:00
pabloFuente
2ea399dc42 Add nvidia and vosk live_captions providers to YAML 2026-01-21 11:48:59 +01:00
pabloFuente
d51a1b2cdf Update agent-speech-processing.yaml 2026-01-19 14:07:35 +01:00
cruizba
7c8908707b Revert to main version 2026-01-10 03:08:48 +01:00
10 changed files with 388 additions and 110 deletions

View File

@ -30,7 +30,5 @@ MINIO_SECRET_KEY=minioadmin
MONGO_ADMIN_USERNAME=mongoadmin
MONGO_ADMIN_PASSWORD=mongoadmin
# OpenVidu Meet configuration.
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
# OpenVidu Meet base path
MEET_BASE_PATH=/meet

View File

@ -1,5 +1,5 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing:3.5.0
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
# Whether to run the agent or not.
enabled: false
@ -7,18 +7,18 @@ enabled: false
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
load_threshold: 1.0
# Log level for the agent [DEBUG, INFO, WARNING, ERROR, CRITICAL]
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
log_level: INFO
live_captions:
# How this agent will connect to Rooms [automatic, manual]
# - automatic: the agent will automatically connect to new Rooms.
# How this agent will connect to Rooms [manual, automatic]
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
processing: automatic
# - automatic: the agent will automatically connect to new Rooms.
processing: manual
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox]
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
# The custom configuration for the selected provider must be set below
provider:
provider: vosk
aws:
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
@ -66,7 +66,7 @@ live_captions:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -318,4 +318,120 @@ live_captions:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:
context:
nvidia:
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set "server" and "use_ssl" instead.
api_key:
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
model:
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
function_id:
# Whether to add punctuation to transcription results. Default is true.
punctuate:
# The language code for transcription. Default is "en-US"
language_code:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
server:
# Whether to use SSL for the connection. Default is true.
# Set to false for locally hosted Riva NIM services without SSL.
use_ssl:
spitch:
# API key for Spitch. See https://docs.spitch.app/keys
api_key:
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
language:
elevenlabs:
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
api_key:
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
model_id:
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in which case the language is predicted automatically.
language_code:
# Custom base URL for the API. Optional.
base_url:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
tag_audio_events:
# Whether to include word-level timestamps in the transcription. Default is false.
include_timestamps:
simplismart:
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
api_key:
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
# Default is "openai/whisper-large-v3-turbo"
model:
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
language:
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
task:
# If true, disables timestamp generation in transcripts. Default is true
without_timestamps:
# Minimum duration (ms) for a valid speech segment. Default is 0
min_speech_duration_ms:
# Decoding temperature (affects randomness). Default is 0.0
temperature:
# Whether to permit multilingual recognition. Default is false
multilingual:
vosk:
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
# - vosk-model-en-us-0.22-lgraph (English US)
# - vosk-model-small-cn-0.22 (Chinese)
# - vosk-model-small-de-0.15 (German)
# - vosk-model-small-en-in-0.4 (English India)
# - vosk-model-small-es-0.42 (Spanish)
# - vosk-model-small-fr-0.22 (French)
# - vosk-model-small-hi-0.22 (Hindi)
# - vosk-model-small-it-0.22 (Italian)
# - vosk-model-small-ja-0.22 (Japanese)
# - vosk-model-small-nl-0.22 (Dutch)
# - vosk-model-small-pt-0.3 (Portuguese)
# - vosk-model-small-ru-0.22 (Russian)
model: vosk-model-en-us-0.22-lgraph
# Language code for reference. It has no effect other than observability purposes.
# If left empty and a pre-installed "model" is declared, this will be set automatically.
language:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false
sherpa:
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
# Language code for reference. Auto-detected from model name if not set.
language:
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
provider:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Number of threads for ONNX Runtime. Default is 2.
num_threads:
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
recognizer_type:
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
decoding_method:
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false

View File

@ -1,6 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.5.0
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -16,9 +16,9 @@ services:
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- MEET_INITIAL_ADMIN_USER=${MEET_INITIAL_ADMIN_USER:-}
- MEET_INITIAL_ADMIN_PASSWORD=${MEET_INITIAL_ADMIN_PASSWORD:-}
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-}
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
env_file:
- ./meet.env
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
@ -28,12 +28,13 @@ services:
- 7443:7443
- 7880:7880
- 9443:9443
- 9080:9080
depends_on:
setup:
condition: service_completed_successfully
redis:
image: docker.io/redis:8.2.2-alpine
image: docker.io/redis:8.6.1-alpine
container_name: redis
restart: unless-stopped
ports:
@ -42,15 +43,13 @@ services:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
--requirepass ${REDIS_PASSWORD:-}
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
minio:
image: docker.io/openvidu/minio:2025.9.7-debian-12-r3
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
container_name: minio
restart: unless-stopped
ports:
@ -71,7 +70,7 @@ services:
condition: service_completed_successfully
mongo:
image: docker.io/openvidu/mongodb:8.0.15-r0
image: docker.io/openvidu/mongodb:8.0.19-r1
container_name: mongo
restart: unless-stopped
ports:
@ -91,7 +90,7 @@ services:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.5.0
image: docker.io/openvidu/openvidu-dashboard:3.6.0
container_name: dashboard
restart: unless-stopped
environment:
@ -106,7 +105,7 @@ services:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server:3.5.0
image: docker.io/openvidu/openvidu-server:3.6.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
@ -129,7 +128,7 @@ services:
condition: service_completed_successfully
ingress:
image: docker.io/openvidu/ingress:3.5.0
image: docker.io/openvidu/ingress:3.6.0
container_name: ingress
restart: unless-stopped
extra_hosts:
@ -148,7 +147,7 @@ services:
condition: service_completed_successfully
egress:
image: docker.io/openvidu/egress:3.5.0
image: docker.io/openvidu/egress:3.6.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -164,11 +163,9 @@ services:
condition: service_completed_successfully
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.5.0
image: docker.io/openvidu/openvidu-meet:3.6.0
container_name: openvidu-meet
restart: on-failure
ports:
- 9080:6080
extra_hosts:
- host.docker.internal:host-gateway
environment:
@ -176,31 +173,16 @@ services:
- LAN_MODE=${LAN_MODE:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- SERVER_PORT=6080
- MEET_LOG_LEVEL=${MEET_LOG_LEVEL:-info}
- MEET_NAME_ID=openviduMeet-LOCAL
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-meet-api-key}
- MEET_INITIAL_ADMIN_USER=${MEET_INITIAL_ADMIN_USER:-admin}
- MEET_INITIAL_ADMIN_PASSWORD=${MEET_INITIAL_ADMIN_PASSWORD:-admin}
- MEET_COOKIE_SECURE=false
- MEET_INITIAL_WEBHOOK_ENABLED=true
- MEET_INITIAL_WEBHOOK_URL=${MEET_INITIAL_WEBHOOK_URL:-http://host.docker.internal:6080/webhook}
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
- MEET_S3_BUCKET=${MEET_S3_BUCKET:-openvidu-appdata}
- MEET_S3_SUBBUCKET=${MEET_S3_SUBBUCKET:-openvidu-meet}
- MEET_S3_SERVICE_ENDPOINT=${MEET_S3_SERVICE_ENDPOINT:-http://minio:9000}
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- MEET_AWS_REGION=${MEET_AWS_REGION:-us-east-1}
- MEET_S3_WITH_PATH_STYLE_ACCESS=${MEET_S3_WITH_PATH_STYLE_ACCESS:-true}
- MEET_REDIS_HOST=redis
- MEET_REDIS_PORT=6379
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_REDIS_DB=0
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
- MEET_CONFIG_DIR=/config/meet.env
volumes:
- ./meet.env:/config/meet.env
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
@ -210,7 +192,7 @@ services:
condition: service_completed_successfully
operator:
image: docker.io/openvidu/openvidu-operator:3.5.0
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: operator
restart: unless-stopped
volumes:
@ -234,7 +216,7 @@ services:
condition: service_completed_successfully
ready-check:
image: docker.io/openvidu/openvidu-operator:3.5.0
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: ready-check
restart: on-failure
volumes:
@ -252,9 +234,8 @@ services:
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- MEET_INITIAL_ADMIN_USER=${MEET_INITIAL_ADMIN_USER:-}
- MEET_INITIAL_ADMIN_PASSWORD=${MEET_INITIAL_ADMIN_PASSWORD:-}
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-}
env_file:
- ./meet.env
depends_on:
- openvidu
- ingress

View File

@ -46,8 +46,16 @@ cpu_cost:
openvidu:
# Allocation strategy for new egress requests
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
# - binpack: some node already hosting at least one Egress will be selected. Fills up nodes before assigning work to new ones.
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
allocation_strategy: cpuload
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
# It is preferable to set this value to:
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
# - false: when the egress service is running in a dedicated server.
use_global_cpu_monitoring: true
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
# The default "false" value helps keep the node stable, but may cause unexpected egress terminations under high load.
disable_cpu_overload_killer: false
disable_cpu_overload_killer: false
# Minimum available disk space in MB required to accept new egress requests.
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
min_disk_space_mb: 512

36
community/meet.env Normal file
View File

@ -0,0 +1,36 @@
# OpenVidu Meet configuration
# Static environment variables loaded via MEET_CONFIG_DIR
SERVER_PORT=6080
MEET_NAME_ID=openviduMeet-LOCAL
MEET_LOG_LEVEL=info
MEET_COOKIE_SECURE=false
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
MEET_INITIAL_WEBHOOK_ENABLED=true
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
# S3 configuration
MEET_S3_BUCKET=openvidu-appdata
MEET_S3_SUBBUCKET=openvidu-meet
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
MEET_AWS_REGION=us-east-1
MEET_S3_WITH_PATH_STYLE_ACCESS=true
# Storage backend type
MEET_BLOB_STORAGE_MODE=s3
# Redis configuration
MEET_REDIS_HOST=redis
MEET_REDIS_PORT=6379
MEET_REDIS_DB=0
# MongoDB configuration
MEET_MONGO_ENABLED=true
MEET_MONGO_DB_NAME=openvidu-meet
# Enable live captions using OpenVidu Speech to Text agent
MEET_CAPTIONS_ENABLED=false

View File

@ -30,7 +30,5 @@ MINIO_SECRET_KEY=minioadmin
MONGO_ADMIN_USERNAME=mongoadmin
MONGO_ADMIN_PASSWORD=mongoadmin
# OpenVidu Meet configuration.
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
# OpenVidu Meet base path
MEET_BASE_PATH=/meet

View File

@ -1,5 +1,5 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing:3.5.0
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
# Whether to run the agent or not.
enabled: false
@ -7,18 +7,18 @@ enabled: false
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
load_threshold: 1.0
# Log level for the agent [DEBUG, INFO, WARNING, ERROR, CRITICAL]
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
log_level: INFO
live_captions:
# How this agent will connect to Rooms [automatic, manual]
# - automatic: the agent will automatically connect to new Rooms.
# How this agent will connect to Rooms [manual, automatic]
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
processing: automatic
# - automatic: the agent will automatically connect to new Rooms.
processing: manual
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox]
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
# The custom configuration for the selected provider must be set below
provider:
provider: vosk
aws:
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
@ -66,7 +66,7 @@ live_captions:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -318,4 +318,120 @@ live_captions:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:
context:
nvidia:
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set "server" and "use_ssl" instead.
api_key:
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
model:
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
function_id:
# Whether to add punctuation to transcription results. Default is true.
punctuate:
# The language code for transcription. Default is "en-US"
language_code:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
server:
# Whether to use SSL for the connection. Default is true.
# Set to false for locally hosted Riva NIM services without SSL.
use_ssl:
spitch:
# API key for Spitch. See https://docs.spitch.app/keys
api_key:
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
language:
elevenlabs:
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
api_key:
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
model_id:
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in which case the language is predicted automatically.
language_code:
# Custom base URL for the API. Optional.
base_url:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
tag_audio_events:
# Whether to include word-level timestamps in the transcription. Default is false.
include_timestamps:
simplismart:
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
api_key:
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
# Default is "openai/whisper-large-v3-turbo"
model:
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
language:
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
task:
# If true, disables timestamp generation in transcripts. Default is true
without_timestamps:
# Minimum duration (ms) for a valid speech segment. Default is 0
min_speech_duration_ms:
# Decoding temperature (affects randomness). Default is 0.0
temperature:
# Whether to permit multilingual recognition. Default is false
multilingual:
vosk:
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
# - vosk-model-en-us-0.22-lgraph (English US)
# - vosk-model-small-cn-0.22 (Chinese)
# - vosk-model-small-de-0.15 (German)
# - vosk-model-small-en-in-0.4 (English India)
# - vosk-model-small-es-0.42 (Spanish)
# - vosk-model-small-fr-0.22 (French)
# - vosk-model-small-hi-0.22 (Hindi)
# - vosk-model-small-it-0.22 (Italian)
# - vosk-model-small-ja-0.22 (Japanese)
# - vosk-model-small-nl-0.22 (Dutch)
# - vosk-model-small-pt-0.3 (Portuguese)
# - vosk-model-small-ru-0.22 (Russian)
model: vosk-model-en-us-0.22-lgraph
# Language code for reference. It has no effect other than observability purposes.
# If left empty and a pre-installed "model" is declared, this will be set automatically.
language:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false
sherpa:
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
# Language code for reference. Auto-detected from model name if not set.
language:
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
provider:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Number of threads for ONNX Runtime. Default is 2.
num_threads:
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
recognizer_type:
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
decoding_method:
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false

View File

@ -1,6 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.5.0
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -17,9 +17,9 @@ services:
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
- MEET_INITIAL_ADMIN_USER=${MEET_INITIAL_ADMIN_USER:-}
- MEET_INITIAL_ADMIN_PASSWORD=${MEET_INITIAL_ADMIN_PASSWORD:-}
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-}
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
env_file:
- ./meet.env
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
@ -29,12 +29,13 @@ services:
- 7443:7443
- 7880:7880
- 9443:9443
- 9080:9080
depends_on:
setup:
condition: service_completed_successfully
redis:
image: docker.io/redis:8.2.2-alpine
image: docker.io/redis:8.6.1-alpine
container_name: redis
restart: unless-stopped
ports:
@ -43,15 +44,13 @@ services:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
--requirepass ${REDIS_PASSWORD:-}
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
minio:
image: docker.io/openvidu/minio:2025.9.7-debian-12-r3
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
restart: unless-stopped
ports:
- 9000:9000
@ -71,7 +70,7 @@ services:
condition: service_completed_successfully
mongo:
image: docker.io/openvidu/mongodb:8.0.15-r0
image: docker.io/openvidu/mongodb:8.0.19-r1
container_name: mongo
restart: unless-stopped
ports:
@ -91,7 +90,7 @@ services:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.5.0
image: docker.io/openvidu/openvidu-dashboard:3.6.0
container_name: dashboard
restart: unless-stopped
environment:
@ -106,7 +105,7 @@ services:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server-pro:3.5.0
image: docker.io/openvidu/openvidu-server-pro:3.6.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
@ -131,7 +130,7 @@ services:
condition: service_completed_successfully
ingress:
image: docker.io/openvidu/ingress:3.5.0
image: docker.io/openvidu/ingress:3.6.0
container_name: ingress
restart: unless-stopped
extra_hosts:
@ -150,7 +149,7 @@ services:
condition: service_completed_successfully
egress:
image: docker.io/openvidu/egress:3.5.0
image: docker.io/openvidu/egress:3.6.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -166,11 +165,9 @@ services:
condition: service_completed_successfully
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.5.0
image: docker.io/openvidu/openvidu-meet:3.6.0
container_name: openvidu-meet
restart: on-failure
ports:
- 9080:6080
extra_hosts:
- host.docker.internal:host-gateway
environment:
@ -178,31 +175,16 @@ services:
- LAN_MODE=${LAN_MODE:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- SERVER_PORT=6080
- MEET_LOG_LEVEL=${MEET_LOG_LEVEL:-info}
- MEET_NAME_ID=openviduMeet-LOCAL
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-meet-api-key}
- MEET_INITIAL_ADMIN_USER=${MEET_INITIAL_ADMIN_USER:-admin}
- MEET_INITIAL_ADMIN_PASSWORD=${MEET_INITIAL_ADMIN_PASSWORD:-admin}
- MEET_COOKIE_SECURE=false
- MEET_INITIAL_WEBHOOK_ENABLED=true
- MEET_INITIAL_WEBHOOK_URL=${MEET_INITIAL_WEBHOOK_URL:-http://host.docker.internal:6080/webhook}
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
- MEET_S3_BUCKET=${MEET_S3_BUCKET:-openvidu-appdata}
- MEET_S3_SUBBUCKET=${MEET_S3_SUBBUCKET:-openvidu-meet}
- MEET_S3_SERVICE_ENDPOINT=${MEET_S3_SERVICE_ENDPOINT:-http://minio:9000}
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- MEET_AWS_REGION=${MEET_AWS_REGION:-us-east-1}
- MEET_S3_WITH_PATH_STYLE_ACCESS=${MEET_S3_WITH_PATH_STYLE_ACCESS:-true}
- MEET_REDIS_HOST=redis
- MEET_REDIS_PORT=6379
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_REDIS_DB=0
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
- MEET_CONFIG_DIR=/config/meet.env
volumes:
- ./meet.env:/config/meet.env
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
@ -212,7 +194,7 @@ services:
condition: service_completed_successfully
openvidu-v2compatibility:
image: docker.io/openvidu/openvidu-v2compatibility:3.5.0
image: docker.io/openvidu/openvidu-v2compatibility:3.6.0
restart: unless-stopped
container_name: openvidu-v2compatibility
entrypoint: /bin/sh /scripts/entrypoint.sh
@ -254,7 +236,7 @@ services:
condition: service_completed_successfully
ready-check:
image: docker.io/openvidu/openvidu-operator:3.5.0
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: ready-check
restart: on-failure
volumes:
@ -273,9 +255,8 @@ services:
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
- MEET_INITIAL_ADMIN_USER=${MEET_INITIAL_ADMIN_USER:-}
- MEET_INITIAL_ADMIN_PASSWORD=${MEET_INITIAL_ADMIN_PASSWORD:-}
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-}
env_file:
- ./meet.env
depends_on:
- openvidu
- ingress
@ -285,7 +266,7 @@ services:
- mongo
operator:
image: docker.io/openvidu/openvidu-operator:3.5.0
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: operator
restart: unless-stopped
volumes:

View File

@ -46,8 +46,16 @@ cpu_cost:
openvidu:
# Allocation strategy for new egress requests
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
# - binpack: some node already hosting at least one Egress will be selected. Fills up nodes before assigning work to new ones.
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
allocation_strategy: cpuload
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
# It is preferable to set this value to:
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
# - false: when the egress service is running in a dedicated server.
use_global_cpu_monitoring: true
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
# The default "false" value helps keep the node stable, but may cause unexpected egress terminations under high load.
disable_cpu_overload_killer: false
# Minimum available disk space in MB required to accept new egress requests.
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
min_disk_space_mb: 512

36
pro/meet.env Normal file
View File

@ -0,0 +1,36 @@
# OpenVidu Meet configuration
# Static environment variables loaded via MEET_CONFIG_DIR
SERVER_PORT=6080
MEET_NAME_ID=openviduMeet-LOCAL
MEET_LOG_LEVEL=info
MEET_COOKIE_SECURE=false
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
MEET_INITIAL_WEBHOOK_ENABLED=true
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
# S3 configuration
MEET_S3_BUCKET=openvidu-appdata
MEET_S3_SUBBUCKET=openvidu-meet
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
MEET_AWS_REGION=us-east-1
MEET_S3_WITH_PATH_STYLE_ACCESS=true
# Storage backend type
MEET_BLOB_STORAGE_MODE=s3
# Redis configuration
MEET_REDIS_HOST=redis
MEET_REDIS_PORT=6379
MEET_REDIS_DB=0
# MongoDB configuration
MEET_MONGO_ENABLED=true
MEET_MONGO_DB_NAME=openvidu-meet
# Enable live captions using OpenVidu Speech to Text agent
MEET_CAPTIONS_ENABLED=false