Merge pull request #13 from OpenVidu/development

Merge 3.5.0
This commit is contained in:
Carlos Ruiz Ballesteros 2025-12-29 16:41:25 +01:00 committed by GitHub
commit 2ae40b3a40
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 198 additions and 102 deletions

View File

@ -1,5 +1,5 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing:3.4.1
docker_image: docker.io/openvidu/agent-speech-processing:3.5.0
# Whether to run the agent or not.
enabled: false
@ -16,7 +16,7 @@ live_captions:
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
processing: automatic
# Which speech-to-text AI provider to use [aws, azure, google, openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, spitch]
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox]
# The custom configuration for the selected provider must be set below
provider:
@ -63,6 +63,10 @@ live_captions:
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
profanity:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -82,6 +86,8 @@ live_captions:
project:
# The language code to use for transcription (e.g., "en" for English).
language:
# Whether to automatically detect the language.
detect_language:
# ID of the model to use for speech-to-text.
model:
# Initial prompt to guide the transcription.
@ -135,6 +141,8 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Optional text prompt to guide the transcription. Only supported for whisper-1.
prompt:
@ -146,8 +154,12 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
prompt:
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
base_url:
deepgram:
# See https://console.deepgram.com/
@ -156,25 +168,27 @@ live_captions:
model:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
language:
# Whether to enable automatic language detection. Defaults to false. See https://developers.deepgram.com/docs/language-detection
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
detect_language: false
# Whether to return interim (non-final) transcription results. Defaults to true. See https://developers.deepgram.com/docs/interim-results
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
interim_results: true
# Whether to apply smart formatting to numbers, dates, etc. Defaults to false. See https://developers.deepgram.com/docs/smart-format
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
smart_format: false
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. Defaults to true. See https://developers.deepgram.com/docs/smart-format#using-no-delay
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
no_delay: true
# Whether to add punctuations to the transcription. Defaults to true. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
# Whether to add punctuation to the transcription. Turn detector will work better with punctuation. See https://developers.deepgram.com/docs/punctuation
punctuate: true
# Whether to include filler words (um, uh, etc.) in transcription. Defaults to true. See https://developers.deepgram.com/docs/filler-words
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
filler_words: true
# Whether to filter profanity from the transcription. Defaults to false. See https://developers.deepgram.com/docs/profanity-filter
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
profanity_filter: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). Defaults to None. keywords does not work with Nova-3 models. Use keyterms instead.
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
numerals: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
# keywords:
# - [OpenVidu, 1.5]
# - [WebRTC, 1]
# List of key terms to improve recognition accuracy. Defaults to None. keyterms is supported by Nova-3 models.
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
# Commented below is an example
keyterms:
# - "OpenVidu"
@ -183,8 +197,18 @@ live_captions:
assemblyai:
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
api_key:
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
end_of_turn_confidence_threshold:
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
min_end_of_turn_silence_when_confident:
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
max_turn_silence:
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
format_turns: true
# List of keyterms to improve recognition accuracy for specific words and phrases.
keyterms_prompt:
# - "OpenVidu"
# - "WebRTC"
fal:
# API key for fal. See https://fal.ai/dashboard/keys
@ -208,12 +232,14 @@ live_captions:
speechmatics:
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
api_key:
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/introduction/supported-languages
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
language:
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/features/accuracy-language-packs#accuracy
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
operating_point:
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/features/realtime-latency#partial-transcripts
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
enable_partials:
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
enable_diarization:
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
output_locale:
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
@ -255,6 +281,10 @@ live_captions:
languages:
# Whether to allow switching between languages during recognition. Defaults to True
code_switching:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
pre_processing_audio_enhancer:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
pre_processing_speech_threshold:
sarvam:
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
@ -263,3 +293,29 @@ live_captions:
language:
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
model:
mistralai:
# API key for Mistral AI. See https://console.mistral.ai/api-keys
api_key:
# Name of the Voxtral STT model to use. Defaults to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
model:
# The language code to use for transcription (e.g., "en" for English)
language:
cartesia:
# API key for Cartesia. See https://play.cartesia.ai/keys
api_key:
# The Cartesia STT model to use
model:
# The language code to use for transcription (e.g., "en" for English)
language:
soniox:
# API key for Soniox. See https://console.soniox.com/
api_key:
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
language_hints:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:

View File

@ -15,13 +15,3 @@ fi
# Replace the LAN_PRIVATE_IP in the .env file
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
# If running on an Apple Silicon Mac, enable the EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
if [ "$(uname -m)" = "arm64" ]; then
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
else
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
fi
fi

View File

@ -1,7 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-caddy-local:3.5.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -22,6 +21,7 @@ services:
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-}
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
ports:
- 5443:5443
- 6443:6443
@ -33,14 +33,14 @@ services:
condition: service_completed_successfully
redis:
image: docker.io/redis:7.4.4-alpine
platform: linux/amd64
image: docker.io/redis:8.2.2-alpine
container_name: redis
restart: unless-stopped
ports:
- 6379:6379
volumes:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
@ -50,8 +50,7 @@ services:
condition: service_completed_successfully
minio:
image: docker.io/openvidu/minio:2025.5.24-debian-12-r1
platform: linux/amd64
image: docker.io/openvidu/minio:2025.9.7-debian-12-r3
container_name: minio
restart: unless-stopped
ports:
@ -61,23 +60,25 @@ services:
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
- MINIO_CONSOLE_SUBPATH=/minio-console
- MINIO_BROWSER=on
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
volumes:
- minio-data:/bitnami/minio/data
- minio-certs:/certs
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
mongo:
image: docker.io/openvidu/mongodb:8.0.12-r0
platform: linux/amd64
image: docker.io/openvidu/mongodb:8.0.15-r0
container_name: mongo
restart: unless-stopped
ports:
- 27017:27017
volumes:
- mongo-data:/bitnami/mongodb
- /etc/localtime:/etc/localtime:ro
environment:
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
@ -85,14 +86,12 @@ services:
- MONGODB_REPLICA_SET_MODE=primary
- MONGODB_REPLICA_SET_NAME=rs0
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
depends_on:
setup:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-dashboard:3.5.0
container_name: dashboard
restart: unless-stopped
environment:
@ -100,13 +99,14 @@ services:
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-server:3.5.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
@ -123,13 +123,13 @@ services:
volumes:
- ./livekit.yaml:/etc/livekit.yaml
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ingress:
image: docker.io/openvidu/ingress:3.4.1
platform: linux/amd64
image: docker.io/openvidu/ingress:3.5.0
container_name: ingress
restart: unless-stopped
extra_hosts:
@ -142,13 +142,13 @@ services:
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
volumes:
- ./ingress.yaml:/etc/ingress.yaml
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
egress:
image: docker.io/livekit/egress:v1.10.0
platform: linux/amd64
image: docker.io/openvidu/egress:3.5.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -158,13 +158,13 @@ services:
volumes:
- ./egress.yaml:/etc/egress.yaml
- egress-data:/home/egress/tmp
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-meet:3.5.0
container_name: openvidu-meet
restart: on-failure
ports:
@ -199,25 +199,26 @@ services:
- MEET_REDIS_PORT=6379
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_REDIS_DB=0
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
entrypoint: /bin/sh /scripts/entrypoint.sh
depends_on:
setup:
condition: service_completed_successfully
operator:
image: docker.io/openvidu/openvidu-operator:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.5.0
container_name: operator
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agents-config:/agents-config
- ./:/deployment
- /etc/localtime:/etc/localtime:ro
environment:
- PLATFORM=linux/amd64
- MODE=agent-manager-local
- DEPLOYMENT_FILES_DIR=/deployment
- AGENTS_CONFIG_DIR=/agents-config
@ -233,10 +234,11 @@ services:
condition: service_completed_successfully
ready-check:
image: docker.io/openvidu/openvidu-operator:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.5.0
container_name: ready-check
restart: on-failure
volumes:
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=local-ready-check
- OPENVIDU_ENVIRONMENT=local-platform
@ -263,7 +265,6 @@ services:
setup:
image: docker.io/busybox:1.37.0
platform: linux/amd64
container_name: setup
restart: "no"
volumes:
@ -271,6 +272,7 @@ services:
- mongo-data:/mongo
- egress-data:/egress
- ./scripts/setup.sh:/scripts/setup.sh
- /etc/localtime:/etc/localtime:ro
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}

View File

@ -1,5 +1,5 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing:3.4.1
docker_image: docker.io/openvidu/agent-speech-processing:3.5.0
# Whether to run the agent or not.
enabled: false
@ -16,7 +16,7 @@ live_captions:
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
processing: automatic
# Which speech-to-text AI provider to use [aws, azure, google, openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, spitch]
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox]
# The custom configuration for the selected provider must be set below
provider:
@ -63,6 +63,10 @@ live_captions:
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
profanity:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -82,6 +86,8 @@ live_captions:
project:
# The language code to use for transcription (e.g., "en" for English).
language:
# Whether to automatically detect the language.
detect_language:
# ID of the model to use for speech-to-text.
model:
# Initial prompt to guide the transcription.
@ -135,6 +141,8 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Optional text prompt to guide the transcription. Only supported for whisper-1.
prompt:
@ -146,8 +154,12 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
prompt:
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
base_url:
deepgram:
# See https://console.deepgram.com/
@ -156,25 +168,27 @@ live_captions:
model:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
language:
# Whether to enable automatic language detection. Defaults to false. See https://developers.deepgram.com/docs/language-detection
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
detect_language: false
# Whether to return interim (non-final) transcription results. Defaults to true. See https://developers.deepgram.com/docs/interim-results
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
interim_results: true
# Whether to apply smart formatting to numbers, dates, etc. Defaults to false. See https://developers.deepgram.com/docs/smart-format
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
smart_format: false
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. Defaults to true. See https://developers.deepgram.com/docs/smart-format#using-no-delay
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
no_delay: true
# Whether to add punctuations to the transcription. Defaults to true. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
# Whether to add punctuation to the transcription. Turn detector will work better with punctuation. See https://developers.deepgram.com/docs/punctuation
punctuate: true
# Whether to include filler words (um, uh, etc.) in transcription. Defaults to true. See https://developers.deepgram.com/docs/filler-words
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
filler_words: true
# Whether to filter profanity from the transcription. Defaults to false. See https://developers.deepgram.com/docs/profanity-filter
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
profanity_filter: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). Defaults to None. keywords does not work with Nova-3 models. Use keyterms instead.
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
numerals: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
# keywords:
# - [OpenVidu, 1.5]
# - [WebRTC, 1]
# List of key terms to improve recognition accuracy. Defaults to None. keyterms is supported by Nova-3 models.
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
# Commented below is an example
keyterms:
# - "OpenVidu"
@ -183,8 +197,18 @@ live_captions:
assemblyai:
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
api_key:
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
end_of_turn_confidence_threshold:
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
min_end_of_turn_silence_when_confident:
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
max_turn_silence:
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
format_turns: true
# List of keyterms to improve recognition accuracy for specific words and phrases.
keyterms_prompt:
# - "OpenVidu"
# - "WebRTC"
fal:
# API key for fal. See https://fal.ai/dashboard/keys
@ -208,12 +232,14 @@ live_captions:
speechmatics:
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
api_key:
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/introduction/supported-languages
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
language:
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/features/accuracy-language-packs#accuracy
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
operating_point:
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/features/realtime-latency#partial-transcripts
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
enable_partials:
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
enable_diarization:
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
output_locale:
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
@ -255,6 +281,10 @@ live_captions:
languages:
# Whether to allow switching between languages during recognition. Defaults to True
code_switching:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
pre_processing_audio_enhancer:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
pre_processing_speech_threshold:
sarvam:
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
@ -263,3 +293,29 @@ live_captions:
language:
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
model:
mistralai:
# API key for Mistral AI. See https://console.mistral.ai/api-keys
api_key:
# Name of the Voxtral STT model to use. Defaults to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
model:
# The language code to use for transcription (e.g., "en" for English)
language:
cartesia:
# API key for Cartesia. See https://play.cartesia.ai/keys
api_key:
# The Cartesia STT model to use
model:
# The language code to use for transcription (e.g., "en" for English)
language:
soniox:
# API key for Soniox. See https://console.soniox.com/
api_key:
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
language_hints:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:

View File

@ -15,13 +15,3 @@ fi
# Replace the LAN_PRIVATE_IP in the .env file
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
# If running on an Apple Silicon Mac, enable the EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
if [ "$(uname -m)" = "arm64" ]; then
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
else
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
fi
fi

View File

@ -1,7 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-caddy-local:3.5.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -23,6 +22,7 @@ services:
- MEET_INITIAL_API_KEY=${MEET_INITIAL_API_KEY:-}
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
ports:
- 5443:5443
- 6443:6443
@ -34,14 +34,14 @@ services:
condition: service_completed_successfully
redis:
image: docker.io/redis:7.4.4-alpine
platform: linux/amd64
image: docker.io/redis:8.2.2-alpine
container_name: redis
restart: unless-stopped
ports:
- 6379:6379
volumes:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
@ -51,8 +51,7 @@ services:
condition: service_completed_successfully
minio:
image: docker.io/openvidu/minio:2025.5.24-debian-12-r1
platform: linux/amd64
image: docker.io/openvidu/minio:2025.9.7-debian-12-r3
restart: unless-stopped
ports:
- 9000:9000
@ -61,23 +60,25 @@ services:
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
- MINIO_CONSOLE_SUBPATH=/minio-console
- MINIO_BROWSER=on
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
volumes:
- minio-data:/bitnami/minio/data
- minio-certs:/certs
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
mongo:
image: docker.io/openvidu/mongodb:8.0.12-r0
platform: linux/amd64
image: docker.io/openvidu/mongodb:8.0.15-r0
container_name: mongo
restart: unless-stopped
ports:
- 27017:27017
volumes:
- mongo-data:/bitnami/mongodb
- /etc/localtime:/etc/localtime:ro
environment:
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
@ -85,14 +86,12 @@ services:
- MONGODB_REPLICA_SET_MODE=primary
- MONGODB_REPLICA_SET_NAME=rs0
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
depends_on:
setup:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-dashboard:3.5.0
container_name: dashboard
restart: unless-stopped
environment:
@ -100,13 +99,14 @@ services:
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server-pro:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-server-pro:3.5.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
@ -125,13 +125,13 @@ services:
volumes:
- ./livekit.yaml:/etc/livekit.yaml
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ingress:
image: docker.io/openvidu/ingress:3.4.1
platform: linux/amd64
image: docker.io/openvidu/ingress:3.5.0
container_name: ingress
restart: unless-stopped
extra_hosts:
@ -144,13 +144,13 @@ services:
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
volumes:
- ./ingress.yaml:/etc/ingress.yaml
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
egress:
image: docker.io/livekit/egress:v1.10.0
platform: linux/amd64
image: docker.io/openvidu/egress:3.5.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -160,13 +160,13 @@ services:
volumes:
- ./egress.yaml:/etc/egress.yaml
- egress-data:/home/egress
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-meet:3.5.0
container_name: openvidu-meet
restart: on-failure
ports:
@ -201,17 +201,18 @@ services:
- MEET_REDIS_PORT=6379
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_REDIS_DB=0
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
entrypoint: /bin/sh /scripts/entrypoint.sh
depends_on:
setup:
condition: service_completed_successfully
openvidu-v2compatibility:
image: docker.io/openvidu/openvidu-v2compatibility:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-v2compatibility:3.5.0
restart: unless-stopped
container_name: openvidu-v2compatibility
entrypoint: /bin/sh /scripts/entrypoint.sh
@ -247,15 +248,17 @@ services:
- ./recordings:/opt/openvidu/recordings
- ./scripts/entrypoint_v2comp.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ready-check:
image: docker.io/openvidu/openvidu-operator:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.5.0
container_name: ready-check
restart: on-failure
volumes:
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=local-ready-check
- OPENVIDU_ENVIRONMENT=local-platform
@ -282,16 +285,15 @@ services:
- mongo
operator:
image: docker.io/openvidu/openvidu-operator:3.4.1
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.5.0
container_name: operator
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agents-config:/agents-config
- ./:/deployment
- /etc/localtime:/etc/localtime:ro
environment:
- PLATFORM=linux/amd64
- MODE=agent-manager-local
- DEPLOYMENT_FILES_DIR=/deployment
- AGENTS_CONFIG_DIR=/agents-config
@ -307,7 +309,6 @@ services:
condition: service_completed_successfully
setup:
image: docker.io/busybox:1.37.0
platform: linux/amd64
container_name: setup
restart: "no"
volumes:
@ -315,6 +316,7 @@ services:
- mongo-data:/mongo
- egress-data:/egress
- ./scripts/setup.sh:/scripts/setup.sh
- /etc/localtime:/etc/localtime:ro
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}