Compare commits

...

58 Commits
3.3.0 ... main

Author SHA1 Message Date
GitHub Actions
3425341e81 Bump to version 3.6.0 2026-03-06 12:05:23 +00:00
pabloFuente
8a4923e4a8 Add sherpa.provider YAML property to agent-speech-processing.yaml 2026-03-04 13:09:53 +01:00
pabloFuente
ea27ec1f96 Minor update of YAML comment 2026-02-25 18:30:18 +01:00
cruizba
c145494bbd Change speech processing default from automatic to manual
The agent will now only connect to Rooms when explicitly requested
via the Agent Dispatch API instead of auto-connecting to all new Rooms.
2026-02-25 18:22:19 +01:00
cruizba
9bff284b8f Bump Docker image versions for Redis, Minio and MongoDB
- Redis: 8.2.2-alpine -> 8.6.1-alpine
- Minio: 2025.9.7-debian-12-r3 -> 2025.10.15-debian-12-r9
- MongoDB: 8.0.15-r0 -> 8.0.19-r1
2026-02-24 15:29:10 +01:00
pabloFuente
1724fa5c18 Add use_global_cpu_monitoring and min_disk_space_mb to egress config 2026-02-20 14:10:57 +01:00
pabloFuente
3d06d98ea0 Beautify docker-compose.yaml 2026-02-18 13:19:34 +01:00
pabloFuente
ff54026aad Default live_captions provider to vosk 2026-02-12 12:33:31 +01:00
cruizba
c778720ba5 Add meet.env file to caddy-proxy and ready-check which needs MEET_INITIAL_* env variables. 2026-02-09 20:03:23 +01:00
cruizba
0422cbd8c2 Refactor OpenVidu Meet configuration: consolidate environment variables into meet.env and update docker-compose to use new configuration structure 2026-02-09 19:49:59 +01:00
pabloFuente
7c22e68ab5 Update agent-speech-processing.yaml files 2026-02-06 10:57:12 +01:00
cruizba
1fd49f308c Add MEET_BASE_PATH environment variable to configuration files 2026-02-05 19:12:34 +01:00
pabloFuente
8fc6edaa87 Fix agent-speech-processing.yaml files sherpa model names 2026-02-04 17:04:24 +01:00
pabloFuente
803dfbbfa8 Updated agent-speech-processing.yaml files with new providers 2026-02-04 16:39:15 +01:00
cruizba
0ee45ec06f Route 9080 through caddy to redir to /meet 2026-02-02 22:00:22 +01:00
pabloFuente
2ea399dc42 Add nvidia and vosk live_captions providers to YAML 2026-01-21 11:48:59 +01:00
pabloFuente
d51a1b2cdf Update agent-speech-processing.yaml 2026-01-19 14:07:35 +01:00
cruizba
7c8908707b Revert to main version 2026-01-10 03:08:48 +01:00
Carlos Ruiz Ballesteros
2ae40b3a40
Merge pull request #13 from OpenVidu/development
Merge 3.5.0
2025-12-29 16:41:25 +01:00
cruizba
96af8554fb Bump to version 3.5.0 2025-12-29 16:39:57 +01:00
cruizba
0e1a3cfea0 Add arm64 support 2025-12-02 13:31:22 +01:00
cruizba
3ce20ad7ba Install tzdata on images and use timezone of host. Should fix https://github.com/OpenVidu/openvidu-local-deployment/issues/9 2025-11-28 23:10:04 +01:00
cruizba
5e13135101 Add MEET_MONGO_URI environment variable to OpenVidu Meet service 2025-11-25 12:45:23 +01:00
cruizba
749eaa56ec Bump Docker images 2025-10-17 21:32:54 +02:00
pabloFuente
1fb923ca05 Update agent-speech-processing.yaml 2025-10-14 12:30:00 +02:00
cruizba
5a932730ad Revert "Bump to version 3.4.1"
This reverts commit 376e4dee58308e9cbb1b81121de566cc95d99b24.
2025-10-13 20:35:35 +02:00
Carlos Ruiz Ballesteros
2f2635429f
Merge pull request #11 from OpenVidu/development
Merge 3.4.1
2025-10-13 20:35:06 +02:00
cruizba
376e4dee58 Bump to version 3.4.1 2025-10-13 20:34:25 +02:00
cruizba
1eda3c348b Revert "Bump to version 3.4.0"
This reverts commit 1234088d307664ca5a2f3ea99d7d59f31258f8d4.
2025-10-01 19:30:35 +02:00
cruizba
1234088d30 Bump to version 3.4.0 2025-10-01 19:30:05 +02:00
Carlos Ruiz Ballesteros
a9d78ea908
Merge pull request #10 from OpenVidu/development
Merge 3.4.0
2025-10-01 19:25:16 +02:00
Carlos Ruiz Ballesteros
9b0f9ec7c6
Merge branch 'main' into development 2025-10-01 19:24:28 +02:00
pabloFuente
31e6d6943e Increase values for agent-speech-processing load_threshold to 1.0 2025-09-26 17:36:48 +02:00
pabloFuente
2ec5048b84 Add new config to agent-speech-processing.yaml [load_threshold, log_level] 2025-09-26 17:34:55 +02:00
cruizba
afcf531588 Revert "Add MEET_BASE_URL"
This reverts commit 8e644bddb89d6588f27dacb7355fee1ac8ef7bb9.
2025-09-26 14:23:05 +02:00
pabloFuente
e0d70bf1ac Add new egress config [allocation_strategy, disable_cpu_overload_killer] 2025-09-26 13:17:42 +02:00
Piwccle
2b8bc05dc2 Add GCP configuration placeholders in egress.yaml files 2025-09-22 15:31:22 +02:00
cruizba
8e644bddb8 Add MEET_BASE_URL 2025-09-17 20:30:10 +02:00
juancarmore
e6c20beb03 Update MEET_INITIAL_WEBHOOK_URL to use port 6080 in docker-compose files 2025-09-11 14:04:12 +02:00
cruizba
d8903420a4 Update MongoDB image to 8.0.12-r0 and change OPENVIDU_ENVIRONMENT to local-platform 2025-09-01 02:05:38 +02:00
cruizba
44697e87ad Set OPENVIDU_ENVIRONMENT to local in docker-compose files 2025-08-27 20:30:22 +02:00
cruizba
539703ff94 Rename OpenVidu Meet configuration variables for clarity 2025-08-27 18:55:06 +02:00
cruizba
acc9df23ed Update MinIO and MongoDB images to use OpenVidu registry 2025-08-21 15:00:35 +02:00
cruizba
b60adf9ed2 Update MinIO and MongoDB images to use OpenVidu registry 2025-08-21 14:59:41 +02:00
pabloFuente
6b7729be90 Update egress from v1.9.1 to v1.10.0 2025-08-20 17:07:39 +02:00
cruizba
8f5da0fd01 Do not configure LAN_PRIVATE_IP in openvidu if LAN_MODE is not true. 2025-07-08 12:05:37 +02:00
cruizba
332e51231b Add instructions for disabling LAN_MODE in setup script 2025-07-08 11:55:00 +02:00
cruizba
412985ca3f Update ready-check service to use openvidu-operator image and set local-ready-check mode 2025-07-08 11:29:51 +02:00
cruizba
277c15b2c0 Update ready-check service to use own image. Improve output of ready-check 2025-07-07 21:32:43 +02:00
cruizba
32844c1ef2 Update OpenVidu Meet ports and webhook URLs to use 6080 internally. Expose port 9080 2025-07-07 13:58:28 +02:00
cruizba
2144d51c21 Allow LAN_MODE=false and USE_HTTPS=false without defining LAN_PRIVATE_IP 2025-07-04 22:38:38 +02:00
cruizba
fb03ffbae6 Add Meet env vars to caddy-proxy 2025-07-04 22:04:55 +02:00
cruizba
33d05574f6 Update environment configuration for OpenVidu Meet 2025-07-04 21:25:30 +02:00
cruizba
251885b802 Update entrypoint_openvidu_meet.sh to include 'ws' in getDeploymentUrl 2025-07-04 21:17:44 +02:00
cruizba
7e7de0fe33 Add openvidu-meet 2025-07-04 21:16:37 +02:00
pabloFuente
1fe72ba2af Remove spitch provider from agent-speech-processing.yaml 2025-07-02 21:50:42 +02:00
pabloFuente
a6d0a062be Updated agent-speech-processing.yaml with Spitch provider 2025-07-02 20:15:00 +02:00
cruizba
54640c9260 Revert "Bump to version 3.3.0"
This reverts commit 4bf87d6485edc518420ac55684c83a58328b3f1e.
2025-06-26 22:25:15 +02:00
22 changed files with 663 additions and 347 deletions

View File

@@ -30,9 +30,5 @@ MINIO_SECRET_KEY=minioadmin
MONGO_ADMIN_USERNAME=mongoadmin
MONGO_ADMIN_PASSWORD=mongoadmin
# Default App (OpenVidu Call)
CALL_PRIVATE_ACCESS=false
CALL_USER=
CALL_SECRET=
CALL_ADMIN_USER=admin
CALL_ADMIN_SECRET=admin
# OpenVidu Meet base path
MEET_BASE_PATH=/meet

View File

@@ -1,18 +1,24 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing:3.3.0
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
# Whether to run the agent or not.
enabled: false
live_captions:
# How this agent will connect to Rooms [automatic, manual]
# - automatic: the agent will automatically connect to new Rooms.
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
processing: automatic
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
load_threshold: 1.0
# Which speech-to-text AI provider to use [aws, azure, google, opeanai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam]
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
log_level: INFO
live_captions:
# How this agent will connect to Rooms [manual, automatic]
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
# - automatic: the agent will automatically connect to new Rooms.
processing: manual
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
# The custom configuration for the selected provider must be set below
provider:
provider: vosk
aws:
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
@@ -57,6 +63,10 @@ live_captions:
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
profanity:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@@ -76,6 +86,8 @@ live_captions:
project:
# The language code to use for transcription (e.g., "en" for English).
language:
# Whether to automatically detect the language.
detect_language:
# ID of the model to use for speech-to-text.
model:
# Initial prompt to guide the transcription.
@@ -129,6 +141,8 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Optional text prompt to guide the transcription. Only supported for whisper-1.
prompt:
@@ -140,8 +154,12 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
prompt:
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
base_url:
deepgram:
# See https://console.deepgram.com/
@@ -150,25 +168,27 @@ live_captions:
model:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
language:
# Whether to enable automatic language detection. Defaults to false. See https://developers.deepgram.com/docs/language-detection
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
detect_language: false
# Whether to return interim (non-final) transcription results. Defaults to true. See https://developers.deepgram.com/docs/interim-results
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
interim_results: true
# Whether to apply smart formatting to numbers, dates, etc. Defaults to true. See https://developers.deepgram.com/docs/smart-format
smart_format: true
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. Defaults to true. See https://developers.deepgram.com/docs/smart-format#using-no-delay
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
smart_format: false
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
no_delay: true
# Whether to add punctuations to the transcription. Defaults to true. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
# Whether to add punctuations to the transcription. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
punctuate: true
# Whether to include filler words (um, uh, etc.) in transcription. Defaults to true. See https://developers.deepgram.com/docs/filler-words
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
filler_words: true
# Whether to filter profanity from the transcription. Defaults to false. See https://developers.deepgram.com/docs/profanity-filter
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
profanity_filter: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). Defaults to None. keywords does not work with Nova-3 models. Use keyterms instead.
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
numerals: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
# keywords:
# - [OpenVidu, 1.5]
# - [WebRTC, 1]
# List of key terms to improve recognition accuracy. Defaults to None. keyterms is supported by Nova-3 models.
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
# Commented below is an example
keyterms:
# - "OpenVidu"
@@ -177,8 +197,18 @@ live_captions:
assemblyai:
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
api_key:
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
end_of_turn_confidence_threshold:
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
min_end_of_turn_silence_when_confident:
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
max_turn_silence:
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
format_turns: true
# List of keyterms to improve recognition accuracy for specific words and phrases.
keyterms_prompt:
# - "OpenVidu"
# - "WebRTC"
fal:
# API key for fal. See https://fal.ai/dashboard/keys
@@ -202,12 +232,14 @@ live_captions:
speechmatics:
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
api_key:
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/introduction/supported-languages
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
language:
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/features/accuracy-language-packs#accuracy
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
operating_point:
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/features/realtime-latency#partial-transcripts
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
enable_partials:
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
enable_diarization:
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
output_locale:
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
@@ -222,7 +254,7 @@ live_captions:
speaker_sensitivity:
# See https://docs.speechmatics.com/features/diarization#prefer-current-speaker
prefer_current_speaker:
# Permitted puctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
# Permitted punctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
# Commented is an example of punctuation settings
punctuation_overrides:
# permitted_marks: [ ".", "," ]
@@ -249,6 +281,10 @@ live_captions:
languages:
# Whether to allow switching between languages during recognition. Defaults to True
code_switching:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
pre_processing_audio_enhancer:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
pre_processing_speech_threshold:
sarvam:
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
@@ -257,3 +293,145 @@ live_captions:
language:
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
model:
mistralai:
# API key for Mistral AI. See https://console.mistral.ai/api-keys
api_key:
# Name of the Voxtral STT model to use. Default to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
model:
# The language code to use for transcription (e.g., "en" for English)
language:
cartesia:
# API key for Cartesia. See https://play.cartesia.ai/keys
api_key:
# The Cartesia STT model to use
model:
# The language code to use for transcription (e.g., "en" for English)
language:
soniox:
# API key for Soniox. See https://console.soniox.com/
api_key:
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
language_hints:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:
nvidia:
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server setup "server" and "use_ssl" instead.
api_key:
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
model:
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
function_id:
# Whether to add punctuation to transcription results. Default is true.
punctuate:
# The language code for transcription. Default is "en-US"
language_code:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
server:
# Whether to use SSL for the connection. Default is true.
# Set to false for locally hosted Riva NIM services without SSL.
use_ssl:
spitch:
# API key for Spitch. See https://docs.spitch.app/keys
api_key:
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
language:
elevenlabs:
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
api_key:
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
model_id:
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically
language_code:
# Custom base URL for the API. Optional.
base_url:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
tag_audio_events:
# Whether to include word-level timestamps in the transcription. Default is false.
include_timestamps:
simplismart:
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
api_key:
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
# Default is "openai/whisper-large-v3-turbo"
model:
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
language:
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
task:
# If true, disables timestamp generation in transcripts. Default is true
without_timestamps:
# Minimum duration (ms) for a valid speech segment. Default is 0
min_speech_duration_ms:
# Decoding temperature (affects randomness). Default is 0.0
temperature:
# Whether to permit multilingual recognition. Default is false
multilingual:
vosk:
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
# - vosk-model-en-us-0.22-lgraph (English US)
# - vosk-model-small-cn-0.22 (Chinese)
# - vosk-model-small-de-0.15 (German)
# - vosk-model-small-en-in-0.4 (English India)
# - vosk-model-small-es-0.42 (Spanish)
# - vosk-model-small-fr-0.22 (French)
# - vosk-model-small-hi-0.22 (Hindi)
# - vosk-model-small-it-0.22 (Italian)
# - vosk-model-small-ja-0.22 (Japanese)
# - vosk-model-small-nl-0.22 (Dutch)
# - vosk-model-small-pt-0.3 (Portuguese)
# - vosk-model-small-ru-0.22 (Russian)
model: vosk-model-en-us-0.22-lgraph
# Language code for reference. It has no effect other than observability purposes.
# If a pre-installed "model" is declared, this will be set automatically if empty.
language:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false
sherpa:
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
# Language code for reference. Auto-detected from model name if not set.
language:
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
provider:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Number of threads for ONNX Runtime. Default is 2.
num_threads:
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
recognizer_type:
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
decoding_method:
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false

View File

@@ -14,14 +14,4 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
fi
# Replace the LAN_PRIVATE_IP in the .env file
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
# If sillicon mac, enable EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
if [ "$(uname -m)" = "arm64" ]; then
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
else
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
fi
fi
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env

View File

@@ -1,7 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@@ -17,37 +16,40 @@ services:
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
env_file:
- ./meet.env
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
ports:
- 5443:5443
- 6443:6443
- 7443:7443
- 7880:7880
- 9443:9443
- 9080:9080
depends_on:
setup:
condition: service_completed_successfully
redis:
image: docker.io/redis:7.4.4-alpine
platform: linux/amd64
image: docker.io/redis:8.6.1-alpine
container_name: redis
restart: unless-stopped
ports:
- 6379:6379
volumes:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
--requirepass ${REDIS_PASSWORD:-}
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
minio:
image: docker.io/bitnami/minio:2025.5.24-debian-12-r1
platform: linux/amd64
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
container_name: minio
restart: unless-stopped
ports:
@@ -57,23 +59,25 @@ services:
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
- MINIO_CONSOLE_SUBPATH=/minio-console
- MINIO_BROWSER=on
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
volumes:
- minio-data:/bitnami/minio/data
- minio-certs:/certs
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
mongo:
image: docker.io/bitnami/mongodb:8.0.9
platform: linux/amd64
image: docker.io/openvidu/mongodb:8.0.19-r1
container_name: mongo
restart: unless-stopped
ports:
- 27017:27017
volumes:
- mongo-data:/bitnami/mongodb
- /etc/localtime:/etc/localtime:ro
environment:
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
@@ -81,14 +85,12 @@ services:
- MONGODB_REPLICA_SET_MODE=primary
- MONGODB_REPLICA_SET_NAME=rs0
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
depends_on:
setup:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-dashboard:3.6.0
container_name: dashboard
restart: unless-stopped
environment:
@@ -96,19 +98,21 @@ services:
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-server:3.6.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
- host.docker.internal:host-gateway
environment:
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- LAN_MODE=${LAN_MODE:-false}
ports:
- 3478:3478/udp
- 7881:7881/tcp
@@ -118,13 +122,13 @@ services:
volumes:
- ./livekit.yaml:/etc/livekit.yaml
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ingress:
image: docker.io/openvidu/ingress:3.3.0
platform: linux/amd64
image: docker.io/openvidu/ingress:3.6.0
container_name: ingress
restart: unless-stopped
extra_hosts:
@@ -137,13 +141,13 @@ services:
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
volumes:
- ./ingress.yaml:/etc/ingress.yaml
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
egress:
image: docker.io/livekit/egress:v1.9.1
platform: linux/amd64
image: docker.io/openvidu/egress:3.6.0
restart: unless-stopped
container_name: egress
extra_hosts:
@@ -153,57 +157,50 @@ services:
volumes:
- ./egress.yaml:/etc/egress.yaml
- egress-data:/home/egress/tmp
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
default-app:
image: docker.io/openvidu/openvidu-call:3.3.0-demo
platform: linux/amd64
container_name: openvidu-call
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.6.0
container_name: openvidu-meet
restart: on-failure
extra_hosts:
- host.docker.internal:host-gateway
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- SERVER_PORT=6080
- CALL_NAME_ID=OpenViduCall-LOCAL
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
- CALL_PRIVATE_ACCESS=${CALL_PRIVATE_ACCESS:-false}
- CALL_USER=${CALL_USER:-}
- CALL_SECRET=${CALL_SECRET:-}
- CALL_RECORDING=${CALL_RECORDING:-}
- CALL_ADMIN_USER=${CALL_ADMIN_USER:-admin}
- CALL_ADMIN_SECRET=${CALL_ADMIN_SECRET:-admin}
- CALL_LOG_LEVEL=${CALL_LOG_LEVEL:-info}
- CALL_S3_BUCKET=${CALL_S3_BUCKET:-openvidu-appdata}
- CALL_S3_SERVICE_ENDPOINT=${CALL_S3_SERVICE_ENDPOINT:-http://minio:9000}
- CALL_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- CALL_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- CALL_AWS_REGION=${CALL_AWS_REGION:-us-east-1}
- CALL_S3_WITH_PATH_STYLE_ACCESS=${CALL_S3_WITH_PATH_STYLE_ACCESS:-true}
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
- MEET_CONFIG_DIR=/config/meet.env
volumes:
- ./scripts/entrypoint_default_app.sh:/scripts/entrypoint.sh
- ./meet.env:/config/meet.env
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
entrypoint: /bin/sh /scripts/entrypoint.sh
depends_on:
setup:
condition: service_completed_successfully
operator:
image: docker.io/openvidu/openvidu-operator:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: operator
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agents-config:/agents-config
- ./:/deployment
- /etc/localtime:/etc/localtime:ro
environment:
- PLATFORM=linux/amd64
- MODE=agent-manager-local
- DEPLOYMENT_FILES_DIR=/deployment
- AGENTS_CONFIG_DIR=/agents-config
@ -219,21 +216,26 @@ services:
condition: service_completed_successfully
ready-check:
image: docker.io/curlimages/curl:8.13.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: ready-check
restart: on-failure
volumes:
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=local-ready-check
- OPENVIDU_ENVIRONMENT=local-platform
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_MODE=${LAN_MODE:-false}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- DASHBOARD_ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
env_file:
- ./meet.env
depends_on:
- openvidu
- ingress
@ -241,14 +243,9 @@ services:
- dashboard
- minio
- mongo
volumes:
- ./scripts/ready-check.sh:/scripts/ready-check.sh
- ./scripts/utils.sh:/scripts/utils.sh
command: /bin/sh /scripts/ready-check.sh
setup:
image: docker.io/busybox:1.37.0
platform: linux/amd64
container_name: setup
restart: "no"
volumes:
@ -256,6 +253,7 @@ services:
- mongo-data:/mongo
- egress-data:/egress
- ./scripts/setup.sh:/scripts/setup.sh
- /etc/localtime:/etc/localtime:ro
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}

View File

@ -27,6 +27,10 @@ storage:
# account_name: your_account_name
# account_key: your_account_key
# container_name: openvidu-appdata
# gcp:
# credentials_json: |
# your_credentials_json
# bucket: openvidu-appdata
# CPU cost for each type of Egress operation.
cpu_cost:
@ -38,3 +42,20 @@ cpu_cost:
participant_cpu_cost: 0.01
track_composite_cpu_cost: 0.01
track_cpu_cost: 0.01
openvidu:
# Allocation strategy for new egress requests
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
allocation_strategy: cpuload
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
# It is preferable to set this value to:
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
# - false: when the egress service is running in a dedicated server.
use_global_cpu_monitoring: true
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
# The default "false" value helps keeping the node stable, but may cause unexpected egress terminations under high load.
disable_cpu_overload_killer: false
# Minimum available disk space in MB required to accept new egress requests.
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
min_disk_space_mb: 512

View File

@ -31,7 +31,7 @@ webhook:
api_key: devkey
urls:
- http://host.docker.internal:6080/livekit/webhook
- http://default-app:6080/livekit/webhook
- http://openvidu-meet:6080/livekit/webhook
ingress:
rtmp_base_url: rtmp://localhost:1935/rtmp
whip_base_url: http://localhost:8085/whip

36
community/meet.env Normal file
View File

@ -0,0 +1,36 @@
# OpenVidu Meet configuration
# Static environment variables loaded via MEET_CONFIG_DIR
SERVER_PORT=6080
MEET_NAME_ID=openviduMeet-LOCAL
MEET_LOG_LEVEL=info
MEET_COOKIE_SECURE=false
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
MEET_INITIAL_WEBHOOK_ENABLED=true
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
# S3 configuration
MEET_S3_BUCKET=openvidu-appdata
MEET_S3_SUBBUCKET=openvidu-meet
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
MEET_AWS_REGION=us-east-1
MEET_S3_WITH_PATH_STYLE_ACCESS=true
# Storage backend type
MEET_BLOB_STORAGE_MODE=s3
# Redis configuration
MEET_REDIS_HOST=redis
MEET_REDIS_PORT=6379
MEET_REDIS_DB=0
# MongoDB configuration
MEET_MONGO_ENABLED=true
MEET_MONGO_DB_NAME=openvidu-meet
# Enable live captions using OpenVidu Speech to Text agent
MEET_CAPTIONS_ENABLED=false

View File

@ -1,7 +1,8 @@
#!/bin/sh
set -e
if [ "$LAN_PRIVATE_IP" != "none" ]; then
if [ "$LAN_PRIVATE_IP" != "" ] && [ "$LAN_MODE" = 'true' ]; then
echo "Using as NODE_IP: $LAN_PRIVATE_IP"
export NODE_IP="$LAN_PRIVATE_IP"
fi

View File

@ -2,7 +2,7 @@
. /scripts/utils.sh
URL=$(getDeploymentUrl)
URL=$(getDeploymentUrl ws)
export LIVEKIT_URL="${URL}"
/usr/local/bin/entrypoint.sh

View File

@ -1,70 +0,0 @@
#!/bin/sh
. /scripts/utils.sh
trap 'handle_sigint' SIGINT
handle_sigint() {
echo "SIGINT signal received, exiting..."
exit 1
}
wait_for_service() {
SERVICE_NAME=$1
SERVICE_URL=$2
shift 2
EXTRA=$@
if [ -n "$EXTRA" ]; then
until curl $EXTRA $SERVICE_URL > /dev/null; do
echo "Waiting for $SERVICE_NAME to start...";
sleep 1;
done;
else
until curl --silent --head --fail $SERVICE_URL > /dev/null; do
echo "Waiting for $SERVICE_NAME to start...";
sleep 1;
done;
fi;
}
wait_for_service 'OpenVidu' 'http://openvidu:7880'
wait_for_service 'Ingress' 'http://ingress:9091'
wait_for_service 'Egress' 'http://egress:9091'
wait_for_service 'Dashboard' 'http://dashboard:5000'
wait_for_service 'Minio' 'http://minio:9000/minio/health/live'
wait_for_service 'Minio Console' 'http://minio:9001/minio-console'
wait_for_service 'Mongo' 'http://mongo:27017' --connect-timeout 10 --silent
LAN_HTTP_URL=$(getDeploymentUrl http)
LAN_WS_URL=$(getDeploymentUrl ws)
for i in $(seq 1 10); do
echo 'Starting OpenVidu... Please be patient...'
sleep 1
done;
echo ''
echo ''
echo '========================================='
echo '🎉 OpenVidu is ready! 🎉'
echo '========================================='
echo ''
echo 'OpenVidu Server & LiveKit Server URLs:'
echo ''
echo ' - From this machine:'
echo ''
echo ' - http://localhost:7880'
echo ' - ws://localhost:7880'
echo ''
echo ' - From other devices in your LAN:'
echo ''
echo " - $LAN_HTTP_URL"
echo " - $LAN_WS_URL"
echo ''
echo '========================================='
echo ''
echo 'OpenVidu Developer UI (services and passwords):'
echo ''
echo ' - http://localhost:7880'
echo " - $LAN_HTTP_URL"
echo ''
echo '========================================='

View File

@ -1,6 +1,11 @@
#!/bin/sh
if [ -z "$LAN_PRIVATE_IP" ]; then
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
if [ "$LAN_MODE" = 'true' ] && [ -z "$LAN_PRIVATE_IP" ]; then
echo '------------------------'
echo ''
echo 'LAN_PRIVATE_IP is required in the .env file.'
@ -14,7 +19,9 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
echo 'If it can'\''t be found, you can manually set it in the .env file'
echo '------------------------'
exit 1
else
fi
if [ "$LAN_MODE" = 'true' ] && [ -n "$LAN_PRIVATE_IP" ]; then
# Check if the LAN_PRIVATE_IP is reachable
if ! ping -c 1 -W 1 "$LAN_PRIVATE_IP" > /dev/null; then
echo "ERROR: LAN_PRIVATE_IP $LAN_PRIVATE_IP is not reachable"
@ -26,15 +33,16 @@ else
echo " - MacOS: ./configure_lan_private_ip_macos.sh"
echo " - Windows: .\configure_lan_private_ip_windows.bat"
echo ""
echo " If you don't want to access OpenVidu through your LAN,"
echo " you can run without LAN_MODE enabled, simply set"
echo " the following variables in the .env file:"
echo " USE_HTTPS=false"
echo " LAN_MODE=false"
echo ""
exit 1
fi
fi
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
# Prepare volumes
mkdir -p /minio/data
mkdir -p /mongo/data

View File

@ -30,9 +30,5 @@ MINIO_SECRET_KEY=minioadmin
MONGO_ADMIN_USERNAME=mongoadmin
MONGO_ADMIN_PASSWORD=mongoadmin
# Default App (OpenVidu Call)
CALL_PRIVATE_ACCESS=false
CALL_USER=
CALL_SECRET=
CALL_ADMIN_USER=admin
CALL_ADMIN_SECRET=admin
# OpenVidu Meet base path
MEET_BASE_PATH=/meet

View File

@ -1,18 +1,24 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing:3.3.0
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
# Whether to run the agent or not.
enabled: false
live_captions:
# How this agent will connect to Rooms [automatic, manual]
# - automatic: the agent will automatically connect to new Rooms.
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
processing: automatic
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
load_threshold: 1.0
# Which speech-to-text AI provider to use [aws, azure, google, opeanai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam]
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
log_level: INFO
live_captions:
# How this agent will connect to Rooms [manual, automatic]
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
# - automatic: the agent will automatically connect to new Rooms.
processing: manual
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
# The custom configuration for the selected provider must be set below
provider:
provider: vosk
aws:
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
@ -57,6 +63,10 @@ live_captions:
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
profanity:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
@ -76,6 +86,8 @@ live_captions:
project:
# The language code to use for transcription (e.g., "en" for English).
language:
# Whether to automatically detect the language.
detect_language:
# ID of the model to use for speech-to-text.
model:
# Initial prompt to guide the transcription.
@ -129,6 +141,8 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Optional text prompt to guide the transcription. Only supported for whisper-1.
prompt:
@ -140,8 +154,12 @@ live_captions:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
prompt:
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
base_url:
deepgram:
# See https://console.deepgram.com/
@ -150,25 +168,27 @@ live_captions:
model:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
language:
# Whether to enable automatic language detection. Defaults to false. See https://developers.deepgram.com/docs/language-detection
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
detect_language: false
# Whether to return interim (non-final) transcription results. Defaults to true. See https://developers.deepgram.com/docs/interim-results
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
interim_results: true
# Whether to apply smart formatting to numbers, dates, etc. Defaults to true. See https://developers.deepgram.com/docs/smart-format
smart_format: true
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. Defaults to true. See https://developers.deepgram.com/docs/smart-format#using-no-delay
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
smart_format: false
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
no_delay: true
# Whether to add punctuations to the transcription. Defaults to true. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
# Whether to add punctuations to the transcription. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
punctuate: true
# Whether to include filler words (um, uh, etc.) in transcription. Defaults to true. See https://developers.deepgram.com/docs/filler-words
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
filler_words: true
# Whether to filter profanity from the transcription. Defaults to false. See https://developers.deepgram.com/docs/profanity-filter
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
profanity_filter: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). Defaults to None. keywords does not work with Nova-3 models. Use keyterms instead.
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
numerals: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
# keywords:
# - [OpenVidu, 1.5]
# - [WebRTC, 1]
# List of key terms to improve recognition accuracy. Defaults to None. keyterms is supported by Nova-3 models.
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
# Commented below is an example
keyterms:
# - "OpenVidu"
@ -177,8 +197,18 @@ live_captions:
assemblyai:
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
api_key:
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
end_of_turn_confidence_threshold:
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
min_end_of_turn_silence_when_confident:
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
max_turn_silence:
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
format_turns: true
# List of keyterms to improve recognition accuracy for specific words and phrases.
keyterms_prompt:
# - "OpenVidu"
# - "WebRTC"
fal:
# API key for fal. See https://fal.ai/dashboard/keys
@ -202,12 +232,14 @@ live_captions:
speechmatics:
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
api_key:
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/introduction/supported-languages
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
language:
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/features/accuracy-language-packs#accuracy
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
operating_point:
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/features/realtime-latency#partial-transcripts
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
enable_partials:
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
enable_diarization:
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
output_locale:
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
@ -222,7 +254,7 @@ live_captions:
speaker_sensitivity:
# See https://docs.speechmatics.com/features/diarization#prefer-current-speaker
prefer_current_speaker:
# Permitted puctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
# Permitted punctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
# Commented is an example of punctuation settings
punctuation_overrides:
# permitted_marks: [ ".", "," ]
@ -249,6 +281,10 @@ live_captions:
languages:
# Whether to allow switching between languages during recognition. Defaults to True
code_switching:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
pre_processing_audio_enhancer:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
pre_processing_speech_threshold:
sarvam:
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
@ -257,3 +293,145 @@ live_captions:
language:
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
model:
mistralai:
# API key for Mistral AI. See https://console.mistral.ai/api-keys
api_key:
# Name of the Voxtral STT model to use. Default to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
model:
# The language code to use for transcription (e.g., "en" for English)
language:
cartesia:
# API key for Cartesia. See https://play.cartesia.ai/keys
api_key:
# The Cartesia STT model to use
model:
# The language code to use for transcription (e.g., "en" for English)
language:
soniox:
# API key for Soniox. See https://console.soniox.com/
api_key:
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
language_hints:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:
nvidia:
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server setup "server" and "use_ssl" instead.
api_key:
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
model:
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
function_id:
# Whether to add punctuation to transcription results. Default is true.
punctuate:
# The language code for transcription. Default is "en-US"
language_code:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
server:
# Whether to use SSL for the connection. Default is true.
# Set to false for locally hosted Riva NIM services without SSL.
use_ssl:
spitch:
# API key for Spitch. See https://docs.spitch.app/keys
api_key:
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
language:
elevenlabs:
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
api_key:
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
model_id:
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically
language_code:
# Custom base URL for the API. Optional.
base_url:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
tag_audio_events:
# Whether to include word-level timestamps in the transcription. Default is false.
include_timestamps:
simplismart:
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
api_key:
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
# Default is "openai/whisper-large-v3-turbo"
model:
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
language:
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
task:
# If true, disables timestamp generation in transcripts. Default is true
without_timestamps:
# Minimum duration (ms) for a valid speech segment. Default is 0
min_speech_duration_ms:
# Decoding temperature (affects randomness). Default is 0.0
temperature:
# Whether to permit multilingual recognition. Default is false
multilingual:
vosk:
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
# - vosk-model-en-us-0.22-lgraph (English US)
# - vosk-model-small-cn-0.22 (Chinese)
# - vosk-model-small-de-0.15 (German)
# - vosk-model-small-en-in-0.4 (English India)
# - vosk-model-small-es-0.42 (Spanish)
# - vosk-model-small-fr-0.22 (French)
# - vosk-model-small-hi-0.22 (Hindi)
# - vosk-model-small-it-0.22 (Italian)
# - vosk-model-small-ja-0.22 (Japanese)
# - vosk-model-small-nl-0.22 (Dutch)
# - vosk-model-small-pt-0.3 (Portuguese)
# - vosk-model-small-ru-0.22 (Russian)
model: vosk-model-en-us-0.22-lgraph
# Language code for reference. It has no effect other than observability purposes.
# If a pre-installed "model" is declared, this will be set automatically if empty.
language:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false
sherpa:
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
# Language code for reference. Auto-detected from model name if not set.
language:
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
provider:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Number of threads for ONNX Runtime. Default is 2.
num_threads:
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
recognizer_type:
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
decoding_method:
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false

View File

@ -14,14 +14,4 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
fi
# Replace the LAN_PRIVATE_IP in the .env file
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
# If sillicon mac, enable EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
if [ "$(uname -m)" = "arm64" ]; then
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
else
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
fi
fi
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env

View File

@ -1,7 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -18,37 +17,40 @@ services:
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
env_file:
- ./meet.env
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
ports:
- 5443:5443
- 6443:6443
- 7443:7443
- 7880:7880
- 9443:9443
- 9080:9080
depends_on:
setup:
condition: service_completed_successfully
redis:
image: docker.io/redis:7.4.4-alpine
platform: linux/amd64
image: docker.io/redis:8.6.1-alpine
container_name: redis
restart: unless-stopped
ports:
- 6379:6379
volumes:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
--requirepass ${REDIS_PASSWORD:-}
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
minio:
image: docker.io/bitnami/minio:2025.5.24-debian-12-r1
platform: linux/amd64
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
restart: unless-stopped
ports:
- 9000:9000
@ -57,23 +59,25 @@ services:
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
- MINIO_CONSOLE_SUBPATH=/minio-console
- MINIO_BROWSER=on
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
volumes:
- minio-data:/bitnami/minio/data
- minio-certs:/certs
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
mongo:
image: docker.io/bitnami/mongodb:8.0.9
platform: linux/amd64
image: docker.io/openvidu/mongodb:8.0.19-r1
container_name: mongo
restart: unless-stopped
ports:
- 27017:27017
volumes:
- mongo-data:/bitnami/mongodb
- /etc/localtime:/etc/localtime:ro
environment:
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
@ -81,14 +85,12 @@ services:
- MONGODB_REPLICA_SET_MODE=primary
- MONGODB_REPLICA_SET_NAME=rs0
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
depends_on:
setup:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-dashboard:3.6.0
container_name: dashboard
restart: unless-stopped
environment:
@ -96,18 +98,20 @@ services:
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server-pro:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-server-pro:3.6.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
- host.docker.internal:host-gateway
environment:
- LAN_MODE=${LAN_MODE:-false}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- OPENVIDU_DEPLOYMENT_TYPE=local
- OPENVIDU_ENVIRONMENT=on_premise
@ -120,13 +124,13 @@ services:
volumes:
- ./livekit.yaml:/etc/livekit.yaml
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ingress:
image: docker.io/openvidu/ingress:3.3.0
platform: linux/amd64
image: docker.io/openvidu/ingress:3.6.0
container_name: ingress
restart: unless-stopped
extra_hosts:
@ -139,13 +143,13 @@ services:
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
volumes:
- ./ingress.yaml:/etc/ingress.yaml
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
egress:
image: docker.io/livekit/egress:v1.9.1
platform: linux/amd64
image: docker.io/openvidu/egress:3.6.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -155,49 +159,42 @@ services:
volumes:
- ./egress.yaml:/etc/egress.yaml
- egress-data:/home/egress
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
default-app:
image: docker.io/openvidu/openvidu-call:3.3.0-demo
platform: linux/amd64
container_name: openvidu-call
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.6.0
container_name: openvidu-meet
restart: on-failure
extra_hosts:
- host.docker.internal:host-gateway
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- SERVER_PORT=6080
- CALL_NAME_ID=OpenViduCall-LOCAL
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
- CALL_PRIVATE_ACCESS=${CALL_PRIVATE_ACCESS:-false}
- CALL_USER=${CALL_USER:-}
- CALL_SECRET=${CALL_SECRET:-}
- CALL_RECORDING=${CALL_RECORDING:-}
- CALL_ADMIN_USER=${CALL_ADMIN_USER:-admin}
- CALL_ADMIN_SECRET=${CALL_ADMIN_SECRET:-admin}
- CALL_LOG_LEVEL=${CALL_LOG_LEVEL:-info}
- CALL_S3_BUCKET=${CALL_S3_BUCKET:-openvidu-appdata}
- CALL_S3_SERVICE_ENDPOINT=${CALL_S3_SERVICE_ENDPOINT:-http://minio:9000}
- CALL_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- CALL_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- CALL_AWS_REGION=${CALL_AWS_REGION:-us-east-1}
- CALL_S3_WITH_PATH_STYLE_ACCESS=${CALL_S3_WITH_PATH_STYLE_ACCESS:-true}
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
- MEET_CONFIG_DIR=/config/meet.env
volumes:
- ./scripts/entrypoint_default_app.sh:/scripts/entrypoint.sh
- ./meet.env:/config/meet.env
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
entrypoint: /bin/sh /scripts/entrypoint.sh
depends_on:
setup:
condition: service_completed_successfully
openvidu-v2compatibility:
image: docker.io/openvidu/openvidu-v2compatibility:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-v2compatibility:3.6.0
restart: unless-stopped
container_name: openvidu-v2compatibility
entrypoint: /bin/sh /scripts/entrypoint.sh
@ -233,27 +230,33 @@ services:
- ./recordings:/opt/openvidu/recordings
- ./scripts/entrypoint_v2comp.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ready-check:
image: docker.io/curlimages/curl:8.13.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: ready-check
restart: on-failure
volumes:
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=local-ready-check
- OPENVIDU_ENVIRONMENT=local-platform
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_MODE=${LAN_MODE:-false}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- DASHBOARD_ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
env_file:
- ./meet.env
depends_on:
- openvidu
- ingress
@ -261,22 +264,17 @@ services:
- dashboard
- minio
- mongo
volumes:
- ./scripts/ready-check.sh:/scripts/ready-check.sh
- ./scripts/utils.sh:/scripts/utils.sh
command: /bin/sh /scripts/ready-check.sh
operator:
image: docker.io/openvidu/openvidu-operator:3.3.0
platform: linux/amd64
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: operator
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agents-config:/agents-config
- ./:/deployment
- /etc/localtime:/etc/localtime:ro
environment:
- PLATFORM=linux/amd64
- MODE=agent-manager-local
- DEPLOYMENT_FILES_DIR=/deployment
- AGENTS_CONFIG_DIR=/agents-config
@ -292,7 +290,6 @@ services:
condition: service_completed_successfully
setup:
image: docker.io/busybox:1.37.0
platform: linux/amd64
container_name: setup
restart: "no"
volumes:
@ -300,6 +297,7 @@ services:
- mongo-data:/mongo
- egress-data:/egress
- ./scripts/setup.sh:/scripts/setup.sh
- /etc/localtime:/etc/localtime:ro
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}

View File

@ -27,6 +27,10 @@ storage:
# account_name: your_account_name
# account_key: your_account_key
# container_name: openvidu-appdata
# gcp:
# credentials_json: |
# your_credentials_json
# bucket: openvidu-appdata
# CPU cost for each type of Egress operation.
cpu_cost:
@ -38,3 +42,20 @@ cpu_cost:
participant_cpu_cost: 0.01
track_composite_cpu_cost: 0.01
track_cpu_cost: 0.01
openvidu:
# Allocation strategy for new egress requests
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
allocation_strategy: cpuload
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
# It is preferable to set this value to:
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
# - false: when the egress service is running in a dedicated server.
use_global_cpu_monitoring: true
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
# The default "false" value helps keep the node stable, but may cause unexpected egress terminations under high load.
disable_cpu_overload_killer: false
# Minimum available disk space in MB required to accept new egress requests.
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
min_disk_space_mb: 512

View File

@ -56,7 +56,7 @@ webhook:
urls:
- http://host.docker.internal:4443/livekit/webhook # For OpenVidu 2 compatibility
- http://host.docker.internal:6080/livekit/webhook
- http://default-app:6080/livekit/webhook
- http://openvidu-meet:6080/livekit/webhook
ingress:
rtmp_base_url: rtmp://localhost:1935/rtmp
whip_base_url: http://localhost:8085/whip

36
pro/meet.env Normal file
View File

@ -0,0 +1,36 @@
# OpenVidu Meet configuration
# Static environment variables loaded via MEET_CONFIG_DIR
SERVER_PORT=6080
MEET_NAME_ID=openviduMeet-LOCAL
MEET_LOG_LEVEL=info
MEET_COOKIE_SECURE=false
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
MEET_INITIAL_WEBHOOK_ENABLED=true
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
# S3 configuration
MEET_S3_BUCKET=openvidu-appdata
MEET_S3_SUBBUCKET=openvidu-meet
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
MEET_AWS_REGION=us-east-1
MEET_S3_WITH_PATH_STYLE_ACCESS=true
# Storage backend type
MEET_BLOB_STORAGE_MODE=s3
# Redis configuration
MEET_REDIS_HOST=redis
MEET_REDIS_PORT=6379
MEET_REDIS_DB=0
# MongoDB configuration
MEET_MONGO_ENABLED=true
MEET_MONGO_DB_NAME=openvidu-meet
# Enable live captions using OpenVidu Speech to Text agent
MEET_CAPTIONS_ENABLED=false

View File

@ -1,7 +1,8 @@
#!/bin/sh
set -e
if [ "$LAN_PRIVATE_IP" != "none" ]; then
if [ "$LAN_PRIVATE_IP" != "" ] && [ "$LAN_MODE" = 'true' ]; then
echo "Using as NODE_IP: $LAN_PRIVATE_IP"
export NODE_IP="$LAN_PRIVATE_IP"
fi

View File

@ -2,7 +2,7 @@
. /scripts/utils.sh
URL=$(getDeploymentUrl)
URL=$(getDeploymentUrl ws)
export LIVEKIT_URL="${URL}"
/usr/local/bin/entrypoint.sh

View File

@ -1,70 +0,0 @@
#!/bin/sh
# ready-check.sh
# Blocks until every core OpenVidu service answers its health/HTTP endpoint,
# then prints a readiness banner with the deployment's access URLs
# (localhost and LAN) to the container log.
# Depends on /scripts/utils.sh for getDeploymentUrl.
. /scripts/utils.sh
# Exit promptly if the container is interrupted while polling.
trap 'handle_sigint' SIGINT
handle_sigint() {
echo "SIGINT signal received, exiting..."
exit 1
}
#######################################
# Poll a service URL with curl once per second until it responds.
# Arguments:
#   $1  - human-readable service name (used in progress messages)
#   $2  - URL to probe
#   $3+ - optional extra curl flags; when present they REPLACE the default
#         probe flags (--silent --head --fail) instead of adding to them
# Outputs: "Waiting for <name> to start..." lines to stdout while polling
#######################################
wait_for_service() {
SERVICE_NAME=$1
SERVICE_URL=$2
shift 2
# NOTE(review): $@ and the curl arguments below are unquoted, so flags
# containing spaces would be word-split; tolerable here because callers
# in this script only pass simple single-word flags.
EXTRA=$@
if [ -n "$EXTRA" ]; then
# Caller-supplied flags mode (e.g. Mongo, which is not an HTTP server
# and needs --connect-timeout instead of --head --fail).
until curl $EXTRA $SERVICE_URL > /dev/null; do
echo "Waiting for $SERVICE_NAME to start...";
sleep 1;
done;
else
# Default mode: silent HEAD request that fails on non-2xx responses.
until curl --silent --head --fail $SERVICE_URL > /dev/null; do
echo "Waiting for $SERVICE_NAME to start...";
sleep 1;
done;
fi;
}
# Wait for each core service in turn; overall readiness requires all of them.
wait_for_service 'OpenVidu' 'http://openvidu:7880'
wait_for_service 'Ingress' 'http://ingress:9091'
wait_for_service 'Egress' 'http://egress:9091'
wait_for_service 'Dashboard' 'http://dashboard:5000'
wait_for_service 'Minio' 'http://minio:9000/minio/health/live'
wait_for_service 'Minio Console' 'http://minio:9001/minio-console'
# Mongo speaks its own wire protocol, not HTTP: just check the TCP port
# answers, with a connect timeout so curl does not hang.
wait_for_service 'Mongo' 'http://mongo:27017' --connect-timeout 10 --silent
# Resolve the LAN-facing URLs; getDeploymentUrl (from utils.sh) presumably
# builds them from the scheme argument -- confirm against utils.sh.
LAN_HTTP_URL=$(getDeploymentUrl http)
LAN_WS_URL=$(getDeploymentUrl ws)
# Grace period: print a progress message once per second for 10 seconds
# before declaring the deployment ready.
for i in $(seq 1 10); do
echo 'Starting OpenVidu... Please be patient...'
sleep 1
done;
# Final readiness banner with connection details for the user.
echo ''
echo ''
echo '========================================='
echo '🎉 OpenVidu is ready! 🎉'
echo '========================================='
echo ''
echo 'OpenVidu Server & LiveKit Server URLs:'
echo ''
echo ' - From this machine:'
echo ''
echo ' - http://localhost:7880'
echo ' - ws://localhost:7880'
echo ''
echo ' - From other devices in your LAN:'
echo ''
echo " - $LAN_HTTP_URL"
echo " - $LAN_WS_URL"
echo ''
echo '========================================='
echo ''
echo 'OpenVidu Developer UI (services and passwords):'
echo ''
echo ' - http://localhost:7880'
echo " - $LAN_HTTP_URL"
echo ''
echo '========================================='
View File

@ -1,6 +1,11 @@
#!/bin/sh
if [ -z "$LAN_PRIVATE_IP" ]; then
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
if [ "$LAN_MODE" = 'true' ] && [ -z "$LAN_PRIVATE_IP" ]; then
echo '------------------------'
echo ''
echo 'LAN_PRIVATE_IP is required in the .env file.'
@ -14,7 +19,9 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
echo 'If it can'\''t be found, you can manually set it in the .env file'
echo '------------------------'
exit 1
else
fi
if [ "$LAN_MODE" = 'true' ] && [ -n "$LAN_PRIVATE_IP" ]; then
# Check if the LAN_PRIVATE_IP is reachable
if ! ping -c 1 -W 1 "$LAN_PRIVATE_IP" > /dev/null; then
echo "ERROR: LAN_PRIVATE_IP $LAN_PRIVATE_IP is not reachable"
@ -26,15 +33,16 @@ else
echo " - MacOS: ./configure_lan_private_ip_macos.sh"
echo " - Windows: .\configure_lan_private_ip_windows.bat"
echo ""
echo " If you don't want to access OpenVidu through your LAN,"
echo " you can run without LAN_MODE enabled, simply set"
echo " the following variables in the .env file:"
echo " USE_HTTPS=false"
echo " LAN_MODE=false"
echo ""
exit 1
fi
fi
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
# Prepare volumes
mkdir -p /minio/data
mkdir -p /mongo/data