Compare commits

...

123 Commits

Author SHA1 Message Date
GitHub Actions
3425341e81 Bump to version 3.6.0 2026-03-06 12:05:23 +00:00
pabloFuente
8a4923e4a8 Add sherpa.provider YAML property to agent-speech-processing.yaml 2026-03-04 13:09:53 +01:00
pabloFuente
ea27ec1f96 Minor update of YAML comment 2026-02-25 18:30:18 +01:00
cruizba
c145494bbd Change speech processing default from automatic to manual
The agent will now only connect to Rooms when explicitly requested
via the Agent Dispatch API instead of auto-connecting to all new Rooms.
2026-02-25 18:22:19 +01:00
cruizba
9bff284b8f Bump Docker image versions for Redis, Minio and MongoDB
- Redis: 8.2.2-alpine -> 8.6.1-alpine
- Minio: 2025.9.7-debian-12-r3 -> 2025.10.15-debian-12-r9
- MongoDB: 8.0.15-r0 -> 8.0.19-r1
2026-02-24 15:29:10 +01:00
pabloFuente
1724fa5c18 Add use_global_cpu_monitoring and min_disk_space_mb to egress config 2026-02-20 14:10:57 +01:00
pabloFuente
3d06d98ea0 Beautify docker-compose.yaml 2026-02-18 13:19:34 +01:00
pabloFuente
ff54026aad Default live_captions provider to vosk 2026-02-12 12:33:31 +01:00
cruizba
c778720ba5 Add meet.env file to caddy-proxy and ready-check which needs MEET_INITIAL_* env variables. 2026-02-09 20:03:23 +01:00
cruizba
0422cbd8c2 Refactor OpenVidu Meet configuration: consolidate environment variables into meet.env and update docker-compose to use new configuration structure 2026-02-09 19:49:59 +01:00
pabloFuente
7c22e68ab5 Update agent-speech-processing.yaml files 2026-02-06 10:57:12 +01:00
cruizba
1fd49f308c Add MEET_BASE_PATH environment variable to configuration files 2026-02-05 19:12:34 +01:00
pabloFuente
8fc6edaa87 Fix agent-speech-processing.yaml files sherpa model names 2026-02-04 17:04:24 +01:00
pabloFuente
803dfbbfa8 Updated agent-speech-processing.yaml files with new providers 2026-02-04 16:39:15 +01:00
cruizba
0ee45ec06f Route 9080 through caddy to redir to /meet 2026-02-02 22:00:22 +01:00
pabloFuente
2ea399dc42 Add nvidia and vosk live_captions providers to YAML 2026-01-21 11:48:59 +01:00
pabloFuente
d51a1b2cdf Update agent-speech-processing.yaml 2026-01-19 14:07:35 +01:00
cruizba
7c8908707b Revert to main version 2026-01-10 03:08:48 +01:00
Carlos Ruiz Ballesteros
2ae40b3a40
Merge pull request #13 from OpenVidu/development
Merge 3.5.0
2025-12-29 16:41:25 +01:00
cruizba
96af8554fb Bump to version 3.5.0 2025-12-29 16:39:57 +01:00
cruizba
0e1a3cfea0 Add arm64 support 2025-12-02 13:31:22 +01:00
cruizba
3ce20ad7ba Install tzdata on images and use timezone of host. Should fix https://github.com/OpenVidu/openvidu-local-deployment/issues/9 2025-11-28 23:10:04 +01:00
cruizba
5e13135101 Add MEET_MONGO_URI environment variable to OpenVidu Meet service 2025-11-25 12:45:23 +01:00
cruizba
749eaa56ec Bump Docker images 2025-10-17 21:32:54 +02:00
pabloFuente
1fb923ca05 Update agent-speech-processing.yaml 2025-10-14 12:30:00 +02:00
cruizba
5a932730ad Revert "Bump to version 3.4.1"
This reverts commit 376e4dee58308e9cbb1b81121de566cc95d99b24.
2025-10-13 20:35:35 +02:00
Carlos Ruiz Ballesteros
2f2635429f
Merge pull request #11 from OpenVidu/development
Merge 3.4.1
2025-10-13 20:35:06 +02:00
cruizba
376e4dee58 Bump to version 3.4.1 2025-10-13 20:34:25 +02:00
cruizba
1eda3c348b Revert "Bump to version 3.4.0"
This reverts commit 1234088d307664ca5a2f3ea99d7d59f31258f8d4.
2025-10-01 19:30:35 +02:00
cruizba
1234088d30 Bump to version 3.4.0 2025-10-01 19:30:05 +02:00
Carlos Ruiz Ballesteros
a9d78ea908
Merge pull request #10 from OpenVidu/development
Merge 3.4.0
2025-10-01 19:25:16 +02:00
Carlos Ruiz Ballesteros
9b0f9ec7c6
Merge branch 'main' into development 2025-10-01 19:24:28 +02:00
pabloFuente
31e6d6943e Increase values for agent-speech-processing load_threshold to 1.0 2025-09-26 17:36:48 +02:00
pabloFuente
2ec5048b84 Add new config to agent-speech-processing.yaml [load_threshold, log_level] 2025-09-26 17:34:55 +02:00
cruizba
afcf531588 Revert "Add MEET_BASE_URL"
This reverts commit 8e644bddb89d6588f27dacb7355fee1ac8ef7bb9.
2025-09-26 14:23:05 +02:00
pabloFuente
e0d70bf1ac Add new egress config [allocation_strategy, disable_cpu_overload_killer] 2025-09-26 13:17:42 +02:00
Piwccle
2b8bc05dc2 Add GCP configuration placeholders in egress.yaml files 2025-09-22 15:31:22 +02:00
cruizba
8e644bddb8 Add MEET_BASE_URL 2025-09-17 20:30:10 +02:00
juancarmore
e6c20beb03 Update MEET_INITIAL_WEBHOOK_URL to use port 6080 in docker-compose files 2025-09-11 14:04:12 +02:00
cruizba
d8903420a4 Update MongoDB image to 8.0.12-r0 and change OPENVIDU_ENVIRONMENT to local-platform 2025-09-01 02:05:38 +02:00
cruizba
44697e87ad Set OPENVIDU_ENVIRONMENT to local in docker-compose files 2025-08-27 20:30:22 +02:00
cruizba
539703ff94 Rename OpenVidu Meet configuration variables for clarity 2025-08-27 18:55:06 +02:00
cruizba
acc9df23ed Update MinIO and MongoDB images to use OpenVidu registry 2025-08-21 15:00:35 +02:00
cruizba
b60adf9ed2 Update MinIO and MongoDB images to use OpenVidu registry 2025-08-21 14:59:41 +02:00
pabloFuente
6b7729be90 Update egress from v1.9.1 to v1.10.0 2025-08-20 17:07:39 +02:00
cruizba
8f5da0fd01 Do not configure LAN_PRIVATE_IP in openvidu if LAN_MODE is not true. 2025-07-08 12:05:37 +02:00
cruizba
332e51231b Add instructions for disabling LAN_MODE in setup script 2025-07-08 11:55:00 +02:00
cruizba
412985ca3f Update ready-check service to use openvidu-operator image and set local-ready-check mode 2025-07-08 11:29:51 +02:00
cruizba
277c15b2c0 Update ready-check service to use own image. Improve output of ready-check 2025-07-07 21:32:43 +02:00
cruizba
32844c1ef2 Update OpenVidu Meet ports and webhook URLs to use 6080 internally. Expose port 9080 2025-07-07 13:58:28 +02:00
cruizba
2144d51c21 Allow LAN_MODE=false and USE_HTTPS=false without defining LAN_PRIVATE_IP 2025-07-04 22:38:38 +02:00
cruizba
fb03ffbae6 Add Meet env vars to caddy-proxy 2025-07-04 22:04:55 +02:00
cruizba
33d05574f6 Update environment configuration for OpenVidu Meet 2025-07-04 21:25:30 +02:00
cruizba
251885b802 Update entrypoint_openvidu_meet.sh to include 'ws' in getDeploymentUrl 2025-07-04 21:17:44 +02:00
cruizba
7e7de0fe33 Add openvidu-meet 2025-07-04 21:16:37 +02:00
pabloFuente
1fe72ba2af Remove spitch provider from agent-speech-processing.yaml 2025-07-02 21:50:42 +02:00
pabloFuente
a6d0a062be Updated agent-speech-processing.yaml with Spitch provider 2025-07-02 20:15:00 +02:00
cruizba
54640c9260 Revert "Bump to version 3.3.0"
This reverts commit 4bf87d6485edc518420ac55684c83a58328b3f1e.
2025-06-26 22:25:15 +02:00
Carlos Ruiz Ballesteros
1d2da2e10d
Merge pull request #8 from OpenVidu/development
Merge 3.3.0
2025-06-26 22:21:51 +02:00
cruizba
4bf87d6485 Bump to version 3.3.0 2025-06-26 22:20:38 +02:00
pabloFuente
8a268d8e65 Updated agent-speech-processing.yaml 2025-06-24 19:12:24 +02:00
pabloFuente
c692d9b86d Added azure_openai to agent-speech-processing.yaml. Fixed other providers 2025-06-19 13:38:18 +02:00
pabloFuente
32e533f892 Added sarvam STT AI provider to agent-speech-processing.yaml 2025-06-18 20:25:05 +02:00
pabloFuente
236b4779f9 Updated agent-speech-processing.yaml 2025-06-18 19:16:33 +02:00
pabloFuente
b88e1420fd Added agent-speech-processing.yaml to community and pro local deployments 2025-06-16 18:37:02 +02:00
cruizba
f16c0a8a64 Add operator service to docker-compose for agent management 2025-06-10 11:01:21 +02:00
cruizba
7970659f69 Revert "Bump to version 3.2.0"
This reverts commit 9edcb4f442fec3b1ec83d65ebc19b10d65138f31.
2025-06-04 17:21:57 +02:00
Carlos Ruiz Ballesteros
f8643a88f8
Merge pull request #7 from OpenVidu/development
Merge 3.2.0
2025-06-04 17:20:23 +02:00
cruizba
9edcb4f442 Bump to version 3.2.0 2025-06-04 17:18:44 +02:00
Piwccle
c8815935f9 Added azure config to egress.yaml in both editions 2025-06-04 11:10:52 +02:00
cruizba
8f631a0dc4 Bump Redis image version to 7.4.4 2025-06-02 20:43:47 +02:00
cruizba
f2d4c5bc4e Remove egress limits in local-deployment. 2025-05-29 19:42:15 +02:00
cruizba
a6ef487fea Remove agents operator 2025-05-29 18:17:46 +02:00
cruizba
87a7ae754a Revert redis to 7.4.3-alpine 2025-05-29 17:54:09 +02:00
cruizba
01eebf2fbc Bump Docker images for 3.2.0 2025-05-29 15:31:11 +02:00
cruizba
ec504bc234 Typo 2025-05-29 14:47:21 +02:00
cruizba
9b62254b7d Update MinIO bucket names to use 'openvidu-appdata' for consistency across configurations 2025-05-29 14:46:12 +02:00
cruizba
294d6887ad Add extra_hosts configuration for ingress service in Docker Compose files 2025-05-28 12:02:30 +02:00
cruizba
0dd80a1d28 Update CPU cost defaults in egress configuration files to prevent resource overuse 2025-04-01 18:59:17 +02:00
pabloFuente
b81f4e212b Rename YAML property speech_to_text to speech_processing 2025-03-21 12:17:57 +01:00
pabloFuente
9340e1b3a4 Rename "speech-to-text" to "speech-processing" 2025-03-21 12:07:08 +01:00
cruizba
0752a979c4 Add OpenVidu operator service to Docker Compose configurations to launch agents 2025-03-20 19:55:35 +01:00
cruizba
8f3bbf5099 Update Docker images to use 'main' tag 2025-02-17 16:49:41 +01:00
Carlos Ruiz Ballesteros
1a91afb02c
Merge pull request #6 from OpenVidu/development
3.1.0 merge
2025-02-17 16:47:48 +01:00
Carlos Ruiz Ballesteros
5014420e16
Merge branch 'main' into development 2025-02-17 16:47:06 +01:00
cruizba
b6a5ab77f4 Bump to version 3.1.0 2025-02-17 16:43:44 +01:00
cruizba
2d2804268e Bump for Redis, Minio, MongoDB, Ingress, and curl. 2025-02-10 18:13:35 +01:00
cruizba
f27a07e9a3 Update egress image to v1.9.0 and restructure egress.yaml storage configuration 2025-01-22 00:38:02 +01:00
cruizba
1fc4aecad5 Revert "Bump to 3.1.0-dev2"
This reverts commit 8b9b3cd963dec89fc00d6f7db6d2c6977e629398.
2025-01-21 20:26:33 +01:00
cruizba
8b9b3cd963 Bump to 3.1.0-dev2 2025-01-21 20:25:30 +01:00
cruizba
fdc8db4186 Update ingress image to openvidu/ingress 2025-01-20 14:03:54 +01:00
cruizba
21705e613e Revert "Bump to 3.1.0-dev1"
This reverts commit c972c6f38e5be3436f27159dc9c68da1b5cc7b4a.
2025-01-17 18:17:33 +01:00
cruizba
c972c6f38e Bump to 3.1.0-dev1 2025-01-17 18:16:32 +01:00
cruizba
79dcfeaa00 Revert "Bump to 3.1.0-beta1"
This reverts commit 79c97c1b3bfc64a9a291f64d8cbee4f947096d07.
2025-01-17 13:59:09 +01:00
cruizba
79c97c1b3b Bump to 3.1.0-beta1 2025-01-17 13:57:30 +01:00
cruizba
d4fa981bf1 Bump egress to v1.8.6 2024-12-05 20:09:32 +01:00
cruizba
aeaa053844 Bump mongodb and minio version 2024-12-05 20:07:38 +01:00
cruizba
b4abde4e59 Bump to version 3.0.0 2024-11-29 13:14:12 +01:00
cruizba
5b73604503 Merge branch 'development' 2024-11-29 13:12:32 +01:00
cruizba
69c3927653 Update custom layout to 3.0.0 2024-11-29 13:11:44 +01:00
cruizba
9f4f719252 Bump version of external services 2024-11-27 17:08:07 +01:00
cruizba
e1206bcbc0 Missing env variable 2024-11-27 16:33:03 +01:00
cruizba
a9a8c2c255 Start mongodb as replicaset 2024-11-27 16:09:58 +01:00
pabloFuente
5d9788b6a7 Update README evaluation mode information. 2024-10-18 12:56:55 +02:00
cruizba
e428094e80 Merge branch 'development' 2024-10-15 17:10:37 +02:00
pabloFuente
447d9a0599 Update egress from v1.8.0 to v1.8.2 and ingress from v1.2.0 to v1.4.2 2024-10-10 13:30:26 +02:00
pabloFuente
4929fb3c43 Fix shellcheck warning 2024-10-07 20:00:42 +02:00
cruizba
bdabf1f84b Add linux/amd64 platform to all services in docker-compose for Mac M1 compatibility 2024-10-01 18:10:21 +02:00
cruizba
c2b22ba06a Merge branch 'development' 2024-07-17 16:58:55 +02:00
Carlos Santos
ee9335d3db Fixed duplicate env vars in default app 2024-07-17 14:27:37 +02:00
cruizba
a89fd77289 Merge branch 'development' 2024-07-16 20:13:45 +02:00
cruizba
f66a78cda7 Add new OpenVidu Call env variables 2024-07-16 20:13:19 +02:00
cruizba
03a2b664f8 Bump to version 3.0.0-beta2 2024-07-16 17:52:26 +02:00
cruizba
80be205267 Refactor entrypoint_openvidu.sh; pro: set container IP as private IP.
In pro local deployment, by using `LIVEKIT_OPENVIDU_NODE_PRIVATE_IP`, the
private IP of the container will be announced as a candidate, allowing
Egress and Ingress traffic to flow through their respective containers,
to openvidu server.
2024-07-10 11:57:48 +02:00
cruizba
1530c08ef8 community: Use port_range_start/end instead of udp_port for ICE range
Use port_range_start/end instead of udp_port for ICE range also
in community edition
2024-07-08 19:18:27 +02:00
Juan Navarro
345dc5327d
Use "port_range_start/end" instead of udp_port for ICE range
mediasoup is not compatible with the UDP port multiplexing that LiveKit
uses when the `udp_port` setting is enabled, so it won't work anyways
and it's better to be consistent between both Pion and mediasoup
engines.

Also extend the port range to 100 ports, given that now the multiplexing
feature won't be in use. 100 seems like a big enough range for local
development.
2024-07-08 18:20:47 +02:00
Carlos Ruiz Ballesteros
0d4c3df493
Merge pull request #1 from OpenVidu/v2compat_config
fixup: Added webhook endpoint in Dockerfile
2024-07-08 13:57:42 +02:00
Carlos Santos
49b720d9ec livekit.yaml: Updated webhook endpoint 2024-07-08 13:54:56 +02:00
Carlos Ruiz Ballesteros
d1577d0cf0
Merge branch 'development' into v2compat_config 2024-07-05 20:54:27 +02:00
Carlos Santos
9362e66795 fixup: Added webhook endpoint in Dockerfile 2024-07-05 13:59:09 +02:00
cruizba
53ddb5013d Update OpenVidu images to version 3.0.0-beta1 2024-06-25 18:34:06 +02:00
cruizba
42b87dc257 Use main version 2024-06-24 22:47:47 +02:00
cruizba
5a58d90aee Bump to version 3.0.0-dev6 2024-06-24 22:45:48 +02:00
27 changed files with 1281 additions and 314 deletions

View File

@@ -50,7 +50,7 @@ docker compose up
## OpenVidu PRO (Evaluation Mode)
> OpenVidu PRO can be executed locally in evaluation mode for free for development and testing purposes.
> Some limits apply: maximum 2 concurrent Rooms, 8 Participants per Room, 5 minutes duration per Room.
> Some limits apply: max 8 Participants across all Rooms and max 5 minutes duration per Room.
### Install OpenVidu PRO

View File

@@ -29,3 +29,6 @@ MINIO_SECRET_KEY=minioadmin
# Mongo configuration.
MONGO_ADMIN_USERNAME=mongoadmin
MONGO_ADMIN_PASSWORD=mongoadmin
# OpenVidu Meet base path
MEET_BASE_PATH=/meet

View File

@@ -0,0 +1,437 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
# Whether to run the agent or not.
enabled: false
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
load_threshold: 1.0
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
log_level: INFO
live_captions:
# How this agent will connect to Rooms [manual, automatic]
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
# - automatic: the agent will automatically connect to new Rooms.
processing: manual
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
# The custom configuration for the selected provider must be set below
provider: vosk
aws:
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
aws_access_key_id:
aws_secret_access_key:
aws_default_region:
# See https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
language:
# The name of the custom vocabulary you want to use.
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-vocabulary.html
vocabulary_name:
# The name of the custom language model you want to use.
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-language-models-using.html
language_model_name:
# Whether or not to enable partial result stabilization. Partial result stabilization can reduce latency in your output, but may impact accuracy.
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
enable_partial_results_stabilization:
# Specify the level of stability to use when you enable partial results stabilization (enable_partial_results_stabilization: true). Valid values: high | medium | low
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
partial_results_stability:
# The name of the custom vocabulary filter you want to use to mask or remove words.
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
vocab_filter_name:
# The method used to filter the vocabulary. Valid values: mask | remove | tag
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
vocab_filter_method:
azure:
# Credentials for Azure Speech Service.
# One of these combinations must be set:
# - speech_host
# - speech_key + speech_region
# - speech_auth_token + speech_region
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text?tabs=macos%2Cterminal&pivots=programming-language-python#prerequisites
speech_host:
speech_key:
speech_auth_token:
speech_region:
# Azure handles multiple languages and can auto-detect the language used. It requires the set of candidate languages to be provided. E.g. ["en-US", "es-ES"]
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#supported-languages
language:
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
profanity:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
# Azure OpenAI API key
azure_api_key:
# Azure Active Directory token
azure_ad_token:
# Azure OpenAI endpoint in the following format: https://{your-resource-name}.openai.azure.com. Mandatory value.
azure_endpoint:
# Name of your model deployment. If given with `azure_endpoint`, sets the base client URL to include `/deployments/{azure_deployment}`.
azure_deployment:
# OpenAI REST API version used for the request. Mandatory value.
api_version:
# OpenAI organization ID.
organization:
# OpenAI project ID.
project:
# The language code to use for transcription (e.g., "en" for English).
language:
# Whether to automatically detect the language.
detect_language:
# ID of the model to use for speech-to-text.
model:
# Initial prompt to guide the transcription.
prompt:
google:
# Credentials for Google Cloud. This is the content of a Google Cloud credential JSON file.
# Below is a dummy example for a credential type of "Service Account" (https://cloud.google.com/iam/docs/service-account-creds#key-types)
credentials_info: |
{
"type": "service_account",
"project_id": "my-project",
"private_key_id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"private_key": "-----BEGIN PRIVATE KEY-----\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n-----END PRIVATE KEY-----\n",
"client_email": "my-email@my-project.iam.gserviceaccount.com",
"client_id": "xxxxxxxxxxxxxxxxxxxxx",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/my-email%40my-project.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
# Which model to use for recognition. If not set, uses the default model for the selected language.
# See https://cloud.google.com/speech-to-text/docs/transcription-model
model:
# The location to use for recognition. Default is "us-central1". Latency will be best if the location is close to your users.
# Check supported languages and locations at https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
location:
# List of language codes to recognize. Default is ["en-US"].
# See https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
languages:
# Whether to detect the language of the audio. Default is true.
detect_language:
# If 'true', adds punctuation to recognition result hypotheses. This feature is only available in select languages. Setting this
# for requests in other languages has no effect at all. The default 'false' value does not add punctuation to result hypotheses.
# See https://cloud.google.com/speech-to-text/docs/automatic-punctuation
punctuate:
# The spoken punctuation behavior for the call. If not set, uses default behavior based on model of choice.
# e.g. command_and_search will enable spoken punctuation by default. If 'true', replaces spoken punctuation
# with the corresponding symbols in the request. For example, "how are you question mark" becomes "how are you?".
# See https://cloud.google.com/speech-to-text/docs/spoken-punctuation for support. If 'false', spoken punctuation is not replaced.
spoken_punctuation:
# Whether to return interim (non-final) transcription results. Defaults to true.
interim_results:
openai:
# API key for OpenAI. See https://platform.openai.com/api-keys
api_key:
# The OpenAI model to use for transcription. See https://platform.openai.com/docs/guides/speech-to-text
model:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Optional text prompt to guide the transcription. Only supported for whisper-1.
prompt:
groq:
# API key for Groq. See https://console.groq.com/keys
api_key:
# See https://console.groq.com/docs/speech-to-text
model:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
prompt:
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
base_url:
deepgram:
# See https://console.deepgram.com/
api_key:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.model
model:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
language:
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
detect_language: false
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
interim_results: true
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
smart_format: false
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
no_delay: true
# Whether to add punctuations to the transcription. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
punctuate: true
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
filler_words: true
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
profanity_filter: false
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
numerals: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
# keywords:
# - [OpenVidu, 1.5]
# - [WebRTC, 1]
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
# Commented below is an example
keyterms:
# - "OpenVidu"
# - "WebRTC"
assemblyai:
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
api_key:
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
end_of_turn_confidence_threshold:
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
min_end_of_turn_silence_when_confident:
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
max_turn_silence:
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
format_turns: true
# List of keyterms to improve recognition accuracy for specific words and phrases.
keyterms_prompt:
# - "OpenVidu"
# - "WebRTC"
fal:
# API key for fal. See https://fal.ai/dashboard/keys
api_key:
# See https://fal.ai/models/fal-ai/wizper/api#schema
language:
clova:
# Secret key issued when registering the app
api_key:
# API Gateway's unique invoke URL created in CLOVA Speech Domain.
# See https://guide.ncloud-docs.com/docs/en/clovaspeech-domain#create-domain
invoke_url:
# See https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-longsentence
language:
# Value between 0 and 1 indicating the threshold for the confidence score of the transcribed text. Default is 0.5.
# If the confidence score is lower than the threshold, the transcription event is not sent to the client.
# For a definition of the confidence score see https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-grpc
threshold:
speechmatics:
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
api_key:
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
language:
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
operating_point:
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
enable_partials:
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
enable_diarization:
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
output_locale:
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
max_delay:
# See https://docs.speechmatics.com/features/realtime-latency#configuration-example
max_delay_mode:
# Configuration for speaker diarization. See https://docs.speechmatics.com/features/diarization
speaker_diarization_config:
# See https://docs.speechmatics.com/features/diarization#max-speakers
max_speakers:
# See https://docs.speechmatics.com/features/diarization#speaker-sensitivity
speaker_sensitivity:
# See https://docs.speechmatics.com/features/diarization#prefer-current-speaker
prefer_current_speaker:
# Permitted punctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
# Commented is an example of punctuation settings
punctuation_overrides:
# permitted_marks: [ ".", "," ]
# sensitivity: 0.4
# See https://docs.speechmatics.com/features/custom-dictionary
# Commented below is an example of a custom dictionary
additional_vocab:
# - content: financial crisis
# - content: gnocchi
# sounds_like:
# - nyohki
# - nokey
# - nochi
# - content: CEO
# sounds_like:
# - C.E.O.
gladia:
# API key for Gladia. See https://app.gladia.io/account
api_key:
# Whether to return interim (non-final) transcription results. Defaults to True
interim_results:
# List of language codes to use for recognition. Defaults to None (auto-detect). See https://docs.gladia.io/chapters/limits-and-specifications/languages
languages:
# Whether to allow switching between languages during recognition. Defaults to True
code_switching:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
pre_processing_audio_enhancer:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
pre_processing_speech_threshold:
sarvam:
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
api_key:
# BCP-47 language code for supported Indian languages. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.language_code.language_code
language:
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
model:
mistralai:
# API key for Mistral AI. See https://console.mistral.ai/api-keys
api_key:
# Name of the Voxtral STT model to use. Default to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
model:
# The language code to use for transcription (e.g., "en" for English)
language:
cartesia:
# API key for Cartesia. See https://play.cartesia.ai/keys
api_key:
# The Cartesia STT model to use
model:
# The language code to use for transcription (e.g., "en" for English)
language:
soniox:
# API key for Soniox. See https://console.soniox.com/
api_key:
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
language_hints:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:
nvidia:
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set "server" and "use_ssl" instead.
api_key:
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
model:
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
function_id:
# Whether to add punctuation to transcription results. Default is true.
punctuate:
# The language code for transcription. Default is "en-US"
language_code:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
server:
# Whether to use SSL for the connection. Default is true.
# Set to false for locally hosted Riva NIM services without SSL.
use_ssl:
spitch:
# API key for Spitch. See https://docs.spitch.app/keys
api_key:
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
language:
elevenlabs:
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
api_key:
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
model_id:
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically
language_code:
# Custom base URL for the API. Optional.
base_url:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
tag_audio_events:
# Whether to include word-level timestamps in the transcription. Default is false.
include_timestamps:
simplismart:
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
api_key:
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
# Default is "openai/whisper-large-v3-turbo"
model:
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
language:
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
task:
# If true, disables timestamp generation in transcripts. Default is true
without_timestamps:
# Minimum duration (ms) for a valid speech segment. Default is 0
min_speech_duration_ms:
# Decoding temperature (affects randomness). Default is 0.0
temperature:
# Whether to permit multilingual recognition. Default is false
multilingual:
vosk:
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
# - vosk-model-en-us-0.22-lgraph (English US)
# - vosk-model-small-cn-0.22 (Chinese)
# - vosk-model-small-de-0.15 (German)
# - vosk-model-small-en-in-0.4 (English India)
# - vosk-model-small-es-0.42 (Spanish)
# - vosk-model-small-fr-0.22 (French)
# - vosk-model-small-hi-0.22 (Hindi)
# - vosk-model-small-it-0.22 (Italian)
# - vosk-model-small-ja-0.22 (Japanese)
# - vosk-model-small-nl-0.22 (Dutch)
# - vosk-model-small-pt-0.3 (Portuguese)
# - vosk-model-small-ru-0.22 (Russian)
model: vosk-model-en-us-0.22-lgraph
# Language code for reference. It has no effect other than observability purposes.
# If a pre-installed "model" is declared, this will be set automatically if empty.
language:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false
sherpa:
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
# Language code for reference. Auto-detected from model name if not set.
language:
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
provider:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Number of threads for ONNX Runtime. Default is 2.
num_threads:
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
recognizer_type:
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
decoding_method:
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false

View File

@ -14,14 +14,4 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
fi
# Replace the LAN_PRIVATE_IP in the .env file
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
# If running on an Apple Silicon (arm64) Mac, make sure the
# EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag is present and enabled in .env,
# so Docker Desktop runs the affected images through QEMU emulation.
if [ "$(uname -m)" = "arm64" ]; then
# Append the flag if it is missing from .env; otherwise force its value to 1.
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
else
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
fi
fi
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env

View File

@ -1,6 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.0.0-dev5
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -16,35 +16,40 @@ services:
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
env_file:
- ./meet.env
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
ports:
- 5443:5443
- 6443:6443
- 7443:7443
- 7880:7880
- 9443:9443
- 9080:9080
depends_on:
setup:
condition: service_completed_successfully
redis:
image: docker.io/redis:7.2.5-alpine
image: docker.io/redis:8.6.1-alpine
container_name: redis
restart: unless-stopped
ports:
- 6379:6379
volumes:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
--requirepass ${REDIS_PASSWORD:-}
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
minio:
image: docker.io/bitnami/minio:2024.6.13
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
container_name: minio
restart: unless-stopped
ports:
@ -52,70 +57,82 @@ services:
environment:
- MINIO_ROOT_USER=${MINIO_ACCESS_KEY:-}
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
- MINIO_DEFAULT_BUCKETS=openvidu
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
- MINIO_CONSOLE_SUBPATH=/minio-console
- MINIO_BROWSER=on
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
volumes:
- minio-data:/bitnami/minio/data
- minio-certs:/certs
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
mongo:
image: docker.io/bitnami/mongodb:7.0.11
image: docker.io/openvidu/mongodb:8.0.19-r1
container_name: mongo
restart: unless-stopped
ports:
- 27017:27017
volumes:
- mongo-data:/bitnami/mongodb
- /etc/localtime:/etc/localtime:ro
environment:
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
- MONGODB_ADVERTISED_HOSTNAME=mongo
- MONGODB_REPLICA_SET_MODE=primary
- MONGODB_REPLICA_SET_NAME=rs0
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
depends_on:
setup:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.0.0-dev5
image: docker.io/openvidu/openvidu-dashboard:3.6.0
container_name: dashboard
restart: unless-stopped
environment:
- SERVER_PORT=5000
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server:3.0.0-dev5
image: docker.io/openvidu/openvidu-server:3.6.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
- host.docker.internal:host-gateway
environment:
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- LAN_MODE=${LAN_MODE:-false}
ports:
- 3478:3478/udp
- 7881:7881/tcp
- 7882-7892:7882-7892/udp
- 7900-7999:7900-7999/udp
entrypoint: /bin/sh /scripts/entrypoint.sh
command: --config /etc/livekit.yaml
volumes:
- ./livekit.yaml:/tmp/livekit.yaml
- ./livekit.yaml:/etc/livekit.yaml
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ingress:
image: docker.io/livekit/ingress:v1.2.0
image: docker.io/openvidu/ingress:3.6.0
container_name: ingress
restart: unless-stopped
extra_hosts:
- host.docker.internal:host-gateway
ports:
- 1935:1935
- 8085:8085
@ -124,12 +141,13 @@ services:
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
volumes:
- ./ingress.yaml:/etc/ingress.yaml
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
egress:
image: docker.io/livekit/egress:v1.8.2
image: docker.io/openvidu/egress:3.6.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -138,52 +156,86 @@ services:
- EGRESS_CONFIG_FILE=/etc/egress.yaml
volumes:
- ./egress.yaml:/etc/egress.yaml
- egress-data:/home/egress
- egress-data:/home/egress/tmp
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
default-app:
image: docker.io/openvidu/openvidu-call:3.0.0-dev5-demo
container_name: openvidu-call
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.6.0
container_name: openvidu-meet
restart: on-failure
extra_hosts:
- host.docker.internal:host-gateway
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- SERVER_PORT=6080
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
- CALL_PRIVATE_ACCESS=DISABLED
- CALL_USER=${CALL_USER:-}
- CALL_SECRET=${CALL_SECRET:-}
- CALL_ADMIN_SECRET=${CALL_ADMIN_SECRET:-}
- CALL_RECORDING=${CALL_RECORDING:-}
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
- MEET_CONFIG_DIR=/config/meet.env
volumes:
- ./scripts/entrypoint_default_app.sh:/scripts/entrypoint.sh
- ./meet.env:/config/meet.env
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
entrypoint: /bin/sh /scripts/entrypoint.sh
depends_on:
setup:
condition: service_completed_successfully
operator:
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: operator
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agents-config:/agents-config
- ./:/deployment
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=agent-manager-local
- DEPLOYMENT_FILES_DIR=/deployment
- AGENTS_CONFIG_DIR=/agents-config
- NETWORK_NAME=openvidu-community
- AGENTS_CONFIG_VOLUME=openvidu-agents-config
- LIVEKIT_URL=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- REDIS_ADDRESS=redis:6379
- REDIS_PASSWORD=${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
ready-check:
image: docker.io/curlimages/curl:8.6.0
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: ready-check
restart: on-failure
volumes:
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=local-ready-check
- OPENVIDU_ENVIRONMENT=local-platform
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_MODE=${LAN_MODE:-false}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- DASHBOARD_ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
env_file:
- ./meet.env
depends_on:
- openvidu
- ingress
@ -191,13 +243,9 @@ services:
- dashboard
- minio
- mongo
volumes:
- ./scripts/ready-check.sh:/scripts/ready-check.sh
- ./scripts/utils.sh:/scripts/utils.sh
command: /bin/sh /scripts/ready-check.sh
setup:
image: docker.io/busybox:1.36.1
image: docker.io/busybox:1.37.0
container_name: setup
restart: "no"
volumes:
@ -205,6 +253,7 @@ services:
- mongo-data:/mongo
- egress-data:/egress
- ./scripts/setup.sh:/scripts/setup.sh
- /etc/localtime:/etc/localtime:ro
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}
@ -214,9 +263,21 @@ services:
command: /bin/sh /scripts/setup.sh
volumes:
agents-config:
name: openvidu-agents-config
minio-certs:
name: openvidu-minio-certs
mongodb-config:
name: openvidu-mongodb-config
redis:
name: openvidu-redis
minio-data:
name: openvidu-minio-data
mongo-data:
name: openvidu-mongo-data
egress-data:
name: openvidu-egress-data
networks:
default:
name: openvidu-community

View File

@ -10,21 +10,29 @@ ws_url: ws://openvidu:7880
health_port: 9091
# Files will be moved here when uploads fail.
backup_storage: /home/egress/backup_storage
backup:
prefix: /home/egress/backup_storage
# Storage for recordings.
s3:
access_key: minioadmin
secret: minioadmin
# Default region for minio
region: us-east-1
endpoint: http://minio:9000
bucket: openvidu
force_path_style: true
storage:
s3:
access_key: minioadmin
secret: minioadmin
# Default region for minio
region: us-east-1
endpoint: http://minio:9000
bucket: openvidu-appdata
force_path_style: true
#azure:
# account_name: your_account_name
# account_key: your_account_key
# container_name: openvidu-appdata
# gcp:
# credentials_json: |
# your_credentials_json
# bucket: openvidu-appdata
# CPU cost for each type of Egress operation.
# Defaults are set for unlimited operations.
# Change these values according to your needs.
cpu_cost:
max_cpu_utilization: 0.80
room_composite_cpu_cost: 0.01
@ -34,3 +42,20 @@ cpu_cost:
participant_cpu_cost: 0.01
track_composite_cpu_cost: 0.01
track_cpu_cost: 0.01
openvidu:
# Allocation strategy for new egress requests
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
allocation_strategy: cpuload
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
# It is preferable to set this value to:
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
# - false: when the egress service is running in a dedicated server.
use_global_cpu_monitoring: true
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
# The default "false" value helps keep the node stable, but may cause unexpected egress terminations under high load.
disable_cpu_overload_killer: false
# Minimum available disk space in MB required to accept new egress requests.
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
min_disk_space_mb: 512

View File

@ -4,7 +4,7 @@ openvidu:
enabled: true
interval: 10s
expiration: 768h # 32 days
mongo_url: mongodb://mongoadmin:mongoadmin@mongo:27017
mongo_url: mongodb://mongoadmin:mongoadmin@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
# LiveKit configuration
port: 7880
@ -12,7 +12,8 @@ bind_addresses:
- ""
rtc:
tcp_port: 7881
udp_port: 7882-7892
port_range_start: 7900
port_range_end: 7999
redis:
address: redis:6379
username: ""
@ -29,8 +30,8 @@ keys:
webhook:
api_key: devkey
urls:
- http://host.docker.internal:6080/webhook
- http://default-app:6080/livekit/webhook
- http://host.docker.internal:6080/livekit/webhook
- http://openvidu-meet:6080/livekit/webhook
ingress:
rtmp_base_url: rtmp://localhost:1935/rtmp
whip_base_url: http://localhost:8085/whip

36
community/meet.env Normal file
View File

@ -0,0 +1,36 @@
# OpenVidu Meet configuration
# Static environment variables loaded via MEET_CONFIG_DIR
SERVER_PORT=6080
MEET_NAME_ID=openviduMeet-LOCAL
MEET_LOG_LEVEL=info
MEET_COOKIE_SECURE=false
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
MEET_INITIAL_WEBHOOK_ENABLED=true
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
# S3 configuration
MEET_S3_BUCKET=openvidu-appdata
MEET_S3_SUBBUCKET=openvidu-meet
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
MEET_AWS_REGION=us-east-1
MEET_S3_WITH_PATH_STYLE_ACCESS=true
# Storage backend type
MEET_BLOB_STORAGE_MODE=s3
# Redis configuration
MEET_REDIS_HOST=redis
MEET_REDIS_PORT=6379
MEET_REDIS_DB=0
# MongoDB configuration
MEET_MONGO_ENABLED=true
MEET_MONGO_DB_NAME=openvidu-meet
# Enable live captions using OpenVidu Speech to Text agent
MEET_CAPTIONS_ENABLED=false

View File

@ -1,20 +1,9 @@
#!/bin/sh
set -e
CONFIG_FILE_TMP="/tmp/livekit.yaml"
CONFIG_FILE="/etc/livekit.yaml"
LAN_PRIVATE_IP="${LAN_PRIVATE_IP:-}"
cp ${CONFIG_FILE_TMP} ${CONFIG_FILE}
if [ "$LAN_PRIVATE_IP" != "none" ]; then
if ! grep -q "^[[:space:]]*node_ip:.*" "$CONFIG_FILE"; then
if grep -q "^rtc:" "$CONFIG_FILE"; then
sed -i "/^rtc:/a \ node_ip: $LAN_PRIVATE_IP" "$CONFIG_FILE"
else
echo "rtc:" >> "$CONFIG_FILE"
echo " node_ip: $LAN_PRIVATE_IP" >> "$CONFIG_FILE"
fi
fi
if [ "$LAN_PRIVATE_IP" != "" ] && [ "$LAN_MODE" = 'true' ]; then
echo "Using as NODE_IP: $LAN_PRIVATE_IP"
export NODE_IP="$LAN_PRIVATE_IP"
fi
./livekit-server "$@"

View File

@ -2,7 +2,7 @@
. /scripts/utils.sh
URL=$(getDeploymentUrl)
URL=$(getDeploymentUrl ws)
export LIVEKIT_URL="${URL}"
/usr/local/bin/entrypoint.sh

View File

@ -1,70 +0,0 @@
#!/bin/sh
# Ready-check script: waits for every OpenVidu service to come up, then prints
# the access URLs. Helper functions (e.g. getDeploymentUrl) come from utils.sh.
. /scripts/utils.sh
# Abort the wait loops cleanly if the user presses Ctrl+C.
trap 'handle_sigint' SIGINT
# SIGINT handler: report the interruption and leave with a non-zero status.
handle_sigint() {
  printf '%s\n' "SIGINT signal received, exiting..."
  exit 1
}
#######################################
# Poll a service with curl until it answers, logging once per second.
# Arguments:
#   $1 - human-readable service name (used in log messages)
#   $2 - URL to poll
#   $@ - optional extra curl options; when given, they replace the default
#        "--silent --head --fail" probe (e.g. "--connect-timeout 10 --silent")
#######################################
wait_for_service() {
  SERVICE_NAME=$1
  SERVICE_URL=$2
  shift 2
  # Pass caller-supplied curl options through "$@" so each option stays a
  # separate word, and quote the URL to avoid word-splitting/globbing.
  if [ "$#" -gt 0 ]; then
    until curl "$@" "$SERVICE_URL" > /dev/null; do
      echo "Waiting for $SERVICE_NAME to start...";
      sleep 1;
    done;
  else
    until curl --silent --head --fail "$SERVICE_URL" > /dev/null; do
      echo "Waiting for $SERVICE_NAME to start...";
      sleep 1;
    done;
  fi;
}
# Block until every core service responds on its health/HTTP endpoint.
wait_for_service 'OpenVidu' 'http://openvidu:7880'
wait_for_service 'Ingress' 'http://ingress:9091'
wait_for_service 'Egress' 'http://egress:9091'
wait_for_service 'Dashboard' 'http://dashboard:5000'
wait_for_service 'Minio' 'http://minio:9000/minio/health/live'
wait_for_service 'Minio Console' 'http://minio:9001/minio-console'
# Mongo does not speak HTTP: use custom curl flags so a TCP-level response
# (even an HTTP error) counts as "up" instead of requiring --head --fail.
wait_for_service 'Mongo' 'http://mongo:27017' --connect-timeout 10 --silent
# URLs reachable from other devices on the LAN (helpers from /scripts/utils.sh).
LAN_HTTP_URL=$(getDeploymentUrl http)
LAN_WS_URL=$(getDeploymentUrl ws)
# Give the stack ~10 extra seconds to settle before declaring it ready.
for i in $(seq 1 10); do
echo 'Starting OpenVidu... Please be patient...'
sleep 1
done;
# Final "ready" banner with all access URLs.
echo ''
echo ''
echo '========================================='
echo '🎉 OpenVidu is ready! 🎉'
echo '========================================='
echo ''
echo 'OpenVidu Server & LiveKit Server URLs:'
echo ''
echo ' - From this machine:'
echo ''
echo ' - http://localhost:7880'
echo ' - ws://localhost:7880'
echo ''
echo ' - From other devices in your LAN:'
echo ''
echo " - $LAN_HTTP_URL"
echo " - $LAN_WS_URL"
echo ''
echo '========================================='
echo ''
echo 'OpenVidu Developer UI (services and passwords):'
echo ''
echo ' - http://localhost:7880'
echo " - $LAN_HTTP_URL"
echo ''
echo '========================================='

View File

@ -1,6 +1,11 @@
#!/bin/sh
if [ -z "$LAN_PRIVATE_IP" ]; then
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
if [ "$LAN_MODE" = 'true' ] && [ -z "$LAN_PRIVATE_IP" ]; then
echo '------------------------'
echo ''
echo 'LAN_PRIVATE_IP is required in the .env file.'
@ -14,7 +19,9 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
echo 'If it can'\''t be found, you can manually set it in the .env file'
echo '------------------------'
exit 1
else
fi
if [ "$LAN_MODE" = 'true' ] && [ -n "$LAN_PRIVATE_IP" ]; then
# Check if the LAN_PRIVATE_IP is reachable
if ! ping -c 1 -W 1 "$LAN_PRIVATE_IP" > /dev/null; then
echo "ERROR: LAN_PRIVATE_IP $LAN_PRIVATE_IP is not reachable"
@ -26,15 +33,16 @@ else
echo " - MacOS: ./configure_lan_private_ip_macos.sh"
echo " - Windows: .\configure_lan_private_ip_windows.bat"
echo ""
echo " If you don't want to access OpenVidu through your LAN,"
echo " you can run without LAN_MODE enabled, simply set"
echo " the following variables in the .env file:"
echo " USE_HTTPS=false"
echo " LAN_MODE=false"
echo ""
exit 1
fi
fi
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
# Prepare volumes
mkdir -p /minio/data
mkdir -p /mongo/data

View File

@ -29,3 +29,6 @@ MINIO_SECRET_KEY=minioadmin
# Mongo configuration.
MONGO_ADMIN_USERNAME=mongoadmin
MONGO_ADMIN_PASSWORD=mongoadmin
# OpenVidu Meet base path
MEET_BASE_PATH=/meet

View File

@ -0,0 +1,437 @@
# Docker image of the agent.
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
# Whether to run the agent or not.
enabled: false
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
load_threshold: 1.0
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
log_level: INFO
live_captions:
# How this agent will connect to Rooms [manual, automatic]
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
# - automatic: the agent will automatically connect to new Rooms.
processing: manual
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, spitch, elevenlabs, simplismart, vosk, sherpa]
# The custom configuration for the selected provider must be set below
provider: vosk
aws:
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
aws_access_key_id:
aws_secret_access_key:
aws_default_region:
# See https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
language:
# The name of the custom vocabulary you want to use.
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-vocabulary.html
vocabulary_name:
# The name of the custom language model you want to use.
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-language-models-using.html
language_model_name:
# Whether or not to enable partial result stabilization. Partial result stabilization can reduce latency in your output, but may impact accuracy.
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
enable_partial_results_stabilization:
# Specify the level of stability to use when you enable partial results stabilization (enable_partial_results_stabilization: true). Valid values: high | medium | low
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
partial_results_stability:
# The name of the custom vocabulary filter you want to use to mask or remove words.
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
vocab_filter_name:
# The method used to filter the vocabulary. Valid values: mask | remove | tag
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
vocab_filter_method:
azure:
# Credentials for Azure Speech Service.
# One of these combinations must be set:
# - speech_host
# - speech_key + speech_region
# - speech_auth_token + speech_region
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text?tabs=macos%2Cterminal&pivots=programming-language-python#prerequisites
speech_host:
speech_key:
speech_auth_token:
speech_region:
# Azure handles multiple languages and can auto-detect the language used. It requires the candidate set to be set. E.g. ["en-US", "es-ES"]
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#supported-languages
language:
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
profanity:
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
phrase_list:
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
explicit_punctuation:
azure_openai:
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
# Azure OpenAI API key
azure_api_key:
# Azure Active Directory token
azure_ad_token:
# Azure OpenAI endpoint in the following format: https://{your-resource-name}.openai.azure.com. Mandatory value.
azure_endpoint:
# Name of your model deployment. If given with `azure_endpoint`, sets the base client URL to include `/deployments/{azure_deployment}`.
azure_deployment:
# OpenAI REST API version used for the request. Mandatory value.
api_version:
# OpenAI organization ID.
organization:
# OpenAI project ID.
project:
# The language code to use for transcription (e.g., "en" for English).
language:
# Whether to automatically detect the language.
detect_language:
# ID of the model to use for speech-to-text.
model:
# Initial prompt to guide the transcription.
prompt:
google:
# Credentials for Google Cloud. This is the content of a Google Cloud credential JSON file.
# Below is a dummy example for a credential type of "Service Account" (https://cloud.google.com/iam/docs/service-account-creds#key-types)
credentials_info: |
{
"type": "service_account",
"project_id": "my-project",
"private_key_id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"private_key": "-----BEGIN PRIVATE KEY-----\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n-----END PRIVATE KEY-----\n",
"client_email": "my-email@my-project.iam.gserviceaccount.com",
"client_id": "xxxxxxxxxxxxxxxxxxxxx",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/my-email%40my-project.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
# Which model to use for recognition. If not set, uses the default model for the selected language.
# See https://cloud.google.com/speech-to-text/docs/transcription-model
model:
# The location to use for recognition. Default is "us-central1". Latency will be best if the location is close to your users.
# Check supported languages and locations at https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
location:
# List of language codes to recognize. Default is ["en-US"].
# See https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
languages:
# Whether to detect the language of the audio. Default is true.
detect_language:
# If 'true', adds punctuation to recognition result hypotheses. This feature is only available in select languages. Setting this
# for requests in other languages has no effect at all. The default 'false' value does not add punctuation to result hypotheses.
# See https://cloud.google.com/speech-to-text/docs/automatic-punctuation
punctuate:
# The spoken punctuation behavior for the call. If not set, uses default behavior based on model of choice.
# e.g. command_and_search will enable spoken punctuation by default. If 'true', replaces spoken punctuation
# with the corresponding symbols in the request. For example, "how are you question mark" becomes "how are you?".
# See https://cloud.google.com/speech-to-text/docs/spoken-punctuation for support. If 'false', spoken punctuation is not replaced.
spoken_punctuation:
# Whether to return interim (non-final) transcription results. Defaults to true.
interim_results:
openai:
# API key for OpenAI. See https://platform.openai.com/api-keys
api_key:
# The OpenAI model to use for transcription. See https://platform.openai.com/docs/guides/speech-to-text
model:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Optional text prompt to guide the transcription. Only supported for whisper-1.
prompt:
groq:
# API key for Groq. See https://console.groq.com/keys
api_key:
# See https://console.groq.com/docs/speech-to-text
model:
# The language of the input audio. Supplying the input language in ISO-639-1 format
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
language:
# Whether to automatically detect the language.
detect_language:
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
prompt:
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
base_url:
deepgram:
# See https://console.deepgram.com/
api_key:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.model
model:
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
language:
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
detect_language: false
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
interim_results: true
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
smart_format: false
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
no_delay: true
# Whether to add punctuations to the transcription. Turn detector will work better with punctuations. See https://developers.deepgram.com/docs/punctuation
punctuate: true
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
filler_words: true
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
profanity_filter: false
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
numerals: false
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
# keywords:
# - [OpenVidu, 1.5]
# - [WebRTC, 1]
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
# Commented below is an example
keyterms:
# - "OpenVidu"
# - "WebRTC"
assemblyai:
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
api_key:
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
end_of_turn_confidence_threshold:
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
min_end_of_turn_silence_when_confident:
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
max_turn_silence:
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
format_turns: true
# List of keyterms to improve recognition accuracy for specific words and phrases.
keyterms_prompt:
# - "OpenVidu"
# - "WebRTC"
fal:
# API key for fal. See https://fal.ai/dashboard/keys
api_key:
# See https://fal.ai/models/fal-ai/wizper/api#schema
language:
clova:
# Secret key issued when registering the app
api_key:
# API Gateway's unique invoke URL created in CLOVA Speech Domain.
# See https://guide.ncloud-docs.com/docs/en/clovaspeech-domain#create-domain
invoke_url:
# See https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-longsentence
language:
# Value between 0 and 1 indicating the threshold for the confidence score of the transcribed text. Default is 0.5.
# If the confidence score is lower than the threshold, the transcription event is not sent to the client.
# For a definition of the confidence score see https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-grpc
threshold:
speechmatics:
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
api_key:
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
language:
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
operating_point:
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
enable_partials:
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
enable_diarization:
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
output_locale:
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
max_delay:
# See https://docs.speechmatics.com/features/realtime-latency#configuration-example
max_delay_mode:
# Configuration for speaker diarization. See https://docs.speechmatics.com/features/diarization
speaker_diarization_config:
# See https://docs.speechmatics.com/features/diarization#max-speakers
max_speakers:
# See https://docs.speechmatics.com/features/diarization#speaker-sensitivity
speaker_sensitivity:
# See https://docs.speechmatics.com/features/diarization#prefer-current-speaker
prefer_current_speaker:
# Permitted punctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
# Commented is an example of punctuation settings
punctuation_overrides:
# permitted_marks: [ ".", "," ]
# sensitivity: 0.4
# See https://docs.speechmatics.com/features/custom-dictionary
# Commented below is an example of a custom dictionary
additional_vocab:
# - content: financial crisis
# - content: gnocchi
# sounds_like:
# - nyohki
# - nokey
# - nochi
# - content: CEO
# sounds_like:
# - C.E.O.
gladia:
# API key for Gladia. See https://app.gladia.io/account
api_key:
# Whether to return interim (non-final) transcription results. Defaults to True
interim_results:
# List of language codes to use for recognition. Defaults to None (auto-detect). See https://docs.gladia.io/chapters/limits-and-specifications/languages
languages:
# Whether to allow switching between languages during recognition. Defaults to True
code_switching:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
pre_processing_audio_enhancer:
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
pre_processing_speech_threshold:
sarvam:
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
api_key:
# BCP-47 language code for supported Indian languages. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.language_code.language_code
language:
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
model:
mistralai:
# API key for Mistral AI. See https://console.mistral.ai/api-keys
api_key:
# Name of the Voxtral STT model to use. Default to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
model:
# The language code to use for transcription (e.g., "en" for English)
language:
cartesia:
# API key for Cartesia. See https://play.cartesia.ai/keys
api_key:
# The Cartesia STT model to use
model:
# The language code to use for transcription (e.g., "en" for English)
language:
soniox:
# API key for Soniox. See https://console.soniox.com/
api_key:
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
language_hints:
# - "en"
# - "es"
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
context:
nvidia:
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server setup "server" and "use_ssl" instead.
api_key:
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
model:
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
function_id:
# Whether to add punctuation to transcription results. Default is true.
punctuate:
# The language code for transcription. Default is "en-US"
language_code:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
server:
# Whether to use SSL for the connection. Default is true.
# Set to false for locally hosted Riva NIM services without SSL.
use_ssl:
spitch:
# API key for Spitch. See https://docs.spitch.app/keys
api_key:
# Language short code of the audio to transcribe. For supported values, see https://docs.spitch.app/concepts/languages
language:
elevenlabs:
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
api_key:
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
model_id:
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically
language_code:
# Custom base URL for the API. Optional.
base_url:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
tag_audio_events:
# Whether to include word-level timestamps in the transcription. Default is false.
include_timestamps:
simplismart:
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
api_key:
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
# Default is "openai/whisper-large-v3-turbo"
model:
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
language:
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
task:
# If true, disables timestamp generation in transcripts. Default is true
without_timestamps:
# Minimum duration (ms) for a valid speech segment. Default is 0
min_speech_duration_ms:
# Decoding temperature (affects randomness). Default is 0.0
temperature:
# Whether to permit multilingual recognition. Default is false
multilingual:
vosk:
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
# - vosk-model-en-us-0.22-lgraph (English US)
# - vosk-model-small-cn-0.22 (Chinese)
# - vosk-model-small-de-0.15 (German)
# - vosk-model-small-en-in-0.4 (English India)
# - vosk-model-small-es-0.42 (Spanish)
# - vosk-model-small-fr-0.22 (French)
# - vosk-model-small-hi-0.22 (Hindi)
# - vosk-model-small-it-0.22 (Italian)
# - vosk-model-small-ja-0.22 (Japanese)
# - vosk-model-small-nl-0.22 (Dutch)
# - vosk-model-small-pt-0.3 (Portuguese)
# - vosk-model-small-ru-0.22 (Russian)
model: vosk-model-en-us-0.22-lgraph
# Language code for reference. It has no effect other than observability purposes.
# If a pre-installed "model" is declared, this will be set automatically if empty.
language:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false
sherpa:
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
# Language code for reference. Auto-detected from model name if not set.
language:
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
provider:
# Audio sample rate in Hz. Default is 16000.
sample_rate:
# Whether to return interim/partial results during recognition. Default is true.
partial_results:
# Number of threads for ONNX Runtime. Default is 2.
num_threads:
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
recognizer_type:
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
decoding_method:
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
use_silero_vad: false

View File

@ -14,14 +14,4 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
fi
# Replace the LAN_PRIVATE_IP in the .env file
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
# If Apple Silicon Mac, enable EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
if [ "$(uname -m)" = "arm64" ]; then
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
else
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
fi
fi
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env

View File

@ -5,7 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>OpenVidu Layout</title>
<script src="openvidu-browser-3.0.0-dev2.min.js"></script>
<script src="openvidu-browser-v2compatibility-3.0.0.min.js"></script>
<script src="opentok-layout.min.js"></script>
<style>
body {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
services:
caddy-proxy:
image: docker.io/openvidu/openvidu-caddy-local:3.0.0-dev5
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
container_name: caddy-proxy
restart: unless-stopped
extra_hosts:
@ -17,107 +17,124 @@ services:
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
env_file:
- ./meet.env
volumes:
- ./custom-layout:/var/www/custom-layout
- /etc/localtime:/etc/localtime:ro
ports:
- 5443:5443
- 6443:6443
- 7443:7443
- 7880:7880
- 9443:9443
- 9080:9080
depends_on:
setup:
condition: service_completed_successfully
redis:
image: docker.io/redis:7.2.5-alpine
image: docker.io/redis:8.6.1-alpine
container_name: redis
restart: unless-stopped
ports:
- 6379:6379
volumes:
- redis:/data
- /etc/localtime:/etc/localtime:ro
command: >
redis-server
--bind 0.0.0.0
--requirepass ${REDIS_PASSWORD:-}
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
minio:
image: docker.io/bitnami/minio:2024.6.13
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
restart: unless-stopped
ports:
- 9000:9000
environment:
- MINIO_ROOT_USER=${MINIO_ACCESS_KEY:-}
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
- MINIO_DEFAULT_BUCKETS=openvidu
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
- MINIO_CONSOLE_SUBPATH=/minio-console
- MINIO_BROWSER=on
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
volumes:
- minio-data:/bitnami/minio/data
- minio-certs:/certs
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
mongo:
image: docker.io/bitnami/mongodb:7.0.11
image: docker.io/openvidu/mongodb:8.0.19-r1
container_name: mongo
restart: unless-stopped
ports:
- 27017:27017
volumes:
- mongo-data:/bitnami/mongodb
- /etc/localtime:/etc/localtime:ro
environment:
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
- MONGODB_ADVERTISED_HOSTNAME=mongo
- MONGODB_REPLICA_SET_MODE=primary
- MONGODB_REPLICA_SET_NAME=rs0
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
depends_on:
setup:
condition: service_completed_successfully
dashboard:
image: docker.io/openvidu/openvidu-dashboard:3.0.0-dev5
image: docker.io/openvidu/openvidu-dashboard:3.6.0
container_name: dashboard
restart: unless-stopped
environment:
- SERVER_PORT=5000
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
volumes:
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
openvidu:
image: docker.io/openvidu/openvidu-server-pro:3.0.0-dev5
image: docker.io/openvidu/openvidu-server-pro:3.6.0
restart: unless-stopped
container_name: openvidu
extra_hosts:
- host.docker.internal:host-gateway
environment:
- LAN_MODE=${LAN_MODE:-false}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- OPENVIDU_DEPLOYMENT_TYPE=local
- OPENVIDU_ENVIRONMENT=on_premise
ports:
- 3478:3478/udp
- 7881:7881/tcp
- 7882-7892:7882-7892/udp
- 7900-7999:7900-7999/udp
entrypoint: /bin/sh /scripts/entrypoint.sh
command: --config /etc/livekit.yaml
volumes:
- ./livekit.yaml:/tmp/livekit.yaml
- ./livekit.yaml:/etc/livekit.yaml
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ingress:
image: docker.io/livekit/ingress:v1.2.0
image: docker.io/openvidu/ingress:3.6.0
container_name: ingress
restart: unless-stopped
extra_hosts:
- host.docker.internal:host-gateway
ports:
- 1935:1935
- 8085:8085
@ -126,12 +143,13 @@ services:
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
volumes:
- ./ingress.yaml:/etc/ingress.yaml
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
egress:
image: docker.io/livekit/egress:v1.8.2
image: docker.io/openvidu/egress:3.6.0
restart: unless-stopped
container_name: egress
extra_hosts:
@ -141,41 +159,47 @@ services:
volumes:
- ./egress.yaml:/etc/egress.yaml
- egress-data:/home/egress
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
default-app:
image: docker.io/openvidu/openvidu-call:3.0.0-dev5-demo
container_name: openvidu-call
openvidu-meet:
image: docker.io/openvidu/openvidu-meet:3.6.0
container_name: openvidu-meet
restart: on-failure
extra_hosts:
- host.docker.internal:host-gateway
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- SERVER_PORT=6080
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
- CALL_PRIVATE_ACCESS=DISABLED
- CALL_USER=${CALL_USER:-}
- CALL_SECRET=${CALL_SECRET:-}
- CALL_ADMIN_SECRET=${CALL_ADMIN_SECRET:-}
- CALL_RECORDING=${CALL_RECORDING:-}
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
- MEET_CONFIG_DIR=/config/meet.env
volumes:
- ./scripts/entrypoint_default_app.sh:/scripts/entrypoint.sh
- ./meet.env:/config/meet.env
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
entrypoint: /bin/sh /scripts/entrypoint.sh
depends_on:
setup:
condition: service_completed_successfully
openvidu-v2compatibility:
image: docker.io/openvidu/openvidu-v2compatibility:3.0.0-dev5
image: docker.io/openvidu/openvidu-v2compatibility:3.6.0
restart: unless-stopped
container_name: openvidu-v2compatibility
entrypoint: /bin/sh /scripts/entrypoint.sh
extra_hosts:
- host.docker.internal:host-gateway
ports:
- 4443:4443
environment:
@ -190,7 +214,7 @@ services:
- V2COMPAT_LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- V2COMPAT_OPENVIDU_RECORDING_PATH=/opt/openvidu/recordings
- V2COMPAT_OPENVIDU_PRO_RECORDING_STORAGE=local
- V2COMPAT_OPENVIDU_PRO_AWS_S3_BUCKET=openvidu
- V2COMPAT_OPENVIDU_PRO_AWS_S3_BUCKET=openvidu-appdata
- V2COMPAT_OPENVIDU_PRO_AWS_S3_SERVICE_ENDPOINT=http://minio:9000
- V2COMPAT_OPENVIDU_PRO_AWS_REGION=us-east-1
- V2COMPAT_OPENVIDU_PRO_AWS_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
@ -200,31 +224,39 @@ services:
- V2COMPAT_REDIS_PASSWORD=${REDIS_PASSWORD:-}
- V2COMPAT_REDIS_DB=0
- V2COMPAT_OPENVIDU_WEBHOOK=false
- V2COMPAT_OPENVIDU_WEBHOOK_ENDPOINT=http://host.docker.internal:7777/webhook
- OPENVIDU_DEPLOYMENT_TYPE=local
volumes:
- ./recordings:/opt/openvidu/recordings
- ./scripts/entrypoint_v2comp.sh:/scripts/entrypoint.sh
- ./scripts/utils.sh:/scripts/utils.sh
- /etc/localtime:/etc/localtime:ro
depends_on:
setup:
condition: service_completed_successfully
ready-check:
image: docker.io/curlimages/curl:8.6.0
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: ready-check
restart: on-failure
volumes:
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=local-ready-check
- OPENVIDU_ENVIRONMENT=local-platform
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_DOMAIN=${LAN_DOMAIN:-}
- LAN_MODE=${LAN_MODE:-false}
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- DASHBOARD_ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
env_file:
- ./meet.env
depends_on:
- openvidu
- ingress
@ -232,13 +264,32 @@ services:
- dashboard
- minio
- mongo
volumes:
- ./scripts/ready-check.sh:/scripts/ready-check.sh
- ./scripts/utils.sh:/scripts/utils.sh
command: /bin/sh /scripts/ready-check.sh
operator:
image: docker.io/openvidu/openvidu-operator:3.6.0
container_name: operator
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agents-config:/agents-config
- ./:/deployment
- /etc/localtime:/etc/localtime:ro
environment:
- MODE=agent-manager-local
- DEPLOYMENT_FILES_DIR=/deployment
- AGENTS_CONFIG_DIR=/agents-config
- NETWORK_NAME=openvidu-pro
- AGENTS_CONFIG_VOLUME=openvidu-pro-agents-config
- LIVEKIT_URL=ws://openvidu:7880/
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
- REDIS_ADDRESS=redis:6379
- REDIS_PASSWORD=${REDIS_PASSWORD:-}
depends_on:
setup:
condition: service_completed_successfully
setup:
image: docker.io/busybox:1.36.1
image: docker.io/busybox:1.37.0
container_name: setup
restart: "no"
volumes:
@ -246,6 +297,7 @@ services:
- mongo-data:/mongo
- egress-data:/egress
- ./scripts/setup.sh:/scripts/setup.sh
- /etc/localtime:/etc/localtime:ro
environment:
- USE_HTTPS=${USE_HTTPS:-false}
- LAN_MODE=${LAN_MODE:-false}
@ -255,9 +307,21 @@ services:
command: /bin/sh /scripts/setup.sh
volumes:
agents-config:
name: openvidu-pro-agents-config
minio-certs:
name: openvidu-pro-minio-certs
mongodb-config:
name: openvidu-pro-mongodb-config
redis:
name: openvidu-pro-redis
minio-data:
name: openvidu-pro-minio-data
mongo-data:
name: openvidu-pro-mongo-data
egress-data:
name: openvidu-pro-egress-data
networks:
default:
name: openvidu-pro

View File

@ -10,21 +10,29 @@ ws_url: ws://openvidu:7880
health_port: 9091
# Files will be moved here when uploads fail.
backup_storage: /home/egress/backup_storage
backup:
prefix: /home/egress/backup_storage
# Storage for recordings.
s3:
access_key: minioadmin
secret: minioadmin
# Default region for minio
region: us-east-1
endpoint: http://minio:9000
bucket: openvidu
force_path_style: true
storage:
s3:
access_key: minioadmin
secret: minioadmin
# Default region for minio
region: us-east-1
endpoint: http://minio:9000
bucket: openvidu-appdata
force_path_style: true
#azure:
# account_name: your_account_name
# account_key: your_account_key
# container_name: openvidu-appdata
# gcp:
# credentials_json: |
# your_credentials_json
# bucket: openvidu-appdata
# CPU cost for each type of Egress operation.
# Defaults are set for unlimited operations.
# Change these values according to your needs.
cpu_cost:
max_cpu_utilization: 0.80
room_composite_cpu_cost: 0.01
@ -34,3 +42,20 @@ cpu_cost:
participant_cpu_cost: 0.01
track_composite_cpu_cost: 0.01
track_cpu_cost: 0.01
openvidu:
# Allocation strategy for new egress requests
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
allocation_strategy: cpuload
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
# It is preferable to set this value to:
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
# - false: when the egress service is running in a dedicated server.
use_global_cpu_monitoring: true
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
# The default "false" value helps keeping the node stable, but may cause unexpected egress terminations under high load.
disable_cpu_overload_killer: false
# Minimum available disk space in MB required to accept new egress requests.
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
min_disk_space_mb: 512

View File

@ -4,7 +4,7 @@ openvidu:
enabled: true
interval: 10s
expiration: 768h # 32 days
mongo_url: mongodb://mongoadmin:mongoadmin@mongo:27017
mongo_url: mongodb://mongoadmin:mongoadmin@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
rtc:
# WebRTC engine selection
# Values: pion, mediasoup
@ -36,7 +36,8 @@ bind_addresses:
- ""
rtc:
tcp_port: 7881
udp_port: 7882-7892
port_range_start: 7900
port_range_end: 7999
redis:
address: redis:6379
username: ""
@ -54,8 +55,8 @@ webhook:
api_key: devkey
urls:
- http://host.docker.internal:4443/livekit/webhook # For OpenVidu 2 compatibility
- http://host.docker.internal:6080/webhook
- http://default-app:6080/livekit/webhook
- http://host.docker.internal:6080/livekit/webhook
- http://openvidu-meet:6080/livekit/webhook
ingress:
rtmp_base_url: rtmp://localhost:1935/rtmp
whip_base_url: http://localhost:8085/whip

36
pro/meet.env Normal file
View File

@ -0,0 +1,36 @@
# OpenVidu Meet configuration
# Static environment variables loaded via MEET_CONFIG_DIR
SERVER_PORT=6080
MEET_NAME_ID=openviduMeet-LOCAL
MEET_LOG_LEVEL=info
MEET_COOKIE_SECURE=false
MEET_INITIAL_ADMIN_USER=admin
MEET_INITIAL_ADMIN_PASSWORD=admin
MEET_INITIAL_API_KEY=meet-api-key
MEET_INITIAL_WEBHOOK_ENABLED=true
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
# S3 configuration
MEET_S3_BUCKET=openvidu-appdata
MEET_S3_SUBBUCKET=openvidu-meet
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
MEET_AWS_REGION=us-east-1
MEET_S3_WITH_PATH_STYLE_ACCESS=true
# Storage backend type
MEET_BLOB_STORAGE_MODE=s3
# Redis configuration
MEET_REDIS_HOST=redis
MEET_REDIS_PORT=6379
MEET_REDIS_DB=0
# MongoDB configuration
MEET_MONGO_ENABLED=true
MEET_MONGO_DB_NAME=openvidu-meet
# Enable live captions using OpenVidu Speech to Text agent
MEET_CAPTIONS_ENABLED=false

View File

@ -1,20 +1,13 @@
#!/bin/sh
set -e
CONFIG_FILE_TMP="/tmp/livekit.yaml"
CONFIG_FILE="/etc/livekit.yaml"
LAN_PRIVATE_IP="${LAN_PRIVATE_IP:-}"
cp ${CONFIG_FILE_TMP} ${CONFIG_FILE}
if [ "$LAN_PRIVATE_IP" != "none" ]; then
if ! grep -q "^[[:space:]]*node_ip:.*" "$CONFIG_FILE"; then
if grep -q "^rtc:" "$CONFIG_FILE"; then
sed -i "/^rtc:/a \ node_ip: $LAN_PRIVATE_IP" "$CONFIG_FILE"
else
echo "rtc:" >> "$CONFIG_FILE"
echo " node_ip: $LAN_PRIVATE_IP" >> "$CONFIG_FILE"
fi
fi
if [ "$LAN_PRIVATE_IP" != "" ] && [ "$LAN_MODE" = 'true' ]; then
echo "Using as NODE_IP: $LAN_PRIVATE_IP"
export NODE_IP="$LAN_PRIVATE_IP"
fi
# Configure container private IP as node private IP
LIVEKIT_OPENVIDU_NODE_PRIVATE_IP="$(hostname -i)"
export LIVEKIT_OPENVIDU_NODE_PRIVATE_IP
./livekit-server "$@"

View File

@ -2,7 +2,7 @@
. /scripts/utils.sh
URL=$(getDeploymentUrl)
URL=$(getDeploymentUrl ws)
export LIVEKIT_URL="${URL}"
/usr/local/bin/entrypoint.sh

View File

@ -1,70 +0,0 @@
#!/bin/sh
. /scripts/utils.sh
trap 'handle_sigint' SIGINT
# SIGINT handler (bound via trap above): report the interrupt and
# terminate the whole ready-check with a failure status.
handle_sigint() {
  printf '%s\n' "SIGINT signal received, exiting..."
  exit 1
}
#######################################
# Block until an HTTP service answers.
# Arguments:
#   $1 - human-readable service name (used in the waiting message)
#   $2 - URL to probe
#   $@ - optional extra curl options; when given they REPLACE the
#        default probe flags (--silent --head --fail)
# Outputs:
#   "Waiting for <name> to start..." to stdout once per second while
#   the probe fails.
# Returns:
#   0 once curl succeeds against the URL.
#######################################
wait_for_service() {
  SERVICE_NAME=$1
  SERVICE_URL=$2
  shift 2
  if [ "$#" -gt 0 ]; then
    # Pass caller-supplied options through "$@" so each option stays
    # its own word. (The original flattened them with EXTRA=$@ and
    # relied on re-splitting, which breaks on options containing
    # spaces, and left $SERVICE_URL unquoted.)
    until curl "$@" "$SERVICE_URL" > /dev/null; do
      echo "Waiting for $SERVICE_NAME to start..."
      sleep 1
    done
  else
    until curl --silent --head --fail "$SERVICE_URL" > /dev/null; do
      echo "Waiting for $SERVICE_NAME to start..."
      sleep 1
    done
  fi
}
# Wait for every core service to respond before declaring the
# deployment ready. Mongo gets custom curl flags because a plain HEAD
# probe is not suitable for its port (it only needs a TCP-level check).
wait_for_service 'OpenVidu' 'http://openvidu:7880'
wait_for_service 'Ingress' 'http://ingress:9091'
wait_for_service 'Egress' 'http://egress:9091'
wait_for_service 'Dashboard' 'http://dashboard:5000'
wait_for_service 'Minio' 'http://minio:9000/minio/health/live'
wait_for_service 'Minio Console' 'http://minio:9001/minio-console'
wait_for_service 'Mongo' 'http://mongo:27017' --connect-timeout 10 --silent
# getDeploymentUrl is provided by /scripts/utils.sh (sourced at the
# top of this script); it builds the LAN-facing URL for the given scheme.
LAN_HTTP_URL=$(getDeploymentUrl http)
LAN_WS_URL=$(getDeploymentUrl ws)
# Grace period: services answered their probes, but give OpenVidu a
# fixed 10 seconds to finish starting up before printing the banner.
for i in $(seq 1 10); do
echo 'Starting OpenVidu... Please be patient...'
sleep 1
done;
# Final banner with the deployment's access URLs, both from this
# machine (localhost) and from other devices on the LAN.
echo ''
echo ''
echo '========================================='
echo '🎉 OpenVidu is ready! 🎉'
echo '========================================='
echo ''
echo 'OpenVidu Server & LiveKit Server URLs:'
echo ''
echo ' - From this machine:'
echo ''
echo ' - http://localhost:7880'
echo ' - ws://localhost:7880'
echo ''
echo ' - From other devices in your LAN:'
echo ''
echo " - $LAN_HTTP_URL"
echo " - $LAN_WS_URL"
echo ''
echo '========================================='
echo ''
echo 'OpenVidu Developer UI (services and passwords):'
echo ''
echo ' - http://localhost:7880'
echo " - $LAN_HTTP_URL"
echo ''
echo '========================================='

View File

@ -1,6 +1,11 @@
#!/bin/sh
if [ -z "$LAN_PRIVATE_IP" ]; then
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
if [ "$LAN_MODE" = 'true' ] && [ -z "$LAN_PRIVATE_IP" ]; then
echo '------------------------'
echo ''
echo 'LAN_PRIVATE_IP is required in the .env file.'
@ -14,7 +19,9 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
echo 'If it can'\''t be found, you can manually set it in the .env file'
echo '------------------------'
exit 1
else
fi
if [ "$LAN_MODE" = 'true' ] && [ -n "$LAN_PRIVATE_IP" ]; then
# Check if the LAN_PRIVATE_IP is reachable
if ! ping -c 1 -W 1 "$LAN_PRIVATE_IP" > /dev/null; then
echo "ERROR: LAN_PRIVATE_IP $LAN_PRIVATE_IP is not reachable"
@ -26,15 +33,16 @@ else
echo " - MacOS: ./configure_lan_private_ip_macos.sh"
echo " - Windows: .\configure_lan_private_ip_windows.bat"
echo ""
echo " If you don't want to access OpenVidu through your LAN,"
echo " you can run without LAN_MODE enabled, simply set"
echo " the following variables in the .env file:"
echo " USE_HTTPS=false"
echo " LAN_MODE=false"
echo ""
exit 1
fi
fi
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
exit 1
fi
# Prepare volumes
mkdir -p /minio/data
mkdir -p /mongo/data