Compare commits
67 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3425341e81 | ||
|
|
8a4923e4a8 | ||
|
|
ea27ec1f96 | ||
|
|
c145494bbd | ||
|
|
9bff284b8f | ||
|
|
1724fa5c18 | ||
|
|
3d06d98ea0 | ||
|
|
ff54026aad | ||
|
|
c778720ba5 | ||
|
|
0422cbd8c2 | ||
|
|
7c22e68ab5 | ||
|
|
1fd49f308c | ||
|
|
8fc6edaa87 | ||
|
|
803dfbbfa8 | ||
|
|
0ee45ec06f | ||
|
|
2ea399dc42 | ||
|
|
d51a1b2cdf | ||
|
|
7c8908707b | ||
|
|
2ae40b3a40 | ||
|
|
96af8554fb | ||
|
|
0e1a3cfea0 | ||
|
|
3ce20ad7ba | ||
|
|
5e13135101 | ||
|
|
749eaa56ec | ||
|
|
1fb923ca05 | ||
|
|
5a932730ad | ||
|
|
2f2635429f | ||
|
|
376e4dee58 | ||
|
|
1eda3c348b | ||
|
|
1234088d30 | ||
|
|
a9d78ea908 | ||
|
|
9b0f9ec7c6 | ||
|
|
31e6d6943e | ||
|
|
2ec5048b84 | ||
|
|
afcf531588 | ||
|
|
e0d70bf1ac | ||
|
|
2b8bc05dc2 | ||
|
|
8e644bddb8 | ||
|
|
e6c20beb03 | ||
|
|
d8903420a4 | ||
|
|
44697e87ad | ||
|
|
539703ff94 | ||
|
|
acc9df23ed | ||
|
|
b60adf9ed2 | ||
|
|
6b7729be90 | ||
|
|
8f5da0fd01 | ||
|
|
332e51231b | ||
|
|
412985ca3f | ||
|
|
277c15b2c0 | ||
|
|
32844c1ef2 | ||
|
|
2144d51c21 | ||
|
|
fb03ffbae6 | ||
|
|
33d05574f6 | ||
|
|
251885b802 | ||
|
|
7e7de0fe33 | ||
|
|
1fe72ba2af | ||
|
|
a6d0a062be | ||
|
|
54640c9260 | ||
|
|
1d2da2e10d | ||
|
|
4bf87d6485 | ||
|
|
8a268d8e65 | ||
|
|
c692d9b86d | ||
|
|
32e533f892 | ||
|
|
236b4779f9 | ||
|
|
b88e1420fd | ||
|
|
f16c0a8a64 | ||
|
|
7970659f69 |
@ -30,9 +30,5 @@ MINIO_SECRET_KEY=minioadmin
|
||||
MONGO_ADMIN_USERNAME=mongoadmin
|
||||
MONGO_ADMIN_PASSWORD=mongoadmin
|
||||
|
||||
# Default App (OpenVidu Call)
|
||||
CALL_PRIVATE_ACCESS=false
|
||||
CALL_USER=
|
||||
CALL_SECRET=
|
||||
CALL_ADMIN_USER=admin
|
||||
CALL_ADMIN_SECRET=admin
|
||||
# OpenVidu Meet base path
|
||||
MEET_BASE_PATH=/meet
|
||||
|
||||
437
community/agent-speech-processing.yaml
Normal file
437
community/agent-speech-processing.yaml
Normal file
@ -0,0 +1,437 @@
|
||||
# Docker image of the agent.
|
||||
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
|
||||
|
||||
# Whether to run the agent or not.
|
||||
enabled: false
|
||||
|
||||
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
|
||||
load_threshold: 1.0
|
||||
|
||||
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
|
||||
log_level: INFO
|
||||
|
||||
live_captions:
|
||||
# How this agent will connect to Rooms [manual, automatic]
|
||||
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
|
||||
# - automatic: the agent will automatically connect to new Rooms.
|
||||
processing: manual
|
||||
|
||||
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
|
||||
# The custom configuration for the selected provider must be set below
|
||||
provider: vosk
|
||||
|
||||
aws:
|
||||
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
|
||||
aws_access_key_id:
|
||||
aws_secret_access_key:
|
||||
aws_default_region:
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
|
||||
language:
|
||||
# The name of the custom vocabulary you want to use.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-vocabulary.html
|
||||
vocabulary_name:
|
||||
# The name of the custom language model you want to use.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-language-models-using.html
|
||||
language_model_name:
|
||||
# Whether or not to enable partial result stabilization. Partial result stabilization can reduce latency in your output, but may impact accuracy.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
|
||||
enable_partial_results_stabilization:
|
||||
# Specify the level of stability to use when you enable partial results stabilization (enable_partial_results_stabilization: true). Valid values: high | medium | low
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
|
||||
partial_results_stability:
|
||||
# The name of the custom vocabulary filter you want to use to mask or remove words.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
|
||||
vocab_filter_name:
|
||||
# The method used to filter the vocabulary. Valid values: mask | remove | tag
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
|
||||
vocab_filter_method:
|
||||
|
||||
azure:
|
||||
# Credentials for Azure Speech Service.
|
||||
# One of these combinations must be set:
|
||||
# - speech_host
|
||||
# - speech_key + speech_region
|
||||
# - speech_auth_token + speech_region
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text?tabs=macos%2Cterminal&pivots=programming-language-python#prerequisites
|
||||
speech_host:
|
||||
speech_key:
|
||||
speech_auth_token:
|
||||
speech_region:
|
||||
# Azure handles multiple languages and can auto-detect the language used. Auto-detection requires a set of candidate languages to be provided, e.g. ["en-US", "es-ES"]
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#supported-languages
|
||||
language:
|
||||
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
|
||||
profanity:
|
||||
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
|
||||
phrase_list:
|
||||
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
|
||||
explicit_punctuation:
|
||||
|
||||
azure_openai:
|
||||
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
|
||||
# Azure OpenAI API key
|
||||
azure_api_key:
|
||||
# Azure Active Directory token
|
||||
azure_ad_token:
|
||||
# Azure OpenAI endpoint in the following format: https://{your-resource-name}.openai.azure.com. Mandatory value.
|
||||
azure_endpoint:
|
||||
# Name of your model deployment. If given with `azure_endpoint`, sets the base client URL to include `/deployments/{azure_deployment}`.
|
||||
azure_deployment:
|
||||
# OpenAI REST API version used for the request. Mandatory value.
|
||||
api_version:
|
||||
# OpenAI organization ID.
|
||||
organization:
|
||||
# OpenAI project ID.
|
||||
project:
|
||||
# The language code to use for transcription (e.g., "en" for English).
|
||||
language:
|
||||
# Whether to automatically detect the language.
|
||||
detect_language:
|
||||
# ID of the model to use for speech-to-text.
|
||||
model:
|
||||
# Initial prompt to guide the transcription.
|
||||
prompt:
|
||||
|
||||
google:
|
||||
# Credentials for Google Cloud. This is the content of a Google Cloud credential JSON file.
|
||||
# Below is a dummy example for a credential type of "Service Account" (https://cloud.google.com/iam/docs/service-account-creds#key-types)
|
||||
credentials_info: |
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "my-project",
|
||||
"private_key_id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "my-email@my-project.iam.gserviceaccount.com",
|
||||
"client_id": "xxxxxxxxxxxxxxxxxxxxx",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/my-email%40my-project.iam.gserviceaccount.com",
|
||||
"universe_domain": "googleapis.com"
|
||||
}
|
||||
# Which model to use for recognition. If not set, uses the default model for the selected language.
|
||||
# See https://cloud.google.com/speech-to-text/docs/transcription-model
|
||||
model:
|
||||
# The location to use for recognition. Default is "us-central1". Latency will be best if the location is close to your users.
|
||||
# Check supported languages and locations at https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
|
||||
location:
|
||||
# List of language codes to recognize. Default is ["en-US"].
|
||||
# See https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
|
||||
languages:
|
||||
# Whether to detect the language of the audio. Default is true.
|
||||
detect_language:
|
||||
# If 'true', adds punctuation to recognition result hypotheses. This feature is only available in select languages. Setting this
|
||||
# for requests in other languages has no effect at all. The default 'false' value does not add punctuation to result hypotheses.
|
||||
# See https://cloud.google.com/speech-to-text/docs/automatic-punctuation
|
||||
punctuate:
|
||||
# The spoken punctuation behavior for the call. If not set, uses default behavior based on model of choice.
|
||||
# e.g. command_and_search will enable spoken punctuation by default. If 'true', replaces spoken punctuation
|
||||
# with the corresponding symbols in the request. For example, "how are you question mark" becomes "how are you?".
|
||||
# See https://cloud.google.com/speech-to-text/docs/spoken-punctuation for support. If 'false', spoken punctuation is not replaced.
|
||||
spoken_punctuation:
|
||||
# Whether to return interim (non-final) transcription results. Defaults to true.
|
||||
interim_results:
|
||||
|
||||
openai:
|
||||
# API key for OpenAI. See https://platform.openai.com/api-keys
|
||||
api_key:
|
||||
# The OpenAI model to use for transcription. See https://platform.openai.com/docs/guides/speech-to-text
|
||||
model:
|
||||
# The language of the input audio. Supplying the input language in ISO-639-1 format
|
||||
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
|
||||
language:
|
||||
# Whether to automatically detect the language.
|
||||
detect_language:
|
||||
# Optional text prompt to guide the transcription. Only supported for whisper-1.
|
||||
prompt:
|
||||
|
||||
groq:
|
||||
# API key for Groq. See https://console.groq.com/keys
|
||||
api_key:
|
||||
# See https://console.groq.com/docs/speech-to-text
|
||||
model:
|
||||
# The language of the input audio. Supplying the input language in ISO-639-1 format
|
||||
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
|
||||
language:
|
||||
# Whether to automatically detect the language.
|
||||
detect_language:
|
||||
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
|
||||
prompt:
|
||||
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
|
||||
base_url:
|
||||
|
||||
deepgram:
|
||||
# See https://console.deepgram.com/
|
||||
api_key:
|
||||
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.model
|
||||
model:
|
||||
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
|
||||
language:
|
||||
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
|
||||
detect_language: false
|
||||
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
|
||||
interim_results: true
|
||||
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
|
||||
smart_format: false
|
||||
# When smart_format is used, ensures it does not wait for sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
|
||||
no_delay: true
|
||||
# Whether to add punctuation to the transcription. The turn detector works better with punctuation. See https://developers.deepgram.com/docs/punctuation
|
||||
punctuate: true
|
||||
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
|
||||
filler_words: true
|
||||
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
|
||||
profanity_filter: false
|
||||
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
|
||||
numerals: false
|
||||
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
|
||||
# keywords:
|
||||
# - [OpenVidu, 1.5]
|
||||
# - [WebRTC, 1]
|
||||
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
|
||||
# Commented below is an example
|
||||
keyterms:
|
||||
# - "OpenVidu"
|
||||
# - "WebRTC"
|
||||
|
||||
assemblyai:
|
||||
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
|
||||
api_key:
|
||||
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
|
||||
end_of_turn_confidence_threshold:
|
||||
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
|
||||
min_end_of_turn_silence_when_confident:
|
||||
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
|
||||
max_turn_silence:
|
||||
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
|
||||
format_turns: true
|
||||
# List of keyterms to improve recognition accuracy for specific words and phrases.
|
||||
keyterms_prompt:
|
||||
# - "OpenVidu"
|
||||
# - "WebRTC"
|
||||
|
||||
fal:
|
||||
# API key for fal. See https://fal.ai/dashboard/keys
|
||||
api_key:
|
||||
# See https://fal.ai/models/fal-ai/wizper/api#schema
|
||||
language:
|
||||
|
||||
clova:
|
||||
# Secret key issued when registering the app
|
||||
api_key:
|
||||
# API Gateway's unique invoke URL created in CLOVA Speech Domain.
|
||||
# See https://guide.ncloud-docs.com/docs/en/clovaspeech-domain#create-domain
|
||||
invoke_url:
|
||||
# See https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-longsentence
|
||||
language:
|
||||
# Value between 0 and 1 indicating the threshold for the confidence score of the transcribed text. Default is 0.5.
|
||||
# If the confidence score is lower than the threshold, the transcription event is not sent to the client.
|
||||
# For a definition of the confidence score see https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-grpc
|
||||
threshold:
|
||||
|
||||
speechmatics:
|
||||
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
|
||||
api_key:
|
||||
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
|
||||
language:
|
||||
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
|
||||
operating_point:
|
||||
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
|
||||
enable_partials:
|
||||
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
|
||||
enable_diarization:
|
||||
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
|
||||
output_locale:
|
||||
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
|
||||
max_delay:
|
||||
# See https://docs.speechmatics.com/features/realtime-latency#configuration-example
|
||||
max_delay_mode:
|
||||
# Configuration for speaker diarization. See https://docs.speechmatics.com/features/diarization
|
||||
speaker_diarization_config:
|
||||
# See https://docs.speechmatics.com/features/diarization#max-speakers
|
||||
max_speakers:
|
||||
# See https://docs.speechmatics.com/features/diarization#speaker-sensitivity
|
||||
speaker_sensitivity:
|
||||
# See https://docs.speechmatics.com/features/diarization#prefer-current-speaker
|
||||
prefer_current_speaker:
|
||||
# Permitted punctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
|
||||
# Commented is an example of punctuation settings
|
||||
punctuation_overrides:
|
||||
# permitted_marks: [ ".", "," ]
|
||||
# sensitivity: 0.4
|
||||
# See https://docs.speechmatics.com/features/custom-dictionary
|
||||
# Commented below is an example of a custom dictionary
|
||||
additional_vocab:
|
||||
# - content: financial crisis
|
||||
# - content: gnocchi
|
||||
# sounds_like:
|
||||
# - nyohki
|
||||
# - nokey
|
||||
# - nochi
|
||||
# - content: CEO
|
||||
# sounds_like:
|
||||
# - C.E.O.
|
||||
|
||||
gladia:
|
||||
# API key for Gladia. See https://app.gladia.io/account
|
||||
api_key:
|
||||
# Whether to return interim (non-final) transcription results. Defaults to True
|
||||
interim_results:
|
||||
# List of language codes to use for recognition. Defaults to None (auto-detect). See https://docs.gladia.io/chapters/limits-and-specifications/languages
|
||||
languages:
|
||||
# Whether to allow switching between languages during recognition. Defaults to True
|
||||
code_switching:
|
||||
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
|
||||
pre_processing_audio_enhancer:
|
||||
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
|
||||
pre_processing_speech_threshold:
|
||||
|
||||
sarvam:
|
||||
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
|
||||
api_key:
|
||||
# BCP-47 language code for supported Indian languages. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.language_code.language_code
|
||||
language:
|
||||
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
|
||||
model:
|
||||
|
||||
mistralai:
|
||||
# API key for Mistral AI. See https://console.mistral.ai/api-keys
|
||||
api_key:
|
||||
# Name of the Voxtral STT model to use. Defaults to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
|
||||
model:
|
||||
# The language code to use for transcription (e.g., "en" for English)
|
||||
language:
|
||||
|
||||
cartesia:
|
||||
# API key for Cartesia. See https://play.cartesia.ai/keys
|
||||
api_key:
|
||||
# The Cartesia STT model to use
|
||||
model:
|
||||
# The language code to use for transcription (e.g., "en" for English)
|
||||
language:
|
||||
|
||||
soniox:
|
||||
# API key for Soniox. See https://console.soniox.com/
|
||||
api_key:
|
||||
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
|
||||
language_hints:
|
||||
# - "en"
|
||||
# - "es"
|
||||
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
|
||||
context:
|
||||
|
||||
nvidia:
|
||||
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
|
||||
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set "server" and "use_ssl" instead.
|
||||
api_key:
|
||||
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
|
||||
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
|
||||
model:
|
||||
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
|
||||
function_id:
|
||||
# Whether to add punctuation to transcription results. Default is true.
|
||||
punctuate:
|
||||
# The language code for transcription. Default is "en-US"
|
||||
language_code:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
|
||||
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
|
||||
server:
|
||||
# Whether to use SSL for the connection. Default is true.
|
||||
# Set to false for locally hosted Riva NIM services without SSL.
|
||||
use_ssl:
|
||||
|
||||
spitch:
|
||||
# API key for Spitch. See https://docs.spitch.app/keys
|
||||
api_key:
|
||||
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
|
||||
language:
|
||||
|
||||
elevenlabs:
|
||||
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
|
||||
api_key:
|
||||
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
|
||||
model_id:
|
||||
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically
|
||||
language_code:
|
||||
# Custom base URL for the API. Optional.
|
||||
base_url:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
|
||||
tag_audio_events:
|
||||
# Whether to include word-level timestamps in the transcription. Default is false.
|
||||
include_timestamps:
|
||||
|
||||
simplismart:
|
||||
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
|
||||
api_key:
|
||||
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
|
||||
# Default is "openai/whisper-large-v3-turbo"
|
||||
model:
|
||||
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
|
||||
language:
|
||||
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
|
||||
task:
|
||||
# If true, disables timestamp generation in transcripts. Default is true
|
||||
without_timestamps:
|
||||
# Minimum duration (ms) for a valid speech segment. Default is 0
|
||||
min_speech_duration_ms:
|
||||
# Decoding temperature (affects randomness). Default is 0.0
|
||||
temperature:
|
||||
# Whether to permit multilingual recognition. Default is false
|
||||
multilingual:
|
||||
|
||||
vosk:
|
||||
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
|
||||
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
|
||||
# - vosk-model-en-us-0.22-lgraph (English US)
|
||||
# - vosk-model-small-cn-0.22 (Chinese)
|
||||
# - vosk-model-small-de-0.15 (German)
|
||||
# - vosk-model-small-en-in-0.4 (English India)
|
||||
# - vosk-model-small-es-0.42 (Spanish)
|
||||
# - vosk-model-small-fr-0.22 (French)
|
||||
# - vosk-model-small-hi-0.22 (Hindi)
|
||||
# - vosk-model-small-it-0.22 (Italian)
|
||||
# - vosk-model-small-ja-0.22 (Japanese)
|
||||
# - vosk-model-small-nl-0.22 (Dutch)
|
||||
# - vosk-model-small-pt-0.3 (Portuguese)
|
||||
# - vosk-model-small-ru-0.22 (Russian)
|
||||
model: vosk-model-en-us-0.22-lgraph
|
||||
# Language code for reference. It has no effect other than observability purposes.
|
||||
# If a pre-installed "model" is declared, this will be set automatically if empty.
|
||||
language:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# Whether to return interim/partial results during recognition. Default is true.
|
||||
partial_results:
|
||||
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
|
||||
use_silero_vad: false
|
||||
|
||||
sherpa:
|
||||
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
|
||||
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
|
||||
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
|
||||
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
|
||||
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
|
||||
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
|
||||
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
|
||||
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
|
||||
# Language code for reference. Auto-detected from model name if not set.
|
||||
language:
|
||||
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
|
||||
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
|
||||
provider:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# Whether to return interim/partial results during recognition. Default is true.
|
||||
partial_results:
|
||||
# Number of threads for ONNX Runtime. Default is 2.
|
||||
num_threads:
|
||||
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
|
||||
recognizer_type:
|
||||
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
|
||||
decoding_method:
|
||||
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
|
||||
use_silero_vad: false
|
||||
@ -14,14 +14,4 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
fi
|
||||
|
||||
# Replace the LAN_PRIVATE_IP in the .env file
|
||||
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
|
||||
|
||||
# If running on an Apple Silicon Mac, enable the EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
|
||||
if [ "$(uname -m)" = "arm64" ]; then
|
||||
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
|
||||
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
|
||||
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
|
||||
else
|
||||
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
|
||||
fi
|
||||
fi
|
||||
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
|
||||
@ -1,7 +1,6 @@
|
||||
services:
|
||||
caddy-proxy:
|
||||
image: docker.io/openvidu/openvidu-caddy-local:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
|
||||
container_name: caddy-proxy
|
||||
restart: unless-stopped
|
||||
extra_hosts:
|
||||
@ -17,37 +16,40 @@ services:
|
||||
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
|
||||
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
|
||||
env_file:
|
||||
- ./meet.env
|
||||
volumes:
|
||||
- ./custom-layout:/var/www/custom-layout
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
ports:
|
||||
- 5443:5443
|
||||
- 6443:6443
|
||||
- 7443:7443
|
||||
- 7880:7880
|
||||
- 9443:9443
|
||||
- 9080:9080
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
redis:
|
||||
image: docker.io/redis:7.4.4-alpine
|
||||
platform: linux/amd64
|
||||
image: docker.io/redis:8.6.1-alpine
|
||||
container_name: redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 6379:6379
|
||||
volumes:
|
||||
- redis:/data
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
command: >
|
||||
redis-server
|
||||
--bind 0.0.0.0
|
||||
--requirepass ${REDIS_PASSWORD:-}
|
||||
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
minio:
|
||||
image: docker.io/bitnami/minio:2025.5.24-debian-12-r1
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
|
||||
container_name: minio
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
@ -57,23 +59,25 @@ services:
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
|
||||
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
|
||||
- MINIO_CONSOLE_SUBPATH=/minio-console
|
||||
- MINIO_BROWSER=on
|
||||
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
|
||||
volumes:
|
||||
- minio-data:/bitnami/minio/data
|
||||
- minio-certs:/certs
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
mongo:
|
||||
image: docker.io/bitnami/mongodb:8.0.9
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/mongodb:8.0.19-r1
|
||||
container_name: mongo
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongo-data:/bitnami/mongodb
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
|
||||
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
|
||||
@ -81,14 +85,12 @@ services:
|
||||
- MONGODB_REPLICA_SET_MODE=primary
|
||||
- MONGODB_REPLICA_SET_NAME=rs0
|
||||
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
|
||||
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
dashboard:
|
||||
image: docker.io/openvidu/openvidu-dashboard:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-dashboard:3.6.0
|
||||
container_name: dashboard
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
@ -96,19 +98,21 @@ services:
|
||||
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
|
||||
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
|
||||
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
|
||||
volumes:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
openvidu:
|
||||
image: docker.io/openvidu/openvidu-server:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-server:3.6.0
|
||||
restart: unless-stopped
|
||||
container_name: openvidu
|
||||
extra_hosts:
|
||||
- host.docker.internal:host-gateway
|
||||
environment:
|
||||
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
ports:
|
||||
- 3478:3478/udp
|
||||
- 7881:7881/tcp
|
||||
@ -118,13 +122,13 @@ services:
|
||||
volumes:
|
||||
- ./livekit.yaml:/etc/livekit.yaml
|
||||
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
ingress:
|
||||
image: docker.io/openvidu/ingress:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/ingress:3.6.0
|
||||
container_name: ingress
|
||||
restart: unless-stopped
|
||||
extra_hosts:
|
||||
@ -137,13 +141,13 @@ services:
|
||||
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
|
||||
volumes:
|
||||
- ./ingress.yaml:/etc/ingress.yaml
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
egress:
|
||||
image: docker.io/livekit/egress:v1.9.1
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/egress:3.6.0
|
||||
restart: unless-stopped
|
||||
container_name: egress
|
||||
extra_hosts:
|
||||
@ -153,62 +157,85 @@ services:
|
||||
volumes:
|
||||
- ./egress.yaml:/etc/egress.yaml
|
||||
- egress-data:/home/egress/tmp
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
default-app:
|
||||
image: docker.io/openvidu/openvidu-call:3.2.0-demo
|
||||
platform: linux/amd64
|
||||
container_name: openvidu-call
|
||||
openvidu-meet:
|
||||
image: docker.io/openvidu/openvidu-meet:3.6.0
|
||||
container_name: openvidu-meet
|
||||
restart: on-failure
|
||||
extra_hosts:
|
||||
- host.docker.internal:host-gateway
|
||||
environment:
|
||||
- USE_HTTPS=${USE_HTTPS:-false}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
- LAN_DOMAIN=${LAN_DOMAIN:-}
|
||||
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
|
||||
- SERVER_PORT=6080
|
||||
- CALL_NAME_ID=OpenViduCall-LOCAL
|
||||
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
|
||||
- CALL_PRIVATE_ACCESS=${CALL_PRIVATE_ACCESS:-false}
|
||||
- CALL_USER=${CALL_USER:-}
|
||||
- CALL_SECRET=${CALL_SECRET:-}
|
||||
- CALL_RECORDING=${CALL_RECORDING:-}
|
||||
- CALL_ADMIN_USER=${CALL_ADMIN_USER:-admin}
|
||||
- CALL_ADMIN_SECRET=${CALL_ADMIN_SECRET:-admin}
|
||||
- CALL_LOG_LEVEL=${CALL_LOG_LEVEL:-info}
|
||||
- CALL_S3_BUCKET=${CALL_S3_BUCKET:-openvidu-appdata}
|
||||
- CALL_S3_SERVICE_ENDPOINT=${CALL_S3_SERVICE_ENDPOINT:-http://minio:9000}
|
||||
- CALL_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
||||
- CALL_S3_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- CALL_AWS_REGION=${CALL_AWS_REGION:-us-east-1}
|
||||
- CALL_S3_WITH_PATH_STYLE_ACCESS=${CALL_S3_WITH_PATH_STYLE_ACCESS:-true}
|
||||
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
||||
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
|
||||
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
|
||||
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
|
||||
- MEET_CONFIG_DIR=/config/meet.env
|
||||
volumes:
|
||||
- ./scripts/entrypoint_default_app.sh:/scripts/entrypoint.sh
|
||||
- ./meet.env:/config/meet.env
|
||||
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
|
||||
- ./scripts/utils.sh:/scripts/utils.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
entrypoint: /bin/sh /scripts/entrypoint.sh
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
operator:
|
||||
image: docker.io/openvidu/openvidu-operator:3.6.0
|
||||
container_name: operator
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- agents-config:/agents-config
|
||||
- ./:/deployment
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- MODE=agent-manager-local
|
||||
- DEPLOYMENT_FILES_DIR=/deployment
|
||||
- AGENTS_CONFIG_DIR=/agents-config
|
||||
- NETWORK_NAME=openvidu-community
|
||||
- AGENTS_CONFIG_VOLUME=openvidu-agents-config
|
||||
- LIVEKIT_URL=ws://openvidu:7880/
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- REDIS_ADDRESS=redis:6379
|
||||
- REDIS_PASSWORD=${REDIS_PASSWORD:-}
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
ready-check:
|
||||
image: docker.io/curlimages/curl:8.13.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-operator:3.6.0
|
||||
container_name: ready-check
|
||||
restart: on-failure
|
||||
volumes:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- MODE=local-ready-check
|
||||
- OPENVIDU_ENVIRONMENT=local-platform
|
||||
- USE_HTTPS=${USE_HTTPS:-false}
|
||||
- LAN_DOMAIN=${LAN_DOMAIN:-}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- DASHBOARD_ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
|
||||
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
env_file:
|
||||
- ./meet.env
|
||||
depends_on:
|
||||
- openvidu
|
||||
- ingress
|
||||
@ -216,14 +243,9 @@ services:
|
||||
- dashboard
|
||||
- minio
|
||||
- mongo
|
||||
volumes:
|
||||
- ./scripts/ready-check.sh:/scripts/ready-check.sh
|
||||
- ./scripts/utils.sh:/scripts/utils.sh
|
||||
command: /bin/sh /scripts/ready-check.sh
|
||||
|
||||
setup:
|
||||
image: docker.io/busybox:1.37.0
|
||||
platform: linux/amd64
|
||||
container_name: setup
|
||||
restart: "no"
|
||||
volumes:
|
||||
@ -231,6 +253,7 @@ services:
|
||||
- mongo-data:/mongo
|
||||
- egress-data:/egress
|
||||
- ./scripts/setup.sh:/scripts/setup.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- USE_HTTPS=${USE_HTTPS:-false}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
|
||||
@ -27,6 +27,10 @@ storage:
|
||||
# account_name: your_account_name
|
||||
# account_key: your_account_key
|
||||
# container_name: openvidu-appdata
|
||||
# gcp:
|
||||
# credentials_json: |
|
||||
# your_credentials_json
|
||||
# bucket: openvidu-appdata
|
||||
|
||||
# CPU cost for each type of Egress operation.
|
||||
cpu_cost:
|
||||
@ -38,3 +42,20 @@ cpu_cost:
|
||||
participant_cpu_cost: 0.01
|
||||
track_composite_cpu_cost: 0.01
|
||||
track_cpu_cost: 0.01
|
||||
|
||||
openvidu:
|
||||
# Allocation strategy for new egress requests
|
||||
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
|
||||
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
|
||||
allocation_strategy: cpuload
|
||||
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
|
||||
# It is preferable to set this value to:
|
||||
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
|
||||
# - false: when the egress service is running in a dedicated server.
|
||||
use_global_cpu_monitoring: true
|
||||
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
|
||||
# The default "false" value helps keep the node stable, but may cause unexpected egress terminations under high load.
|
||||
disable_cpu_overload_killer: false
|
||||
# Minimum available disk space in MB required to accept new egress requests.
|
||||
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
|
||||
min_disk_space_mb: 512
|
||||
@ -31,7 +31,7 @@ webhook:
|
||||
api_key: devkey
|
||||
urls:
|
||||
- http://host.docker.internal:6080/livekit/webhook
|
||||
- http://default-app:6080/livekit/webhook
|
||||
- http://openvidu-meet:6080/livekit/webhook
|
||||
ingress:
|
||||
rtmp_base_url: rtmp://localhost:1935/rtmp
|
||||
whip_base_url: http://localhost:8085/whip
|
||||
|
||||
36
community/meet.env
Normal file
36
community/meet.env
Normal file
@ -0,0 +1,36 @@
|
||||
# OpenVidu Meet configuration
|
||||
# Static environment variables loaded via MEET_CONFIG_DIR
|
||||
|
||||
SERVER_PORT=6080
|
||||
MEET_NAME_ID=openviduMeet-LOCAL
|
||||
MEET_LOG_LEVEL=info
|
||||
MEET_COOKIE_SECURE=false
|
||||
MEET_INITIAL_ADMIN_USER=admin
|
||||
MEET_INITIAL_ADMIN_PASSWORD=admin
|
||||
MEET_INITIAL_API_KEY=meet-api-key
|
||||
MEET_INITIAL_WEBHOOK_ENABLED=true
|
||||
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
|
||||
|
||||
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
|
||||
|
||||
# S3 configuration
|
||||
MEET_S3_BUCKET=openvidu-appdata
|
||||
MEET_S3_SUBBUCKET=openvidu-meet
|
||||
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
|
||||
MEET_AWS_REGION=us-east-1
|
||||
MEET_S3_WITH_PATH_STYLE_ACCESS=true
|
||||
|
||||
# Storage backend type
|
||||
MEET_BLOB_STORAGE_MODE=s3
|
||||
|
||||
# Redis configuration
|
||||
MEET_REDIS_HOST=redis
|
||||
MEET_REDIS_PORT=6379
|
||||
MEET_REDIS_DB=0
|
||||
|
||||
# MongoDB configuration
|
||||
MEET_MONGO_ENABLED=true
|
||||
MEET_MONGO_DB_NAME=openvidu-meet
|
||||
|
||||
# Enable live captions using OpenVidu Speech to Text agent
|
||||
MEET_CAPTIONS_ENABLED=false
|
||||
@ -1,7 +1,8 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if [ "$LAN_PRIVATE_IP" != "none" ]; then
|
||||
if [ "$LAN_PRIVATE_IP" != "" ] && [ "$LAN_MODE" = 'true' ]; then
|
||||
echo "Using as NODE_IP: $LAN_PRIVATE_IP"
|
||||
export NODE_IP="$LAN_PRIVATE_IP"
|
||||
fi
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
. /scripts/utils.sh
|
||||
|
||||
URL=$(getDeploymentUrl)
|
||||
URL=$(getDeploymentUrl ws)
|
||||
export LIVEKIT_URL="${URL}"
|
||||
|
||||
/usr/local/bin/entrypoint.sh
|
||||
@ -1,70 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
. /scripts/utils.sh
|
||||
|
||||
trap 'handle_sigint' SIGINT
|
||||
|
||||
handle_sigint() {
|
||||
echo "SIGINT signal received, exiting..."
|
||||
exit 1
|
||||
}
|
||||
|
||||
wait_for_service() {
|
||||
SERVICE_NAME=$1
|
||||
SERVICE_URL=$2
|
||||
shift 2
|
||||
EXTRA=$@
|
||||
if [ -n "$EXTRA" ]; then
|
||||
until curl $EXTRA $SERVICE_URL > /dev/null; do
|
||||
echo "Waiting for $SERVICE_NAME to start...";
|
||||
sleep 1;
|
||||
done;
|
||||
else
|
||||
until curl --silent --head --fail $SERVICE_URL > /dev/null; do
|
||||
echo "Waiting for $SERVICE_NAME to start...";
|
||||
sleep 1;
|
||||
done;
|
||||
fi;
|
||||
}
|
||||
|
||||
wait_for_service 'OpenVidu' 'http://openvidu:7880'
|
||||
wait_for_service 'Ingress' 'http://ingress:9091'
|
||||
wait_for_service 'Egress' 'http://egress:9091'
|
||||
wait_for_service 'Dashboard' 'http://dashboard:5000'
|
||||
wait_for_service 'Minio' 'http://minio:9000/minio/health/live'
|
||||
wait_for_service 'Minio Console' 'http://minio:9001/minio-console'
|
||||
wait_for_service 'Mongo' 'http://mongo:27017' --connect-timeout 10 --silent
|
||||
|
||||
LAN_HTTP_URL=$(getDeploymentUrl http)
|
||||
LAN_WS_URL=$(getDeploymentUrl ws)
|
||||
|
||||
for i in $(seq 1 10); do
|
||||
echo 'Starting OpenVidu... Please be patient...'
|
||||
sleep 1
|
||||
done;
|
||||
echo ''
|
||||
echo ''
|
||||
echo '========================================='
|
||||
echo '🎉 OpenVidu is ready! 🎉'
|
||||
echo '========================================='
|
||||
echo ''
|
||||
echo 'OpenVidu Server & LiveKit Server URLs:'
|
||||
echo ''
|
||||
echo ' - From this machine:'
|
||||
echo ''
|
||||
echo ' - http://localhost:7880'
|
||||
echo ' - ws://localhost:7880'
|
||||
echo ''
|
||||
echo ' - From other devices in your LAN:'
|
||||
echo ''
|
||||
echo " - $LAN_HTTP_URL"
|
||||
echo " - $LAN_WS_URL"
|
||||
echo ''
|
||||
echo '========================================='
|
||||
echo ''
|
||||
echo 'OpenVidu Developer UI (services and passwords):'
|
||||
echo ''
|
||||
echo ' - http://localhost:7880'
|
||||
echo " - $LAN_HTTP_URL"
|
||||
echo ''
|
||||
echo '========================================='
|
||||
@ -1,6 +1,11 @@
|
||||
#!/bin/sh
|
||||
|
||||
if [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
|
||||
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$LAN_MODE" = 'true' ] && [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
echo '------------------------'
|
||||
echo ''
|
||||
echo 'LAN_PRIVATE_IP is required in the .env file.'
|
||||
@ -14,7 +19,9 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
echo 'If it can'\''t be found, you can manually set it in the .env file'
|
||||
echo '------------------------'
|
||||
exit 1
|
||||
else
|
||||
fi
|
||||
|
||||
if [ "$LAN_MODE" = 'true' ] && [ -n "$LAN_PRIVATE_IP" ]; then
|
||||
# Check if the LAN_PRIVATE_IP is reachable
|
||||
if ! ping -c 1 -W 1 "$LAN_PRIVATE_IP" > /dev/null; then
|
||||
echo "ERROR: LAN_PRIVATE_IP $LAN_PRIVATE_IP is not reachable"
|
||||
@ -26,15 +33,16 @@ else
|
||||
echo " - MacOS: ./configure_lan_private_ip_macos.sh"
|
||||
echo " - Windows: .\configure_lan_private_ip_windows.bat"
|
||||
echo ""
|
||||
echo " If you don't want to access OpenVidu through your LAN,"
|
||||
echo " you can run without LAN_MODE enabled, simply set"
|
||||
echo " the following variables in the .env file:"
|
||||
echo " USE_HTTPS=false"
|
||||
echo " LAN_MODE=false"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
|
||||
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Prepare volumes
|
||||
mkdir -p /minio/data
|
||||
mkdir -p /mongo/data
|
||||
|
||||
8
pro/.env
8
pro/.env
@ -30,9 +30,5 @@ MINIO_SECRET_KEY=minioadmin
|
||||
MONGO_ADMIN_USERNAME=mongoadmin
|
||||
MONGO_ADMIN_PASSWORD=mongoadmin
|
||||
|
||||
# Default App (OpenVidu Call)
|
||||
CALL_PRIVATE_ACCESS=false
|
||||
CALL_USER=
|
||||
CALL_SECRET=
|
||||
CALL_ADMIN_USER=admin
|
||||
CALL_ADMIN_SECRET=admin
|
||||
# OpenVidu Meet base path
|
||||
MEET_BASE_PATH=/meet
|
||||
|
||||
437
pro/agent-speech-processing.yaml
Normal file
437
pro/agent-speech-processing.yaml
Normal file
@ -0,0 +1,437 @@
|
||||
# Docker image of the agent.
|
||||
docker_image: docker.io/openvidu/agent-speech-processing-vosk:3.6.0
|
||||
|
||||
# Whether to run the agent or not.
|
||||
enabled: false
|
||||
|
||||
# Maximum CPU load threshold for the agent to accept new jobs. Value between 0 and 1.
|
||||
load_threshold: 1.0
|
||||
|
||||
# Log level for the agent [DEBUG, INFO, WARN, ERROR, CRITICAL]
|
||||
log_level: INFO
|
||||
|
||||
live_captions:
|
||||
# How this agent will connect to Rooms [manual, automatic]
|
||||
# - manual: the agent will connect to new Rooms only when your application dictates it by using the Agent Dispatch API.
|
||||
# - automatic: the agent will automatically connect to new Rooms.
|
||||
processing: manual
|
||||
|
||||
# Which speech-to-text AI provider to use [aws, azure, google, openai, azure_openai, groq, deepgram, assemblyai, fal, clova, speechmatics, gladia, sarvam, mistralai, cartesia, soniox, nvidia, elevenlabs, simplismart, vosk, sherpa]
|
||||
# The custom configuration for the selected provider must be set below
|
||||
provider: vosk
|
||||
|
||||
aws:
|
||||
# Credentials for AWS Transcribe. See https://docs.aws.amazon.com/transcribe/latest/dg/what-is.html
|
||||
aws_access_key_id:
|
||||
aws_secret_access_key:
|
||||
aws_default_region:
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
|
||||
language:
|
||||
# The name of the custom vocabulary you want to use.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-vocabulary.html
|
||||
vocabulary_name:
|
||||
# The name of the custom language model you want to use.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/custom-language-models-using.html
|
||||
language_model_name:
|
||||
# Whether or not to enable partial result stabilization. Partial result stabilization can reduce latency in your output, but may impact accuracy.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
|
||||
enable_partial_results_stabilization:
|
||||
# Specify the level of stability to use when you enable partial results stabilization (enable_partial_results_stabilization: true). Valid values: high | medium | low
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/streaming-partial-results.html#streaming-partial-result-stabilization
|
||||
partial_results_stability:
|
||||
# The name of the custom vocabulary filter you want to use to mask or remove words.
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
|
||||
vocab_filter_name:
|
||||
# The method used to filter the vocabulary. Valid values: mask | remove | tag
|
||||
# See https://docs.aws.amazon.com/transcribe/latest/dg/vocabulary-filtering.html
|
||||
vocab_filter_method:
|
||||
|
||||
azure:
|
||||
# Credentials for Azure Speech Service.
|
||||
# One of these combinations must be set:
|
||||
# - speech_host
|
||||
# - speech_key + speech_region
|
||||
# - speech_auth_token + speech_region
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text?tabs=macos%2Cterminal&pivots=programming-language-python#prerequisites
|
||||
speech_host:
|
||||
speech_key:
|
||||
speech_auth_token:
|
||||
speech_region:
|
||||
# Azure handles multiple languages and can auto-detect the language used. Auto-detection requires the set of candidate languages to be specified, e.g. ["en-US", "es-ES"]
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#supported-languages
|
||||
language:
|
||||
# Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
|
||||
profanity:
|
||||
# List of words or phrases to boost recognition accuracy. Azure will give higher priority to these phrases during recognition.
|
||||
phrase_list:
|
||||
# Controls punctuation behavior. If True, enables explicit punctuation mode where punctuation marks are added explicitly. If False (default), uses Azure's default punctuation behavior.
|
||||
explicit_punctuation:
|
||||
|
||||
azure_openai:
|
||||
# Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
|
||||
# Azure OpenAI API key
|
||||
azure_api_key:
|
||||
# Azure Active Directory token
|
||||
azure_ad_token:
|
||||
# Azure OpenAI endpoint in the following format: https://{your-resource-name}.openai.azure.com. Mandatory value.
|
||||
azure_endpoint:
|
||||
# Name of your model deployment. If given with `azure_endpoint`, sets the base client URL to include `/deployments/{azure_deployment}`.
|
||||
azure_deployment:
|
||||
# OpenAI REST API version used for the request. Mandatory value.
|
||||
api_version:
|
||||
# OpenAI organization ID.
|
||||
organization:
|
||||
# OpenAI project ID.
|
||||
project:
|
||||
# The language code to use for transcription (e.g., "en" for English).
|
||||
language:
|
||||
# Whether to automatically detect the language.
|
||||
detect_language:
|
||||
# ID of the model to use for speech-to-text.
|
||||
model:
|
||||
# Initial prompt to guide the transcription.
|
||||
prompt:
|
||||
|
||||
google:
|
||||
# Credentials for Google Cloud. This is the content of a Google Cloud credential JSON file.
|
||||
# Below is a dummy example for a credential type of "Service Account" (https://cloud.google.com/iam/docs/service-account-creds#key-types)
|
||||
credentials_info: |
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "my-project",
|
||||
"private_key_id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "my-email@my-project.iam.gserviceaccount.com",
|
||||
"client_id": "xxxxxxxxxxxxxxxxxxxxx",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/my-email%40my-project.iam.gserviceaccount.com",
|
||||
"universe_domain": "googleapis.com"
|
||||
}
|
||||
# Which model to use for recognition. If not set, uses the default model for the selected language.
|
||||
# See https://cloud.google.com/speech-to-text/docs/transcription-model
|
||||
model:
|
||||
# The location to use for recognition. Default is "us-central1". Latency will be best if the location is close to your users.
|
||||
# Check supported languages and locations at https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
|
||||
location:
|
||||
# List of language codes to recognize. Default is ["en-US"].
|
||||
# See https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
|
||||
languages:
|
||||
# Whether to detect the language of the audio. Default is true.
|
||||
detect_language:
|
||||
# If 'true', adds punctuation to recognition result hypotheses. This feature is only available in select languages. Setting this
|
||||
# for requests in other languages has no effect at all. The default 'false' value does not add punctuation to result hypotheses.
|
||||
# See https://cloud.google.com/speech-to-text/docs/automatic-punctuation
|
||||
punctuate:
|
||||
# The spoken punctuation behavior for the call. If not set, uses default behavior based on model of choice.
|
||||
# e.g. command_and_search will enable spoken punctuation by default. If 'true', replaces spoken punctuation
|
||||
# with the corresponding symbols in the request. For example, "how are you question mark" becomes "how are you?".
|
||||
# See https://cloud.google.com/speech-to-text/docs/spoken-punctuation for support. If 'false', spoken punctuation is not replaced.
|
||||
spoken_punctuation:
|
||||
# Whether to return interim (non-final) transcription results. Defaults to true.
|
||||
interim_results:
|
||||
|
||||
openai:
|
||||
# API key for OpenAI. See https://platform.openai.com/api-keys
|
||||
api_key:
|
||||
# The OpenAI model to use for transcription. See https://platform.openai.com/docs/guides/speech-to-text
|
||||
model:
|
||||
# The language of the input audio. Supplying the input language in ISO-639-1 format
|
||||
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
|
||||
language:
|
||||
# Whether to automatically detect the language.
|
||||
detect_language:
|
||||
# Optional text prompt to guide the transcription. Only supported for whisper-1.
|
||||
prompt:
|
||||
|
||||
groq:
|
||||
# API key for Groq. See https://console.groq.com/keys
|
||||
api_key:
|
||||
# See https://console.groq.com/docs/speech-to-text
|
||||
model:
|
||||
# The language of the input audio. Supplying the input language in ISO-639-1 format
|
||||
# (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
|
||||
language:
|
||||
# Whether to automatically detect the language.
|
||||
detect_language:
|
||||
# Prompt to guide the model's style or specify how to spell unfamiliar words. 224 tokens max.
|
||||
prompt:
|
||||
# Base URL for the Groq API. By default "https://api.groq.com/openai/v1"
|
||||
base_url:
|
||||
|
||||
deepgram:
|
||||
# See https://console.deepgram.com/
|
||||
api_key:
|
||||
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.model
|
||||
model:
|
||||
# See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
|
||||
language:
|
||||
# Whether to enable automatic language detection. See https://developers.deepgram.com/docs/language-detection
|
||||
detect_language: false
|
||||
# Whether to return interim (non-final) transcription results. See https://developers.deepgram.com/docs/interim-results
|
||||
interim_results: true
|
||||
# Whether to apply smart formatting to numbers, dates, etc. See https://developers.deepgram.com/docs/smart-format
|
||||
smart_format: false
|
||||
# When smart_format is used, ensures it does not wait for the sequence to be complete before returning results. See https://developers.deepgram.com/docs/smart-format#using-no-delay
|
||||
no_delay: true
|
||||
# Whether to add punctuation to the transcription. The turn detector works better with punctuation. See https://developers.deepgram.com/docs/punctuation
|
||||
punctuate: true
|
||||
# Whether to include filler words (um, uh, etc.) in transcription. See https://developers.deepgram.com/docs/filler-words
|
||||
filler_words: true
|
||||
# Whether to filter profanity from the transcription. See https://developers.deepgram.com/docs/profanity-filter
|
||||
profanity_filter: false
|
||||
# Whether to transcribe numbers as numerals. See https://developers.deepgram.com/docs/numerals
|
||||
numerals: false
|
||||
# List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). keywords does not work with Nova-3 models. Use keyterms instead.
|
||||
# keywords:
|
||||
# - [OpenVidu, 1.5]
|
||||
# - [WebRTC, 1]
|
||||
# List of key terms to improve recognition accuracy. keyterms is supported by Nova-3 models.
|
||||
# Commented below is an example
|
||||
keyterms:
|
||||
# - "OpenVidu"
|
||||
# - "WebRTC"
|
||||
|
||||
assemblyai:
|
||||
# API key for AssemblyAI. See https://www.assemblyai.com/dashboard/api-keys
|
||||
api_key:
|
||||
# The confidence threshold (0.0 to 1.0) to use when determining if the end of a turn has been reached.
|
||||
end_of_turn_confidence_threshold:
|
||||
# The minimum amount of silence in milliseconds required to detect end of turn when confident.
|
||||
min_end_of_turn_silence_when_confident:
|
||||
# The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered.
|
||||
max_turn_silence:
|
||||
# Whether to return formatted final transcripts (proper punctuation, letter casing...). If enabled, formatted final transcripts are emitted shortly following an end-of-turn detection.
|
||||
format_turns: true
|
||||
# List of keyterms to improve recognition accuracy for specific words and phrases.
|
||||
keyterms_prompt:
|
||||
# - "OpenVidu"
|
||||
# - "WebRTC"
|
||||
|
||||
fal:
|
||||
# API key for fal. See https://fal.ai/dashboard/keys
|
||||
api_key:
|
||||
# See https://fal.ai/models/fal-ai/wizper/api#schema
|
||||
language:
|
||||
|
||||
clova:
|
||||
# Secret key issued when registering the app
|
||||
api_key:
|
||||
# API Gateway's unique invoke URL created in CLOVA Speech Domain.
|
||||
# See https://guide.ncloud-docs.com/docs/en/clovaspeech-domain#create-domain
|
||||
invoke_url:
|
||||
# See https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-longsentence
|
||||
language:
|
||||
# Value between 0 and 1 indicating the threshold for the confidence score of the transcribed text. Default is 0.5.
|
||||
# If the confidence score is lower than the threshold, the transcription event is not sent to the client.
|
||||
# For a definition of the confidence score see https://api.ncloud-docs.com/docs/en/ai-application-service-clovaspeech-grpc
|
||||
threshold:
|
||||
|
||||
speechmatics:
|
||||
# API key for Speechmatics. See https://portal.speechmatics.com/manage-access/
|
||||
api_key:
|
||||
# ISO 639-1 language code. All languages are global and can understand different dialects/accents. To see the list of all supported languages, see https://docs.speechmatics.com/speech-to-text/languages#transcription-languages
|
||||
language:
|
||||
# Operating point to use for the transcription per required accuracy & complexity. To learn more, see https://docs.speechmatics.com/speech-to-text/languages#operating-points
|
||||
operating_point:
|
||||
# Partial transcripts allow you to receive preliminary transcriptions and update as more context is available until the higher-accuracy final transcript is returned. Partials are returned faster but without any post-processing such as formatting. See https://docs.speechmatics.com/speech-to-text/realtime/output#partial-transcripts
|
||||
enable_partials:
|
||||
# Enable speaker diarization. When enabled, the STT engine will determine and attribute words to unique speakers. The speaker_sensitivity parameter can be used to adjust the sensitivity of diarization
|
||||
enable_diarization:
|
||||
# RFC-5646 language code to make spelling rules more consistent in the transcription output. See https://docs.speechmatics.com/features/word-tagging#output-locale
|
||||
output_locale:
|
||||
# The delay in seconds between the end of a spoken word and returning the final transcript results. See https://docs.speechmatics.com/features/realtime-latency#configuration-example
|
||||
max_delay:
|
||||
# See https://docs.speechmatics.com/features/realtime-latency#configuration-example
|
||||
max_delay_mode:
|
||||
# Configuration for speaker diarization. See https://docs.speechmatics.com/features/diarization
|
||||
speaker_diarization_config:
|
||||
# See https://docs.speechmatics.com/features/diarization#max-speakers
|
||||
max_speakers:
|
||||
# See https://docs.speechmatics.com/features/diarization#speaker-sensitivity
|
||||
speaker_sensitivity:
|
||||
# See https://docs.speechmatics.com/features/diarization#prefer-current-speaker
|
||||
prefer_current_speaker:
|
||||
# Permitted punctuation marks for advanced punctuation. See https://docs.speechmatics.com/features/punctuation-settings
|
||||
# Commented is an example of punctuation settings
|
||||
punctuation_overrides:
|
||||
# permitted_marks: [ ".", "," ]
|
||||
# sensitivity: 0.4
|
||||
# See https://docs.speechmatics.com/features/custom-dictionary
|
||||
# Commented below is an example of a custom dictionary
|
||||
additional_vocab:
|
||||
# - content: financial crisis
|
||||
# - content: gnocchi
|
||||
# sounds_like:
|
||||
# - nyohki
|
||||
# - nokey
|
||||
# - nochi
|
||||
# - content: CEO
|
||||
# sounds_like:
|
||||
# - C.E.O.
|
||||
|
||||
gladia:
|
||||
# API key for Gladia. See https://app.gladia.io/account
|
||||
api_key:
|
||||
# Whether to return interim (non-final) transcription results. Defaults to True
|
||||
interim_results:
|
||||
# List of language codes to use for recognition. Defaults to None (auto-detect). See https://docs.gladia.io/chapters/limits-and-specifications/languages
|
||||
languages:
|
||||
# Whether to allow switching between languages during recognition. Defaults to True
|
||||
code_switching:
|
||||
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-audio-enhancer
|
||||
pre_processing_audio_enhancer:
|
||||
# https://docs.gladia.io/api-reference/v2/live/init#body-pre-processing-speech-threshold
|
||||
pre_processing_speech_threshold:
|
||||
|
||||
sarvam:
|
||||
# API key for Sarvam. See https://dashboard.sarvam.ai/key-management
|
||||
api_key:
|
||||
# BCP-47 language code for supported Indian languages. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.language_code.language_code
|
||||
language:
|
||||
# The Sarvam STT model to use. See https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe#request.body.model.model
|
||||
model:
|
||||
|
||||
mistralai:
|
||||
# API key for Mistral AI. See https://console.mistral.ai/api-keys
|
||||
api_key:
|
||||
# Name of the Voxtral STT model to use. Defaults to voxtral-mini-latest. See https://docs.mistral.ai/capabilities/audio/
|
||||
model:
|
||||
# The language code to use for transcription (e.g., "en" for English)
|
||||
language:
|
||||
|
||||
cartesia:
|
||||
# API key for Cartesia. See https://play.cartesia.ai/keys
|
||||
api_key:
|
||||
# The Cartesia STT model to use
|
||||
model:
|
||||
# The language code to use for transcription (e.g., "en" for English)
|
||||
language:
|
||||
|
||||
soniox:
|
||||
# API key for Soniox. See https://console.soniox.com/
|
||||
api_key:
|
||||
# Set language hints when possible to significantly improve accuracy. See: https://soniox.com/docs/stt/concepts/language-hints
|
||||
language_hints:
|
||||
# - "en"
|
||||
# - "es"
|
||||
# Set context to improve recognition of difficult and rare words. Context is a string and can include words, phrases, sentences, or summaries (limit: 10K chars). See https://soniox.com/docs/stt/concepts/context
|
||||
context:
|
||||
|
||||
nvidia:
|
||||
# API key for NVIDIA. See https://build.nvidia.com/explore/speech?integrate_nim=true&hosted_api=true&modal=integrate-nim
|
||||
# Required when using NVIDIA's cloud services. To use a self-hosted NVIDIA Riva server, set "server" and "use_ssl" instead.
|
||||
api_key:
|
||||
# The NVIDIA Riva ASR model to use. Default is "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
|
||||
# See available models: https://build.nvidia.com/search/models?filters=usecase%3Ausecase_speech_to_text
|
||||
model:
|
||||
# The NVIDIA function ID for the model. Default is "1598d209-5e27-4d3c-8079-4751568b1081"
|
||||
function_id:
|
||||
# Whether to add punctuation to transcription results. Default is true.
|
||||
punctuate:
|
||||
# The language code for transcription. Default is "en-US"
|
||||
language_code:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# The NVIDIA Riva server address. Default is "grpc.nvcf.nvidia.com:443"
|
||||
# For self-hosted NIM, use your server address (e.g., "localhost:50051")
|
||||
server:
|
||||
# Whether to use SSL for the connection. Default is true.
|
||||
# Set to false for locally hosted Riva NIM services without SSL.
|
||||
use_ssl:
|
||||
|
||||
spitch:
|
||||
# API key for Spitch. See https://docs.spitch.app/keys
|
||||
api_key:
|
||||
# Language short code for the generated speech. For supported values, see https://docs.spitch.app/concepts/languages
|
||||
language:
|
||||
|
||||
elevenlabs:
|
||||
# API key for ElevenLabs. See https://elevenlabs.io/app/settings/api-keys
|
||||
api_key:
|
||||
# The ElevenLabs STT model to use. Valid values are ["scribe_v1", "scribe_v2", "scribe_v2_realtime"]. See https://elevenlabs.io/docs/overview/models#models-overview
|
||||
model_id:
|
||||
# An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically
|
||||
language_code:
|
||||
# Custom base URL for the API. Optional.
|
||||
base_url:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Only supported for Scribe v1 model. Default is True
|
||||
tag_audio_events:
|
||||
# Whether to include word-level timestamps in the transcription. Default is false.
|
||||
include_timestamps:
|
||||
|
||||
simplismart:
|
||||
# API key for SimpliSmart. See https://docs.simplismart.ai/model-suite/settings/api-keys
|
||||
api_key:
|
||||
# Model identifier for the backend STT model. One of ["openai/whisper-large-v2", "openai/whisper-large-v3", "openai/whisper-large-v3-turbo"]
|
||||
# Default is "openai/whisper-large-v3-turbo"
|
||||
model:
|
||||
# Language code for transcription (default: "en"). See https://docs.simplismart.ai/get-started/playground/transcription-models#supported-languages-with-their-codes
|
||||
language:
|
||||
# Operation to perform. "transcribe" converts speech to text in the original language, "translate" translates into English. Default is "transcribe".
|
||||
task:
|
||||
# If true, disables timestamp generation in transcripts. Default is true
|
||||
without_timestamps:
|
||||
# Minimum duration (ms) for a valid speech segment. Default is 0
|
||||
min_speech_duration_ms:
|
||||
# Decoding temperature (affects randomness). Default is 0.0
|
||||
temperature:
|
||||
# Whether to permit multilingual recognition. Default is false
|
||||
multilingual:
|
||||
|
||||
vosk:
|
||||
# Vosk language model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-vosk"
|
||||
# Below is the list of pre-installed models in the container (available at https://alphacephei.com/vosk/models):
|
||||
# - vosk-model-en-us-0.22-lgraph (English US)
|
||||
# - vosk-model-small-cn-0.22 (Chinese)
|
||||
# - vosk-model-small-de-0.15 (German)
|
||||
# - vosk-model-small-en-in-0.4 (English India)
|
||||
# - vosk-model-small-es-0.42 (Spanish)
|
||||
# - vosk-model-small-fr-0.22 (French)
|
||||
# - vosk-model-small-hi-0.22 (Hindi)
|
||||
# - vosk-model-small-it-0.22 (Italian)
|
||||
# - vosk-model-small-ja-0.22 (Japanese)
|
||||
# - vosk-model-small-nl-0.22 (Dutch)
|
||||
# - vosk-model-small-pt-0.3 (Portuguese)
|
||||
# - vosk-model-small-ru-0.22 (Russian)
|
||||
model: vosk-model-en-us-0.22-lgraph
|
||||
# Language code for reference. It has no effect other than observability purposes.
|
||||
# If a pre-installed "model" is declared, this will be set automatically if empty.
|
||||
language:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# Whether to return interim/partial results during recognition. Default is true.
|
||||
partial_results:
|
||||
# Whether to override Vosk's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
|
||||
use_silero_vad: false
|
||||
|
||||
sherpa:
|
||||
# sherpa streaming model. This provider requires docker_image "docker.io/openvidu/agent-speech-processing-sherpa"
|
||||
# Below is the list of pre-installed models in the container (available at https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models):
|
||||
# - sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06 (English)
|
||||
# - sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06 (Spanish)
|
||||
# - sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06 (German)
|
||||
# - sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06 (French)
|
||||
# - sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10 (Multilingual: Arabic, English, Indonesian, Japanese, Russian, Thai, Vietnamese, Chinese)
|
||||
model: sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06
|
||||
# Language code for reference. Auto-detected from model name if not set.
|
||||
language:
|
||||
# Runtime provider for sherpa-onnx. Supported values: "cpu" or "cuda". Default is "cpu".
|
||||
# Learn about GPU acceleration at https://openvidu.io/docs/ai/live-captions/#gpu-acceleration-for-sherpa-provider
|
||||
provider:
|
||||
# Audio sample rate in Hz. Default is 16000.
|
||||
sample_rate:
|
||||
# Whether to return interim/partial results during recognition. Default is true.
|
||||
partial_results:
|
||||
# Number of threads for ONNX Runtime. Default is 2.
|
||||
num_threads:
|
||||
# Recognizer type ("transducer", "paraformer", "zipformer_ctc", "nemo_ctc", "t_one_ctc"). Auto-detected from model name if not set.
|
||||
recognizer_type:
|
||||
# Decoding method ("greedy_search", "modified_beam_search"). Default is "greedy_search".
|
||||
decoding_method:
|
||||
# Whether to override sherpa's built-in Voice Activity Detection (VAD) with Silero's VAD. Default is false.
|
||||
use_silero_vad: false
|
||||
@ -14,14 +14,4 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
fi
|
||||
|
||||
# Replace the LAN_PRIVATE_IP in the .env file
|
||||
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
|
||||
|
||||
# If sillicon mac, enable EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU flag
|
||||
if [ "$(uname -m)" = "arm64" ]; then
|
||||
if ! grep -q "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU" .env; then
|
||||
echo "# Enable this flag to run Docker Desktop on Apple Silicon Macs" >> .env
|
||||
echo "EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1" >> .env
|
||||
else
|
||||
sed -i'' -e "s/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=.*/EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=1/g" .env
|
||||
fi
|
||||
fi
|
||||
sed -i'' -e "s/LAN_PRIVATE_IP=.*/LAN_PRIVATE_IP=$LAN_PRIVATE_IP/g" .env
|
||||
@ -1,7 +1,6 @@
|
||||
services:
|
||||
caddy-proxy:
|
||||
image: docker.io/openvidu/openvidu-caddy-local:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-caddy-local:3.6.0
|
||||
container_name: caddy-proxy
|
||||
restart: unless-stopped
|
||||
extra_hosts:
|
||||
@ -18,37 +17,40 @@ services:
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
|
||||
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
|
||||
env_file:
|
||||
- ./meet.env
|
||||
volumes:
|
||||
- ./custom-layout:/var/www/custom-layout
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
ports:
|
||||
- 5443:5443
|
||||
- 6443:6443
|
||||
- 7443:7443
|
||||
- 7880:7880
|
||||
- 9443:9443
|
||||
- 9080:9080
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
redis:
|
||||
image: docker.io/redis:7.4.4-alpine
|
||||
platform: linux/amd64
|
||||
image: docker.io/redis:8.6.1-alpine
|
||||
container_name: redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 6379:6379
|
||||
volumes:
|
||||
- redis:/data
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
command: >
|
||||
redis-server
|
||||
--bind 0.0.0.0
|
||||
--requirepass ${REDIS_PASSWORD:-}
|
||||
redis-server --bind 0.0.0.0 --requirepass ${REDIS_PASSWORD:-}
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
minio:
|
||||
image: docker.io/bitnami/minio:2025.5.24-debian-12-r1
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/minio:2025.10.15-debian-12-r9
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 9000:9000
|
||||
@ -57,23 +59,25 @@ services:
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY:-}
|
||||
- MINIO_DEFAULT_BUCKETS=openvidu-appdata
|
||||
- MINIO_CONSOLE_SUBPATH=/minio-console
|
||||
- MINIO_BROWSER=on
|
||||
- MINIO_BROWSER_REDIRECT_URL=http://localhost:7880/minio-console
|
||||
volumes:
|
||||
- minio-data:/bitnami/minio/data
|
||||
- minio-certs:/certs
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
mongo:
|
||||
image: docker.io/bitnami/mongodb:8.0.9
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/mongodb:8.0.19-r1
|
||||
container_name: mongo
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongo-data:/bitnami/mongodb
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- MONGODB_ROOT_USER=${MONGO_ADMIN_USERNAME:-}
|
||||
- MONGODB_ROOT_PASSWORD=${MONGO_ADMIN_PASSWORD:-}
|
||||
@ -81,14 +85,12 @@ services:
|
||||
- MONGODB_REPLICA_SET_MODE=primary
|
||||
- MONGODB_REPLICA_SET_NAME=rs0
|
||||
- MONGODB_REPLICA_SET_KEY=devreplicasetkey
|
||||
- EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU=${EXPERIMENTAL_DOCKER_DESKTOP_FORCE_QEMU:-0}
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
dashboard:
|
||||
image: docker.io/openvidu/openvidu-dashboard:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-dashboard:3.6.0
|
||||
container_name: dashboard
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
@ -96,18 +98,20 @@ services:
|
||||
- ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
|
||||
- ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
|
||||
- DATABASE_URL=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
|
||||
volumes:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
openvidu:
|
||||
image: docker.io/openvidu/openvidu-server-pro:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-server-pro:3.6.0
|
||||
restart: unless-stopped
|
||||
container_name: openvidu
|
||||
extra_hosts:
|
||||
- host.docker.internal:host-gateway
|
||||
environment:
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
|
||||
- OPENVIDU_DEPLOYMENT_TYPE=local
|
||||
- OPENVIDU_ENVIRONMENT=on_premise
|
||||
@ -120,13 +124,13 @@ services:
|
||||
volumes:
|
||||
- ./livekit.yaml:/etc/livekit.yaml
|
||||
- ./scripts/entrypoint_openvidu.sh:/scripts/entrypoint.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
ingress:
|
||||
image: docker.io/openvidu/ingress:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/ingress:3.6.0
|
||||
container_name: ingress
|
||||
restart: unless-stopped
|
||||
extra_hosts:
|
||||
@ -139,13 +143,13 @@ services:
|
||||
- INGRESS_CONFIG_FILE=/etc/ingress.yaml
|
||||
volumes:
|
||||
- ./ingress.yaml:/etc/ingress.yaml
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
egress:
|
||||
image: docker.io/livekit/egress:v1.9.1
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/egress:3.6.0
|
||||
restart: unless-stopped
|
||||
container_name: egress
|
||||
extra_hosts:
|
||||
@ -155,49 +159,42 @@ services:
|
||||
volumes:
|
||||
- ./egress.yaml:/etc/egress.yaml
|
||||
- egress-data:/home/egress
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
default-app:
|
||||
image: docker.io/openvidu/openvidu-call:3.2.0-demo
|
||||
platform: linux/amd64
|
||||
container_name: openvidu-call
|
||||
openvidu-meet:
|
||||
image: docker.io/openvidu/openvidu-meet:3.6.0
|
||||
container_name: openvidu-meet
|
||||
restart: on-failure
|
||||
extra_hosts:
|
||||
- host.docker.internal:host-gateway
|
||||
environment:
|
||||
- USE_HTTPS=${USE_HTTPS:-false}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
- LAN_DOMAIN=${LAN_DOMAIN:-}
|
||||
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
|
||||
- SERVER_PORT=6080
|
||||
- CALL_NAME_ID=OpenViduCall-LOCAL
|
||||
- LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
|
||||
- CALL_PRIVATE_ACCESS=${CALL_PRIVATE_ACCESS:-false}
|
||||
- CALL_USER=${CALL_USER:-}
|
||||
- CALL_SECRET=${CALL_SECRET:-}
|
||||
- CALL_RECORDING=${CALL_RECORDING:-}
|
||||
- CALL_ADMIN_USER=${CALL_ADMIN_USER:-admin}
|
||||
- CALL_ADMIN_SECRET=${CALL_ADMIN_SECRET:-admin}
|
||||
- CALL_LOG_LEVEL=${CALL_LOG_LEVEL:-info}
|
||||
- CALL_S3_BUCKET=${CALL_S3_BUCKET:-openvidu-appdata}
|
||||
- CALL_S3_SERVICE_ENDPOINT=${CALL_S3_SERVICE_ENDPOINT:-http://minio:9000}
|
||||
- CALL_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
||||
- CALL_S3_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- CALL_AWS_REGION=${CALL_AWS_REGION:-us-east-1}
|
||||
- CALL_S3_WITH_PATH_STYLE_ACCESS=${CALL_S3_WITH_PATH_STYLE_ACCESS:-true}
|
||||
- MEET_S3_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
||||
- MEET_S3_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- MEET_REDIS_PASSWORD=${REDIS_PASSWORD:-}
|
||||
- MEET_MONGO_URI=mongodb://${MONGO_ADMIN_USERNAME}:${MONGO_ADMIN_PASSWORD}@mongo:27017/?replicaSet=rs0&readPreference=primaryPreferred
|
||||
- MEET_BASE_PATH=${MEET_BASE_PATH:-/meet}
|
||||
- MEET_CONFIG_DIR=/config/meet.env
|
||||
volumes:
|
||||
- ./scripts/entrypoint_default_app.sh:/scripts/entrypoint.sh
|
||||
- ./meet.env:/config/meet.env
|
||||
- ./scripts/entrypoint_openvidu_meet.sh:/scripts/entrypoint.sh
|
||||
- ./scripts/utils.sh:/scripts/utils.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
entrypoint: /bin/sh /scripts/entrypoint.sh
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
openvidu-v2compatibility:
|
||||
image: docker.io/openvidu/openvidu-v2compatibility:3.2.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-v2compatibility:3.6.0
|
||||
restart: unless-stopped
|
||||
container_name: openvidu-v2compatibility
|
||||
entrypoint: /bin/sh /scripts/entrypoint.sh
|
||||
@ -233,27 +230,33 @@ services:
|
||||
- ./recordings:/opt/openvidu/recordings
|
||||
- ./scripts/entrypoint_v2comp.sh:/scripts/entrypoint.sh
|
||||
- ./scripts/utils.sh:/scripts/utils.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
|
||||
ready-check:
|
||||
image: docker.io/curlimages/curl:8.13.0
|
||||
platform: linux/amd64
|
||||
image: docker.io/openvidu/openvidu-operator:3.6.0
|
||||
container_name: ready-check
|
||||
restart: on-failure
|
||||
volumes:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- MODE=local-ready-check
|
||||
- OPENVIDU_ENVIRONMENT=local-platform
|
||||
- USE_HTTPS=${USE_HTTPS:-false}
|
||||
- LAN_DOMAIN=${LAN_DOMAIN:-}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
- LAN_PRIVATE_IP=${LAN_PRIVATE_IP:-}
|
||||
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- DASHBOARD_ADMIN_USERNAME=${DASHBOARD_ADMIN_USERNAME:-}
|
||||
- DASHBOARD_ADMIN_PASSWORD=${DASHBOARD_ADMIN_PASSWORD:-}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-}
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- V2COMPAT_OPENVIDU_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
env_file:
|
||||
- ./meet.env
|
||||
depends_on:
|
||||
- openvidu
|
||||
- ingress
|
||||
@ -261,14 +264,32 @@ services:
|
||||
- dashboard
|
||||
- minio
|
||||
- mongo
|
||||
volumes:
|
||||
- ./scripts/ready-check.sh:/scripts/ready-check.sh
|
||||
- ./scripts/utils.sh:/scripts/utils.sh
|
||||
command: /bin/sh /scripts/ready-check.sh
|
||||
|
||||
operator:
|
||||
image: docker.io/openvidu/openvidu-operator:3.6.0
|
||||
container_name: operator
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- agents-config:/agents-config
|
||||
- ./:/deployment
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- MODE=agent-manager-local
|
||||
- DEPLOYMENT_FILES_DIR=/deployment
|
||||
- AGENTS_CONFIG_DIR=/agents-config
|
||||
- NETWORK_NAME=openvidu-pro
|
||||
- AGENTS_CONFIG_VOLUME=openvidu-pro-agents-config
|
||||
- LIVEKIT_URL=ws://openvidu:7880/
|
||||
- LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-}
|
||||
- LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-}
|
||||
- REDIS_ADDRESS=redis:6379
|
||||
- REDIS_PASSWORD=${REDIS_PASSWORD:-}
|
||||
depends_on:
|
||||
setup:
|
||||
condition: service_completed_successfully
|
||||
setup:
|
||||
image: docker.io/busybox:1.37.0
|
||||
platform: linux/amd64
|
||||
container_name: setup
|
||||
restart: "no"
|
||||
volumes:
|
||||
@ -276,6 +297,7 @@ services:
|
||||
- mongo-data:/mongo
|
||||
- egress-data:/egress
|
||||
- ./scripts/setup.sh:/scripts/setup.sh
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
environment:
|
||||
- USE_HTTPS=${USE_HTTPS:-false}
|
||||
- LAN_MODE=${LAN_MODE:-false}
|
||||
|
||||
@ -27,6 +27,10 @@ storage:
|
||||
# account_name: your_account_name
|
||||
# account_key: your_account_key
|
||||
# container_name: openvidu-appdata
|
||||
# gcp:
|
||||
# credentials_json: |
|
||||
# your_credentials_json
|
||||
# bucket: openvidu-appdata
|
||||
|
||||
# CPU cost for each type of Egress operation.
|
||||
cpu_cost:
|
||||
@ -38,3 +42,20 @@ cpu_cost:
|
||||
participant_cpu_cost: 0.01
|
||||
track_composite_cpu_cost: 0.01
|
||||
track_cpu_cost: 0.01
|
||||
|
||||
openvidu:
|
||||
# Allocation strategy for new egress requests
|
||||
# - cpuload: the node with the lowest CPU load will be selected. Distributes the CPU load evenly across all nodes.
|
||||
# - binpack: some node already hosting at least one egress will be selected. Fills up nodes before assigning work to new ones.
|
||||
allocation_strategy: cpuload
|
||||
# Whether to use system-wide CPU monitoring or egress process CPU monitoring. This affects the allocation of new egress requests.
|
||||
# It is preferable to set this value to:
|
||||
# - true: when the egress service is running in a shared server also hosting other CPU-intensive services.
|
||||
# - false: when the egress service is running in a dedicated server.
|
||||
use_global_cpu_monitoring: true
|
||||
# Disables the automatic killing of the most expensive egress when CPU is overloaded.
|
||||
# The default "false" value helps keep the node stable, but may cause unexpected egress terminations under high load.
|
||||
disable_cpu_overload_killer: false
|
||||
# Minimum available disk space in MB required to accept new egress requests.
|
||||
# Default: 512 MB. Set to a negative value (e.g., -1) to disable disk space checking.
|
||||
min_disk_space_mb: 512
|
||||
|
||||
@ -56,7 +56,7 @@ webhook:
|
||||
urls:
|
||||
- http://host.docker.internal:4443/livekit/webhook # For OpenVidu 2 compatibility
|
||||
- http://host.docker.internal:6080/livekit/webhook
|
||||
- http://default-app:6080/livekit/webhook
|
||||
- http://openvidu-meet:6080/livekit/webhook
|
||||
ingress:
|
||||
rtmp_base_url: rtmp://localhost:1935/rtmp
|
||||
whip_base_url: http://localhost:8085/whip
|
||||
|
||||
36
pro/meet.env
Normal file
36
pro/meet.env
Normal file
@ -0,0 +1,36 @@
|
||||
# OpenVidu Meet configuration
|
||||
# Static environment variables loaded via MEET_CONFIG_DIR
|
||||
|
||||
SERVER_PORT=6080
|
||||
MEET_NAME_ID=openviduMeet-LOCAL
|
||||
MEET_LOG_LEVEL=info
|
||||
MEET_COOKIE_SECURE=false
|
||||
MEET_INITIAL_ADMIN_USER=admin
|
||||
MEET_INITIAL_ADMIN_PASSWORD=admin
|
||||
MEET_INITIAL_API_KEY=meet-api-key
|
||||
MEET_INITIAL_WEBHOOK_ENABLED=true
|
||||
MEET_INITIAL_WEBHOOK_URL=http://host.docker.internal:6080/webhook
|
||||
|
||||
LIVEKIT_URL_PRIVATE=ws://openvidu:7880/
|
||||
|
||||
# S3 configuration
|
||||
MEET_S3_BUCKET=openvidu-appdata
|
||||
MEET_S3_SUBBUCKET=openvidu-meet
|
||||
MEET_S3_SERVICE_ENDPOINT=http://minio:9000
|
||||
MEET_AWS_REGION=us-east-1
|
||||
MEET_S3_WITH_PATH_STYLE_ACCESS=true
|
||||
|
||||
# Storage backend type
|
||||
MEET_BLOB_STORAGE_MODE=s3
|
||||
|
||||
# Redis configuration
|
||||
MEET_REDIS_HOST=redis
|
||||
MEET_REDIS_PORT=6379
|
||||
MEET_REDIS_DB=0
|
||||
|
||||
# MongoDB configuration
|
||||
MEET_MONGO_ENABLED=true
|
||||
MEET_MONGO_DB_NAME=openvidu-meet
|
||||
|
||||
# Enable live captions using OpenVidu Speech to Text agent
|
||||
MEET_CAPTIONS_ENABLED=false
|
||||
@ -1,7 +1,8 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if [ "$LAN_PRIVATE_IP" != "none" ]; then
|
||||
if [ "$LAN_PRIVATE_IP" != "" ] && [ "$LAN_MODE" = 'true' ]; then
|
||||
echo "Using as NODE_IP: $LAN_PRIVATE_IP"
|
||||
export NODE_IP="$LAN_PRIVATE_IP"
|
||||
fi
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
. /scripts/utils.sh
|
||||
|
||||
URL=$(getDeploymentUrl)
|
||||
URL=$(getDeploymentUrl ws)
|
||||
export LIVEKIT_URL="${URL}"
|
||||
|
||||
/usr/local/bin/entrypoint.sh
|
||||
@ -1,70 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
. /scripts/utils.sh
|
||||
|
||||
trap 'handle_sigint' SIGINT
|
||||
|
||||
handle_sigint() {
|
||||
echo "SIGINT signal received, exiting..."
|
||||
exit 1
|
||||
}
|
||||
|
||||
wait_for_service() {
|
||||
SERVICE_NAME=$1
|
||||
SERVICE_URL=$2
|
||||
shift 2
|
||||
EXTRA=$@
|
||||
if [ -n "$EXTRA" ]; then
|
||||
until curl $EXTRA $SERVICE_URL > /dev/null; do
|
||||
echo "Waiting for $SERVICE_NAME to start...";
|
||||
sleep 1;
|
||||
done;
|
||||
else
|
||||
until curl --silent --head --fail $SERVICE_URL > /dev/null; do
|
||||
echo "Waiting for $SERVICE_NAME to start...";
|
||||
sleep 1;
|
||||
done;
|
||||
fi;
|
||||
}
|
||||
|
||||
wait_for_service 'OpenVidu' 'http://openvidu:7880'
|
||||
wait_for_service 'Ingress' 'http://ingress:9091'
|
||||
wait_for_service 'Egress' 'http://egress:9091'
|
||||
wait_for_service 'Dashboard' 'http://dashboard:5000'
|
||||
wait_for_service 'Minio' 'http://minio:9000/minio/health/live'
|
||||
wait_for_service 'Minio Console' 'http://minio:9001/minio-console'
|
||||
wait_for_service 'Mongo' 'http://mongo:27017' --connect-timeout 10 --silent
|
||||
|
||||
LAN_HTTP_URL=$(getDeploymentUrl http)
|
||||
LAN_WS_URL=$(getDeploymentUrl ws)
|
||||
|
||||
for i in $(seq 1 10); do
|
||||
echo 'Starting OpenVidu... Please be patient...'
|
||||
sleep 1
|
||||
done;
|
||||
echo ''
|
||||
echo ''
|
||||
echo '========================================='
|
||||
echo '🎉 OpenVidu is ready! 🎉'
|
||||
echo '========================================='
|
||||
echo ''
|
||||
echo 'OpenVidu Server & LiveKit Server URLs:'
|
||||
echo ''
|
||||
echo ' - From this machine:'
|
||||
echo ''
|
||||
echo ' - http://localhost:7880'
|
||||
echo ' - ws://localhost:7880'
|
||||
echo ''
|
||||
echo ' - From other devices in your LAN:'
|
||||
echo ''
|
||||
echo " - $LAN_HTTP_URL"
|
||||
echo " - $LAN_WS_URL"
|
||||
echo ''
|
||||
echo '========================================='
|
||||
echo ''
|
||||
echo 'OpenVidu Developer UI (services and passwords):'
|
||||
echo ''
|
||||
echo ' - http://localhost:7880'
|
||||
echo " - $LAN_HTTP_URL"
|
||||
echo ''
|
||||
echo '========================================='
|
||||
@ -1,6 +1,11 @@
|
||||
#!/bin/sh
|
||||
|
||||
if [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
|
||||
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$LAN_MODE" = 'true' ] && [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
echo '------------------------'
|
||||
echo ''
|
||||
echo 'LAN_PRIVATE_IP is required in the .env file.'
|
||||
@ -14,7 +19,9 @@ if [ -z "$LAN_PRIVATE_IP" ]; then
|
||||
echo 'If it can'\''t be found, you can manually set it in the .env file'
|
||||
echo '------------------------'
|
||||
exit 1
|
||||
else
|
||||
fi
|
||||
|
||||
if [ "$LAN_MODE" = 'true' ] && [ -n "$LAN_PRIVATE_IP" ]; then
|
||||
# Check if the LAN_PRIVATE_IP is reachable
|
||||
if ! ping -c 1 -W 1 "$LAN_PRIVATE_IP" > /dev/null; then
|
||||
echo "ERROR: LAN_PRIVATE_IP $LAN_PRIVATE_IP is not reachable"
|
||||
@ -26,15 +33,16 @@ else
|
||||
echo " - MacOS: ./configure_lan_private_ip_macos.sh"
|
||||
echo " - Windows: .\configure_lan_private_ip_windows.bat"
|
||||
echo ""
|
||||
echo " If you don't want to access OpenVidu through your LAN,"
|
||||
echo " you can run without LAN_MODE enabled, simply set"
|
||||
echo " the following variables in the .env file:"
|
||||
echo " USE_HTTPS=false"
|
||||
echo " LAN_MODE=false"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$LAN_MODE" = 'true' ] && [ "$USE_HTTPS" = 'false' ]; then
|
||||
echo 'LAN_MODE cannot be "true" if USE_HTTPS is "false"'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Prepare volumes
|
||||
mkdir -p /minio/data
|
||||
mkdir -p /mongo/data
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user