From 8a268d8e658f3caf39a62bac4084db1a155afdaa Mon Sep 17 00:00:00 2001
From: pabloFuente <pablofuenteperez@gmail.com>
Date: Tue, 24 Jun 2025 19:12:24 +0200
Subject: [PATCH] Updated agent-speech-processing.yaml

---
 community/agent-speech-processing.yaml | 26 ++++++++++++++++----------
 pro/agent-speech-processing.yaml       | 26 ++++++++++++++++----------
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/community/agent-speech-processing.yaml b/community/agent-speech-processing.yaml
index 2e86075..756390f 100644
--- a/community/agent-speech-processing.yaml
+++ b/community/agent-speech-processing.yaml
@@ -51,18 +51,18 @@ live_captions:
     speech_key:
     speech_auth_token:
     speech_region:
-    # Azure handles multiple languages and can auto-detect the language used. It requires the candidate set to be set.
+    # Azure handles multiple languages and can auto-detect the language used. Auto-detection requires a set of candidate languages to be provided, e.g. ["en-US", "es-ES"]
     # See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#supported-languages
-    languages:
+    language:
     # Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
     # See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
     profanity:
 
   azure_openai:
     # Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
-    # Azure OpenAI API key. Mandatory value.
+    # Azure OpenAI API key
     azure_api_key:
-    # Azure Active Directory token. Mandatory value.
+    # Azure Active Directory token
     azure_ad_token:
     # Azure OpenAI endpoint in the following format: https://{your-resource-name}.openai.azure.com. Mandatory value.
     azure_endpoint:
@@ -124,11 +124,13 @@ live_captions:
   openai:
     # API key for OpenAI. See https://platform.openai.com/api-keys
     api_key:
-    # See https://platform.openai.com/docs/guides/speech-to-text
+    # The OpenAI model to use for transcription. See https://platform.openai.com/docs/guides/speech-to-text
     model:
     # The language of the input audio. Supplying the input language in ISO-639-1 format
     # (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
     language:
+    # Optional text prompt to guide the transcription. Only supported for whisper-1.
+    prompt:
 
   groq:
     # API key for Groq. See https://console.groq.com/keys
@@ -148,15 +150,19 @@ live_captions:
     model:
     # See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
     language:
-    # Whether to return interim (non-final) transcription results. Defaults to true
+    # Whether to enable automatic language detection. Defaults to false. See https://developers.deepgram.com/docs/language-detection
+    detect_language: false
+    # Whether to return interim (non-final) transcription results. Defaults to true. See https://developers.deepgram.com/docs/interim-results
     interim_results: true
-    # Whether to apply smart formatting to numbers, dates, etc. Defaults to true
+    # Whether to apply smart formatting to numbers, dates, etc. Defaults to true. See https://developers.deepgram.com/docs/smart-format
     smart_format: true
-    # Whether to add punctuations to the transcription. Defaults to true. Turn detector will work better with punctuations
+    # When smart_format is enabled, no_delay prevents it from waiting for a sequence to be complete before returning results. Defaults to true. See https://developers.deepgram.com/docs/smart-format#using-no-delay
+    no_delay: true
+    # Whether to add punctuation to the transcription. Defaults to true. The turn detector works better with punctuation. See https://developers.deepgram.com/docs/punctuation
     punctuate: true
-    # Whether to include filler words (um, uh, etc.) in transcription. Defaults to true
+    # Whether to include filler words (um, uh, etc.) in transcription. Defaults to true. See https://developers.deepgram.com/docs/filler-words
     filler_words: true
-    # Whether to filter profanity from the transcription. Defaults to false
+    # Whether to filter profanity from the transcription. Defaults to false. See https://developers.deepgram.com/docs/profanity-filter
     profanity_filter: false
     # List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). Defaults to None. keywords does not work with Nova-3 models. Use keyterms instead.
     # keywords:
diff --git a/pro/agent-speech-processing.yaml b/pro/agent-speech-processing.yaml
index 2e86075..756390f 100644
--- a/pro/agent-speech-processing.yaml
+++ b/pro/agent-speech-processing.yaml
@@ -51,18 +51,18 @@ live_captions:
     speech_key:
     speech_auth_token:
     speech_region:
-    # Azure handles multiple languages and can auto-detect the language used. It requires the candidate set to be set.
+    # Azure handles multiple languages and can auto-detect the language used. Auto-detection requires a set of candidate languages to be provided, e.g. ["en-US", "es-ES"]
     # See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt#supported-languages
-    languages:
+    language:
     # Removes profanity (swearing), or replaces letters of profane words with stars. Valid values: Masked | Removed | Raw
     # See https://learn.microsoft.com/en-us/azure/ai-services/translator/profanity-filtering
     profanity:
 
   azure_openai:
     # Credentials for Azure OpenAI APIs. See https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai
-    # Azure OpenAI API key. Mandatory value.
+    # Azure OpenAI API key
     azure_api_key:
-    # Azure Active Directory token. Mandatory value.
+    # Azure Active Directory token
     azure_ad_token:
     # Azure OpenAI endpoint in the following format: https://{your-resource-name}.openai.azure.com. Mandatory value.
     azure_endpoint:
@@ -124,11 +124,13 @@ live_captions:
   openai:
     # API key for OpenAI. See https://platform.openai.com/api-keys
     api_key:
-    # See https://platform.openai.com/docs/guides/speech-to-text
+    # The OpenAI model to use for transcription. See https://platform.openai.com/docs/guides/speech-to-text
     model:
     # The language of the input audio. Supplying the input language in ISO-639-1 format
     # (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) will improve accuracy and latency.
     language:
+    # Optional text prompt to guide the transcription. Only supported for whisper-1.
+    prompt:
 
   groq:
     # API key for Groq. See https://console.groq.com/keys
@@ -148,15 +150,19 @@ live_captions:
     model:
     # See https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query.language
     language:
-    # Whether to return interim (non-final) transcription results. Defaults to true
+    # Whether to enable automatic language detection. Defaults to false. See https://developers.deepgram.com/docs/language-detection
+    detect_language: false
+    # Whether to return interim (non-final) transcription results. Defaults to true. See https://developers.deepgram.com/docs/interim-results
     interim_results: true
-    # Whether to apply smart formatting to numbers, dates, etc. Defaults to true
+    # Whether to apply smart formatting to numbers, dates, etc. Defaults to true. See https://developers.deepgram.com/docs/smart-format
     smart_format: true
-    # Whether to add punctuations to the transcription. Defaults to true. Turn detector will work better with punctuations
+    # When smart_format is enabled, no_delay prevents it from waiting for a sequence to be complete before returning results. Defaults to true. See https://developers.deepgram.com/docs/smart-format#using-no-delay
+    no_delay: true
+    # Whether to add punctuation to the transcription. Defaults to true. The turn detector works better with punctuation. See https://developers.deepgram.com/docs/punctuation
     punctuate: true
-    # Whether to include filler words (um, uh, etc.) in transcription. Defaults to true
+    # Whether to include filler words (um, uh, etc.) in transcription. Defaults to true. See https://developers.deepgram.com/docs/filler-words
     filler_words: true
-    # Whether to filter profanity from the transcription. Defaults to false
+    # Whether to filter profanity from the transcription. Defaults to false. See https://developers.deepgram.com/docs/profanity-filter
     profanity_filter: false
     # List of tuples containing keywords and their boost values for improved recognition. Each tuple should be (keyword: str, boost: float). Defaults to None. keywords does not work with Nova-3 models. Use keyterms instead.
     # keywords: