From 9ae27bf32af752b44456a51675094cbb7175a9e2 Mon Sep 17 00:00:00 2001
From: CSantosM <4a.santos@gmail.com>
Date: Thu, 22 Jan 2026 18:24:50 +0100
Subject: [PATCH] backend: Adds live captions functionality to rooms
Adds support for live captions in meet rooms.
This includes schema definitions, API configurations,
and LiveKit integration for dispatching captions agents.
Captions are disabled by default and can be enabled per room.
---
.../responses/success-get-room.yaml | 2 +
.../responses/success-get-rooms.yaml | 2 +
.../components/schemas/meet-room-config.yaml | 13 +++++
meet-ce/backend/src/config/internal-config.ts | 2 +
meet-ce/backend/src/environment.ts | 2 +-
.../models/mongoose-schemas/room.schema.ts | 19 +++++++
.../src/models/zod-schemas/room.schema.ts | 14 +++--
.../src/services/livekit-webhook.service.ts | 8 +--
.../backend/src/services/livekit.service.ts | 10 ++--
.../src/services/room-member.service.ts | 3 +-
meet-ce/backend/src/services/token.service.ts | 26 ++++++---
meet-ce/typings/src/room-config.ts | 54 ++++++++++---------
12 files changed, 112 insertions(+), 43 deletions(-)
diff --git a/meet-ce/backend/openapi/components/responses/success-get-room.yaml b/meet-ce/backend/openapi/components/responses/success-get-room.yaml
index 7db0b12c..6fcb7a69 100644
--- a/meet-ce/backend/openapi/components/responses/success-get-room.yaml
+++ b/meet-ce/backend/openapi/components/responses/success-get-room.yaml
@@ -26,6 +26,8 @@ content:
enabled: true
e2ee:
enabled: false
+ captions:
+ enabled: false
roles:
moderator:
permissions:
diff --git a/meet-ce/backend/openapi/components/responses/success-get-rooms.yaml b/meet-ce/backend/openapi/components/responses/success-get-rooms.yaml
index 9872b5b8..fb54e370 100644
--- a/meet-ce/backend/openapi/components/responses/success-get-rooms.yaml
+++ b/meet-ce/backend/openapi/components/responses/success-get-rooms.yaml
@@ -35,6 +35,8 @@ content:
enabled: true
e2ee:
enabled: false
+ captions:
+ enabled: false
roles:
moderator:
permissions:
diff --git a/meet-ce/backend/openapi/components/schemas/meet-room-config.yaml b/meet-ce/backend/openapi/components/schemas/meet-room-config.yaml
index f9d599a6..9c794afa 100644
--- a/meet-ce/backend/openapi/components/schemas/meet-room-config.yaml
+++ b/meet-ce/backend/openapi/components/schemas/meet-room-config.yaml
@@ -13,6 +13,9 @@ MeetRoomConfig:
e2ee:
$ref: '#/MeetE2EEConfig'
description: Config for End-to-End Encryption (E2EE) in the room.
+ captions:
+ $ref: '#/MeetCaptionsConfig'
+ description: Config for live captions in the room.
MeetChatConfig:
type: object
properties:
@@ -80,3 +83,13 @@ MeetE2EEConfig:
If true, the room will have End-to-End Encryption (E2EE) enabled.
This ensures that the media streams are encrypted from the sender to the receiver, providing enhanced privacy and security for the participants.
**Enabling E2EE will disable the recording feature for the room**.
+MeetCaptionsConfig:
+ type: object
+ properties:
+ enabled:
+ type: boolean
+ default: false
+ example: false
+ description: >
+ If true, the room will have live captions enabled.
+ This allows participants to see real-time captions of all participants' speech during the meeting.
diff --git a/meet-ce/backend/src/config/internal-config.ts b/meet-ce/backend/src/config/internal-config.ts
index d7b95776..cf4e709c 100644
--- a/meet-ce/backend/src/config/internal-config.ts
+++ b/meet-ce/backend/src/config/internal-config.ts
@@ -49,6 +49,8 @@ export const INTERNAL_CONFIG = {
PARTICIPANT_MAX_CONCURRENT_NAME_REQUESTS: '20', // Maximum number of request by the same name at the same time allowed
PARTICIPANT_NAME_RESERVATION_TTL: '12h' as StringValue, // Time-to-live for participant name reservations
+ CAPTIONS_AGENT_NAME: 'agent-meet-captions',
+
// MongoDB Schema Versions
// These define the current schema version for each collection
// Increment when making breaking changes to the schema structure
diff --git a/meet-ce/backend/src/environment.ts b/meet-ce/backend/src/environment.ts
index d1ec064d..43fcd499 100644
--- a/meet-ce/backend/src/environment.ts
+++ b/meet-ce/backend/src/environment.ts
@@ -85,7 +85,7 @@ export const MEET_ENV = {
ENABLED_MODULES: process.env.ENABLED_MODULES ?? '',
// Agent Speech Processing configuration
- AGENT_SPEECH_PROCESSING_NAME: process.env.MEET_AGENT_SPEECH_PROCESSING_NAME || '',
+ CAPTIONS_ENABLED: process.env.MEET_CAPTIONS || 'true',
};
export function checkModuleEnabled() {
diff --git a/meet-ce/backend/src/models/mongoose-schemas/room.schema.ts b/meet-ce/backend/src/models/mongoose-schemas/room.schema.ts
index 4713e0b1..c09cdf0d 100644
--- a/meet-ce/backend/src/models/mongoose-schemas/room.schema.ts
+++ b/meet-ce/backend/src/models/mongoose-schemas/room.schema.ts
@@ -105,6 +105,20 @@ const MeetE2EEConfigSchema = new Schema(
{ _id: false }
);
+/**
+ * Mongoose schema for MeetRoom captions configuration.
+ */
+const MeetCaptionsConfigSchema = new Schema(
+ {
+ enabled: {
+ type: Boolean,
+ required: true,
+ default: false
+ }
+ },
+ { _id: false }
+);
+
/**
* Sub-schema for room theme configuration.
*/
@@ -181,6 +195,11 @@ const MeetRoomConfigSchema = new Schema(
type: MeetE2EEConfigSchema,
required: true,
default: { enabled: false }
+ },
+ captions: {
+ type: MeetCaptionsConfigSchema,
+ required: true,
+ default: { enabled: false }
}
},
{ _id: false }
diff --git a/meet-ce/backend/src/models/zod-schemas/room.schema.ts b/meet-ce/backend/src/models/zod-schemas/room.schema.ts
index 804bbaee..06a7b998 100644
--- a/meet-ce/backend/src/models/zod-schemas/room.schema.ts
+++ b/meet-ce/backend/src/models/zod-schemas/room.schema.ts
@@ -7,6 +7,7 @@ import {
MeetRecordingConfig,
MeetRecordingLayout,
MeetRoomAutoDeletionPolicy,
+ MeetRoomCaptionsConfig,
MeetRoomConfig,
MeetRoomDeletionPolicyWithMeeting,
MeetRoomDeletionPolicyWithRecordings,
@@ -55,6 +56,10 @@ const E2EEConfigSchema: z.ZodType = z.object({
enabled: z.boolean()
});
+const CaptionsConfigSchema: z.ZodType = z.object({
+ enabled: z.boolean()
+});
+
const ThemeModeSchema: z.ZodType = z.nativeEnum(MeetRoomThemeMode);
const hexColorSchema = z
@@ -92,7 +97,8 @@ const UpdateRoomConfigSchema: z.ZodType> = z
recording: RecordingConfigSchema.optional(),
chat: ChatConfigSchema.optional(),
virtualBackground: VirtualBackgroundConfigSchema.optional(),
- e2ee: E2EEConfigSchema.optional()
+ e2ee: E2EEConfigSchema.optional(),
+ captions: CaptionsConfigSchema.optional()
// appearance: AppearanceConfigSchema,
})
.transform((data: Partial) => {
@@ -123,7 +129,8 @@ const CreateRoomConfigSchema = z
})),
chat: ChatConfigSchema.optional().default(() => ({ enabled: true })),
virtualBackground: VirtualBackgroundConfigSchema.optional().default(() => ({ enabled: true })),
- e2ee: E2EEConfigSchema.optional().default(() => ({ enabled: false }))
+ e2ee: E2EEConfigSchema.optional().default(() => ({ enabled: false })),
+ captions: CaptionsConfigSchema.optional().default(() => ({ enabled: false }))
// appearance: AppearanceConfigSchema,
})
.transform((data) => {
@@ -207,7 +214,8 @@ export const RoomOptionsSchema: z.ZodType = z.object({
},
chat: { enabled: true },
virtualBackground: { enabled: true },
- e2ee: { enabled: false }
+ e2ee: { enabled: false },
+ captions: { enabled: false }
})
// maxParticipants: z
// .number()
diff --git a/meet-ce/backend/src/services/livekit-webhook.service.ts b/meet-ce/backend/src/services/livekit-webhook.service.ts
index 74fb7bf9..dadaa7a0 100644
--- a/meet-ce/backend/src/services/livekit-webhook.service.ts
+++ b/meet-ce/backend/src/services/livekit-webhook.service.ts
@@ -163,8 +163,8 @@ export class LivekitWebhookService {
* @param participant - Information about the newly joined participant.
*/
async handleParticipantJoined(room: Room, participant: ParticipantInfo) {
- // Skip if the participant is an egress participant
- if (this.livekitService.isEgressParticipant(participant)) return;
+ // Skip if the participant is not a standard participant
+ if (!this.livekitService.isStandardParticipant(participant)) return;
try {
const { recordings } = await this.recordingService.getAllRecordings({ roomId: room.name });
@@ -185,8 +185,8 @@ export class LivekitWebhookService {
* @param participant - Information about the participant who left.
*/
async handleParticipantLeft(room: Room, participant: ParticipantInfo) {
- // Skip if the participant is an egress participant
- if (this.livekitService.isEgressParticipant(participant)) return;
+ // Skip if the participant is not a standard participant
+ if (!this.livekitService.isStandardParticipant(participant)) return;
try {
// Release the participant's reserved name
diff --git a/meet-ce/backend/src/services/livekit.service.ts b/meet-ce/backend/src/services/livekit.service.ts
index 630da29b..72debf17 100644
--- a/meet-ce/backend/src/services/livekit.service.ts
+++ b/meet-ce/backend/src/services/livekit.service.ts
@@ -1,3 +1,4 @@
+import { ParticipantInfo_Kind } from '@livekit/protocol';
import { inject, injectable } from 'inversify';
import {
CreateOptions,
@@ -400,8 +401,11 @@ export class LiveKitService {
}
}
- isEgressParticipant(participant: ParticipantInfo): boolean {
- // TODO: Remove deprecated warning by using ParticipantInfo_Kind: participant.kind === ParticipantInfo_Kind.EGRESS;
- return participant.identity.startsWith('EG_') && participant.permission?.recorder === true;
+ /**
+ * Checks if a participant is a standard participant (web clients).
+ * @param participant - Participant info whose `kind` is compared against `ParticipantInfo_Kind.STANDARD`.
+ */
+ isStandardParticipant(participant: ParticipantInfo): boolean {
+ return participant.kind === ParticipantInfo_Kind.STANDARD;
+ }
}
diff --git a/meet-ce/backend/src/services/room-member.service.ts b/meet-ce/backend/src/services/room-member.service.ts
index 6fcee613..950ae899 100644
--- a/meet-ce/backend/src/services/room-member.service.ts
+++ b/meet-ce/backend/src/services/room-member.service.ts
@@ -133,9 +133,10 @@ export class RoomMemberService {
// Get participant permissions (with join meeting)
const permissions = await this.getRoomMemberPermissions(roomId, role, true);
+ const withCaptions = room.config.captions?.enabled ?? false; // NOTE(review): guards a missing `captions` config (e.g. rooms stored before this field existed — confirm)
// Generate token with participant name
- return this.tokenService.generateRoomMemberToken(role, permissions, participantName, participantIdentity);
+ return this.tokenService.generateRoomMemberToken(role, permissions, participantName, participantIdentity, withCaptions);
}
/**
diff --git a/meet-ce/backend/src/services/token.service.ts b/meet-ce/backend/src/services/token.service.ts
index 632838e3..84fc6c47 100644
--- a/meet-ce/backend/src/services/token.service.ts
+++ b/meet-ce/backend/src/services/token.service.ts
@@ -42,7 +42,8 @@ export class TokenService {
role: MeetRoomMemberRole,
permissions: MeetRoomMemberPermissions,
participantName?: string,
- participantIdentity?: string
+ participantIdentity?: string,
+ roomWithCaptions = false
): Promise {
const metadata: MeetRoomMemberTokenMetadata = {
livekitUrl: MEET_ENV.LIVEKIT_URL,
@@ -56,23 +57,36 @@ export class TokenService {
ttl: INTERNAL_CONFIG.ROOM_MEMBER_TOKEN_EXPIRATION,
metadata: JSON.stringify(metadata)
};
- return await this.generateJwtToken(tokenOptions, permissions.livekit as VideoGrant);
+ return await this.generateJwtToken(tokenOptions, permissions.livekit as VideoGrant, roomWithCaptions);
}
- private async generateJwtToken(tokenOptions: AccessTokenOptions, grants?: VideoGrant): Promise {
+ private async generateJwtToken(
+ tokenOptions: AccessTokenOptions,
+ grants?: VideoGrant,
+ roomWithCaptions = false
+ ): Promise {
const at = new AccessToken(MEET_ENV.LIVEKIT_API_KEY, MEET_ENV.LIVEKIT_API_SECRET, tokenOptions);
if (grants) {
at.addGrant(grants);
}
- if (MEET_ENV.AGENT_SPEECH_PROCESSING_NAME) {
+ const captionsEnabledInEnv = MEET_ENV.CAPTIONS_ENABLED === 'true';
+ const captionsEnabledInRoom = Boolean(roomWithCaptions);
- this.logger.debug('Adding speech processing agent dispatch to token', MEET_ENV.AGENT_SPEECH_PROCESSING_NAME);
+ // Warn when the room asks for captions but the MEET_CAPTIONS environment flag (read into MEET_ENV.CAPTIONS_ENABLED) is not 'true'
+ if (!captionsEnabledInEnv && captionsEnabledInRoom) {
+ this.logger.warn(
+ `Captions feature is disabled in environment but Room is created with captions enabled. Please enable captions in environment by setting MEET_CAPTIONS=true to ensure proper functionality.`
+ );
+ }
+
+ if (captionsEnabledInEnv && captionsEnabledInRoom) {
+ this.logger.debug('Activating Captions Agent. Configuring Room Agent Dispatch.');
at.roomConfig = new RoomConfiguration({
agents: [
new RoomAgentDispatch({
- agentName: MEET_ENV.AGENT_SPEECH_PROCESSING_NAME
+ agentName: INTERNAL_CONFIG.CAPTIONS_AGENT_NAME
})
]
});
diff --git a/meet-ce/typings/src/room-config.ts b/meet-ce/typings/src/room-config.ts
index c8ae2104..01ccfa12 100644
--- a/meet-ce/typings/src/room-config.ts
+++ b/meet-ce/typings/src/room-config.ts
@@ -4,56 +4,60 @@ import { MeetRecordingLayout } from './recording.model';
* Interface representing the config for a room.
*/
export interface MeetRoomConfig {
- chat: MeetChatConfig;
- recording: MeetRecordingConfig;
- virtualBackground: MeetVirtualBackgroundConfig;
- e2ee: MeetE2EEConfig;
- // appearance: MeetAppearanceConfig;
+ chat: MeetChatConfig;
+ recording: MeetRecordingConfig;
+ virtualBackground: MeetVirtualBackgroundConfig;
+ e2ee: MeetE2EEConfig;
+ captions: MeetRoomCaptionsConfig;
+ // appearance: MeetAppearanceConfig;
}
/**
* Interface representing the config for recordings in a room.
*/
export interface MeetRecordingConfig {
- enabled: boolean;
- layout?: MeetRecordingLayout;
- allowAccessTo?: MeetRecordingAccess;
+ enabled: boolean;
+ layout?: MeetRecordingLayout;
+ allowAccessTo?: MeetRecordingAccess;
}
export enum MeetRecordingAccess {
- ADMIN = 'admin', // Only admins can access the recording
- ADMIN_MODERATOR = 'admin_moderator', // Admins and moderators can access
- ADMIN_MODERATOR_SPEAKER = 'admin_moderator_speaker' // Admins, moderators and speakers can access
+ ADMIN = 'admin', // Only admins can access the recording
+ ADMIN_MODERATOR = 'admin_moderator', // Admins and moderators can access
+ ADMIN_MODERATOR_SPEAKER = 'admin_moderator_speaker', // Admins, moderators and speakers can access
}
export interface MeetChatConfig {
- enabled: boolean;
+ enabled: boolean;
}
export interface MeetVirtualBackgroundConfig {
- enabled: boolean;
+ enabled: boolean;
}
export interface MeetE2EEConfig {
- enabled: boolean;
+ enabled: boolean;
+}
+export interface MeetRoomCaptionsConfig {
+ enabled: boolean;
}
export interface MeetAppearanceConfig {
- themes: MeetRoomTheme[];
+ themes: MeetRoomTheme[];
}
export interface MeetRoomTheme {
- name: string;
- enabled: boolean;
- baseTheme: MeetRoomThemeMode;
- backgroundColor?: string;
- primaryColor?: string;
- secondaryColor?: string;
- accentColor?: string;
- surfaceColor?: string;
+ name: string;
+ enabled: boolean;
+ baseTheme: MeetRoomThemeMode;
+ backgroundColor?: string;
+ primaryColor?: string;
+ secondaryColor?: string;
+ accentColor?: string;
+ surfaceColor?: string;
}
export enum MeetRoomThemeMode {
- LIGHT = 'light',
- DARK = 'dark'
+ LIGHT = 'light',
+ DARK = 'dark',
}