backend: Adds live captions functionality to rooms
Adds support for live captions in meet rooms. This includes schema definitions, API configurations, and LiveKit integration for dispatching captions agents. Captions are disabled by default and can be enabled per room.
This commit is contained in:
parent
f677b18879
commit
9ae27bf32a
@ -26,6 +26,8 @@ content:
|
||||
enabled: true
|
||||
e2ee:
|
||||
enabled: false
|
||||
captions:
|
||||
enabled: false
|
||||
roles:
|
||||
moderator:
|
||||
permissions:
|
||||
|
||||
@ -35,6 +35,8 @@ content:
|
||||
enabled: true
|
||||
e2ee:
|
||||
enabled: false
|
||||
captions:
|
||||
enabled: false
|
||||
roles:
|
||||
moderator:
|
||||
permissions:
|
||||
|
||||
@ -13,6 +13,9 @@ MeetRoomConfig:
|
||||
e2ee:
|
||||
$ref: '#/MeetE2EEConfig'
|
||||
description: Config for End-to-End Encryption (E2EE) in the room.
|
||||
captions:
|
||||
$ref: '#/MeetCaptionsConfig'
|
||||
description: Config for live captions in the room.
|
||||
MeetChatConfig:
|
||||
type: object
|
||||
properties:
|
||||
@ -80,3 +83,13 @@ MeetE2EEConfig:
|
||||
If true, the room will have End-to-End Encryption (E2EE) enabled.<br/>
|
||||
This ensures that the media streams are encrypted from the sender to the receiver, providing enhanced privacy and security for the participants.<br/>
|
||||
**Enabling E2EE will disable the recording feature for the room**.
|
||||
MeetCaptionsConfig:
|
||||
type: object
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
default: false
|
||||
example: false
|
||||
description: >
|
||||
If true, the room will have live captions enabled.<br/>
|
||||
This allows participants to see real-time captions of all participants' speech during the meeting.<br/>
|
||||
|
||||
@ -49,6 +49,8 @@ export const INTERNAL_CONFIG = {
|
||||
PARTICIPANT_MAX_CONCURRENT_NAME_REQUESTS: '20', // Maximum number of request by the same name at the same time allowed
|
||||
PARTICIPANT_NAME_RESERVATION_TTL: '12h' as StringValue, // Time-to-live for participant name reservations
|
||||
|
||||
CAPTIONS_AGENT_NAME: 'agent-meet-captions',
|
||||
|
||||
// MongoDB Schema Versions
|
||||
// These define the current schema version for each collection
|
||||
// Increment when making breaking changes to the schema structure
|
||||
|
||||
@ -85,7 +85,7 @@ export const MEET_ENV = {
|
||||
ENABLED_MODULES: process.env.ENABLED_MODULES ?? '',
|
||||
|
||||
// Agent Speech Processing configuration
|
||||
AGENT_SPEECH_PROCESSING_NAME: process.env.MEET_AGENT_SPEECH_PROCESSING_NAME || '',
|
||||
CAPTIONS_ENABLED: process.env.MEET_CAPTIONS || 'true',
|
||||
};
|
||||
|
||||
export function checkModuleEnabled() {
|
||||
|
||||
@ -105,6 +105,20 @@ const MeetE2EEConfigSchema = new Schema(
|
||||
{ _id: false }
|
||||
);
|
||||
|
||||
/**
|
||||
* Mongoose schema for MeetRoom captions configuration.
|
||||
*/
|
||||
// Sub-schema used as the type of the `captions` field in MeetRoomConfigSchema.
const MeetCaptionsConfigSchema = new Schema(
|
||||
{
|
||||
// Flag that turns live captions on for the room; mandatory, and false when not supplied.
enabled: {
|
||||
type: Boolean,
|
||||
required: true,
|
||||
default: false
|
||||
}
|
||||
},
|
||||
// Mongoose schema option: do not generate a separate _id for this embedded sub-document.
{ _id: false }
|
||||
);
|
||||
|
||||
/**
|
||||
* Sub-schema for room theme configuration.
|
||||
*/
|
||||
@ -181,6 +195,11 @@ const MeetRoomConfigSchema = new Schema(
|
||||
type: MeetE2EEConfigSchema,
|
||||
required: true,
|
||||
default: { enabled: false }
|
||||
},
|
||||
captions: {
|
||||
type: MeetCaptionsConfigSchema,
|
||||
required: true,
|
||||
default: { enabled: false }
|
||||
}
|
||||
},
|
||||
{ _id: false }
|
||||
|
||||
@ -7,6 +7,7 @@ import {
|
||||
MeetRecordingConfig,
|
||||
MeetRecordingLayout,
|
||||
MeetRoomAutoDeletionPolicy,
|
||||
MeetRoomCaptionsConfig,
|
||||
MeetRoomConfig,
|
||||
MeetRoomDeletionPolicyWithMeeting,
|
||||
MeetRoomDeletionPolicyWithRecordings,
|
||||
@ -55,6 +56,10 @@ const E2EEConfigSchema: z.ZodType<MeetE2EEConfig> = z.object({
|
||||
enabled: z.boolean()
|
||||
});
|
||||
|
||||
/**
 * Zod validator for the captions section of a room config.
 * Accepts an object whose `enabled` property is a boolean.
 * Referenced as an optional field by the update-config schema and, with a
 * `{ enabled: false }` default, by the create-config schema.
 */
const CaptionsConfigSchema: z.ZodType<MeetRoomCaptionsConfig> = z.object({
|
||||
// Must be a boolean; no default is applied at this level.
enabled: z.boolean()
|
||||
});
|
||||
|
||||
const ThemeModeSchema: z.ZodType<MeetRoomThemeMode> = z.nativeEnum(MeetRoomThemeMode);
|
||||
|
||||
const hexColorSchema = z
|
||||
@ -92,7 +97,8 @@ const UpdateRoomConfigSchema: z.ZodType<Partial<MeetRoomConfig>> = z
|
||||
recording: RecordingConfigSchema.optional(),
|
||||
chat: ChatConfigSchema.optional(),
|
||||
virtualBackground: VirtualBackgroundConfigSchema.optional(),
|
||||
e2ee: E2EEConfigSchema.optional()
|
||||
e2ee: E2EEConfigSchema.optional(),
|
||||
captions: CaptionsConfigSchema.optional()
|
||||
// appearance: AppearanceConfigSchema,
|
||||
})
|
||||
.transform((data: Partial<MeetRoomConfig>) => {
|
||||
@ -123,7 +129,8 @@ const CreateRoomConfigSchema = z
|
||||
})),
|
||||
chat: ChatConfigSchema.optional().default(() => ({ enabled: true })),
|
||||
virtualBackground: VirtualBackgroundConfigSchema.optional().default(() => ({ enabled: true })),
|
||||
e2ee: E2EEConfigSchema.optional().default(() => ({ enabled: false }))
|
||||
e2ee: E2EEConfigSchema.optional().default(() => ({ enabled: false })),
|
||||
captions: CaptionsConfigSchema.optional().default(() => ({ enabled: false }))
|
||||
// appearance: AppearanceConfigSchema,
|
||||
})
|
||||
.transform((data) => {
|
||||
@ -207,7 +214,8 @@ export const RoomOptionsSchema: z.ZodType<MeetRoomOptions> = z.object({
|
||||
},
|
||||
chat: { enabled: true },
|
||||
virtualBackground: { enabled: true },
|
||||
e2ee: { enabled: false }
|
||||
e2ee: { enabled: false },
|
||||
captions: { enabled: false }
|
||||
})
|
||||
// maxParticipants: z
|
||||
// .number()
|
||||
|
||||
@ -163,8 +163,8 @@ export class LivekitWebhookService {
|
||||
* @param participant - Information about the newly joined participant.
|
||||
*/
|
||||
async handleParticipantJoined(room: Room, participant: ParticipantInfo) {
|
||||
// Skip if the participant is an egress participant
|
||||
if (this.livekitService.isEgressParticipant(participant)) return;
|
||||
// Skip if the participant is not a standard participant
|
||||
if (!this.livekitService.isStandardParticipant(participant)) return;
|
||||
|
||||
try {
|
||||
const { recordings } = await this.recordingService.getAllRecordings({ roomId: room.name });
|
||||
@ -185,8 +185,8 @@ export class LivekitWebhookService {
|
||||
* @param participant - Information about the participant who left.
|
||||
*/
|
||||
async handleParticipantLeft(room: Room, participant: ParticipantInfo) {
|
||||
// Skip if the participant is an egress participant
|
||||
if (this.livekitService.isEgressParticipant(participant)) return;
|
||||
// Skip if the participant is not a standard participant
|
||||
if (!this.livekitService.isStandardParticipant(participant)) return;
|
||||
|
||||
try {
|
||||
// Release the participant's reserved name
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { ParticipantInfo_Kind } from '@livekit/protocol';
|
||||
import { inject, injectable } from 'inversify';
|
||||
import {
|
||||
CreateOptions,
|
||||
@ -400,8 +401,11 @@ export class LiveKitService {
|
||||
}
|
||||
}
|
||||
|
||||
isEgressParticipant(participant: ParticipantInfo): boolean {
|
||||
// TODO: Remove deprecated warning by using ParticipantInfo_Kind: participant.kind === ParticipantInfo_Kind.EGRESS;
|
||||
return participant.identity.startsWith('EG_') && participant.permission?.recorder === true;
|
||||
/**
|
||||
* Checks if a participant is a standard participant (web clients).
|
||||
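* @param participant - The LiveKit participant to check.
* @returns true when the participant's kind is `ParticipantInfo_Kind.STANDARD`, false otherwise.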
|
||||
*/
|
||||
isStandardParticipant(participant: ParticipantInfo): boolean {
|
||||
// Decide purely from the participant's `kind` field; any kind other than STANDARD yields false.
return participant.kind === ParticipantInfo_Kind.STANDARD;
|
||||
}
|
||||
}
|
||||
|
||||
@ -133,9 +133,10 @@ export class RoomMemberService {
|
||||
|
||||
// Get participant permissions (with join meeting)
|
||||
const permissions = await this.getRoomMemberPermissions(roomId, role, true);
|
||||
const withCaptions = room.config.captions.enabled ?? false;
|
||||
|
||||
// Generate token with participant name
|
||||
return this.tokenService.generateRoomMemberToken(role, permissions, participantName, participantIdentity);
|
||||
return this.tokenService.generateRoomMemberToken(role, permissions, participantName, participantIdentity, withCaptions);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -42,7 +42,8 @@ export class TokenService {
|
||||
role: MeetRoomMemberRole,
|
||||
permissions: MeetRoomMemberPermissions,
|
||||
participantName?: string,
|
||||
participantIdentity?: string
|
||||
participantIdentity?: string,
|
||||
roomWithCaptions = false
|
||||
): Promise<string> {
|
||||
const metadata: MeetRoomMemberTokenMetadata = {
|
||||
livekitUrl: MEET_ENV.LIVEKIT_URL,
|
||||
@ -56,23 +57,36 @@ export class TokenService {
|
||||
ttl: INTERNAL_CONFIG.ROOM_MEMBER_TOKEN_EXPIRATION,
|
||||
metadata: JSON.stringify(metadata)
|
||||
};
|
||||
return await this.generateJwtToken(tokenOptions, permissions.livekit as VideoGrant);
|
||||
return await this.generateJwtToken(tokenOptions, permissions.livekit as VideoGrant, roomWithCaptions);
|
||||
}
|
||||
|
||||
private async generateJwtToken(tokenOptions: AccessTokenOptions, grants?: VideoGrant): Promise<string> {
|
||||
private async generateJwtToken(
|
||||
tokenOptions: AccessTokenOptions,
|
||||
grants?: VideoGrant,
|
||||
roomWithCaptions = false
|
||||
): Promise<string> {
|
||||
const at = new AccessToken(MEET_ENV.LIVEKIT_API_KEY, MEET_ENV.LIVEKIT_API_SECRET, tokenOptions);
|
||||
|
||||
if (grants) {
|
||||
at.addGrant(grants);
|
||||
}
|
||||
|
||||
if (MEET_ENV.AGENT_SPEECH_PROCESSING_NAME) {
|
||||
const captionsEnabledInEnv = MEET_ENV.CAPTIONS_ENABLED === 'true';
|
||||
const captionsEnabledInRoom = Boolean(roomWithCaptions);
|
||||
|
||||
this.logger.debug('Adding speech processing agent dispatch to token', MEET_ENV.AGENT_SPEECH_PROCESSING_NAME);
|
||||
// Warn if configuration is inconsistent
|
||||
if (!captionsEnabledInEnv && captionsEnabledInRoom) {
|
||||
this.logger.warn(
|
||||
`Captions feature is disabled in environment but Room is created with captions enabled. Please enable captions in environment by setting MEET_CAPTIONS_ENABLED=true to ensure proper functionality.`
|
||||
);
|
||||
}
|
||||
|
||||
if (captionsEnabledInEnv && captionsEnabledInRoom) {
|
||||
this.logger.debug('Activating Captions Agent. Configuring Room Agent Dispatch.');
|
||||
at.roomConfig = new RoomConfiguration({
|
||||
agents: [
|
||||
new RoomAgentDispatch({
|
||||
agentName: MEET_ENV.AGENT_SPEECH_PROCESSING_NAME
|
||||
agentName: INTERNAL_CONFIG.CAPTIONS_AGENT_NAME
|
||||
})
|
||||
]
|
||||
});
|
||||
|
||||
@ -4,56 +4,60 @@ import { MeetRecordingLayout } from './recording.model';
|
||||
* Interface representing the config for a room.
|
||||
*/
|
||||
export interface MeetRoomConfig {
|
||||
chat: MeetChatConfig;
|
||||
recording: MeetRecordingConfig;
|
||||
virtualBackground: MeetVirtualBackgroundConfig;
|
||||
e2ee: MeetE2EEConfig;
|
||||
// appearance: MeetAppearanceConfig;
|
||||
chat: MeetChatConfig;
|
||||
recording: MeetRecordingConfig;
|
||||
virtualBackground: MeetVirtualBackgroundConfig;
|
||||
e2ee: MeetE2EEConfig;
|
||||
captions: MeetRoomCaptionsConfig;
|
||||
// appearance: MeetAppearanceConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* Interface representing the config for recordings in a room.
|
||||
*/
|
||||
export interface MeetRecordingConfig {
|
||||
enabled: boolean;
|
||||
layout?: MeetRecordingLayout;
|
||||
allowAccessTo?: MeetRecordingAccess;
|
||||
enabled: boolean;
|
||||
layout?: MeetRecordingLayout;
|
||||
allowAccessTo?: MeetRecordingAccess;
|
||||
}
|
||||
|
||||
export enum MeetRecordingAccess {
|
||||
ADMIN = 'admin', // Only admins can access the recording
|
||||
ADMIN_MODERATOR = 'admin_moderator', // Admins and moderators can access
|
||||
ADMIN_MODERATOR_SPEAKER = 'admin_moderator_speaker' // Admins, moderators and speakers can access
|
||||
ADMIN = 'admin', // Only admins can access the recording
|
||||
ADMIN_MODERATOR = 'admin_moderator', // Admins and moderators can access
|
||||
ADMIN_MODERATOR_SPEAKER = 'admin_moderator_speaker', // Admins, moderators and speakers can access
|
||||
}
|
||||
|
||||
export interface MeetChatConfig {
|
||||
enabled: boolean;
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
export interface MeetVirtualBackgroundConfig {
|
||||
enabled: boolean;
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
export interface MeetE2EEConfig {
|
||||
enabled: boolean;
|
||||
enabled: boolean;
|
||||
}
|
||||
/**
 * Interface representing the config for live captions in a room.
 * Used as the type of the `captions` member of MeetRoomConfig.
 */
export interface MeetRoomCaptionsConfig {
|
||||
// Whether live captions are enabled for the room.
enabled: boolean;
|
||||
}
|
||||
|
||||
export interface MeetAppearanceConfig {
|
||||
themes: MeetRoomTheme[];
|
||||
themes: MeetRoomTheme[];
|
||||
}
|
||||
|
||||
export interface MeetRoomTheme {
|
||||
name: string;
|
||||
enabled: boolean;
|
||||
baseTheme: MeetRoomThemeMode;
|
||||
backgroundColor?: string;
|
||||
primaryColor?: string;
|
||||
secondaryColor?: string;
|
||||
accentColor?: string;
|
||||
surfaceColor?: string;
|
||||
name: string;
|
||||
enabled: boolean;
|
||||
baseTheme: MeetRoomThemeMode;
|
||||
backgroundColor?: string;
|
||||
primaryColor?: string;
|
||||
secondaryColor?: string;
|
||||
accentColor?: string;
|
||||
surfaceColor?: string;
|
||||
}
|
||||
|
||||
export enum MeetRoomThemeMode {
|
||||
LIGHT = 'light',
|
||||
DARK = 'dark'
|
||||
LIGHT = 'light',
|
||||
DARK = 'dark',
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user