Customized meet for multi-user translation agent

This commit is contained in:
David Zhao 2025-07-28 00:44:33 +01:00
parent 762f1c4a6e
commit 8ca5bc9f40
11 changed files with 1033 additions and 13 deletions

View File

@ -2,6 +2,7 @@ import { randomString } from '@/lib/client-utils';
import { getLiveKitURL } from '@/lib/getLiveKitURL';
import { ConnectionDetails } from '@/lib/types';
import { AccessToken, AccessTokenOptions, VideoGrant } from 'livekit-server-sdk';
import { RoomAgentDispatch, RoomConfiguration } from '@livekit/protocol';
import { NextRequest, NextResponse } from 'next/server';
const API_KEY = process.env.LIVEKIT_API_KEY;
@ -17,9 +18,11 @@ export async function GET(request: NextRequest) {
const participantName = request.nextUrl.searchParams.get('participantName');
const metadata = request.nextUrl.searchParams.get('metadata') ?? '';
const region = request.nextUrl.searchParams.get('region');
const language = request.nextUrl.searchParams.get('language') ?? 'en';
if (!LIVEKIT_URL) {
throw new Error('LIVEKIT_URL is not defined');
}
const livekitServerUrl = region ? getLiveKitURL(LIVEKIT_URL, region) : LIVEKIT_URL;
let randomParticipantPostfix = request.cookies.get(COOKIE_KEY)?.value;
if (livekitServerUrl === undefined) {
@ -33,7 +36,6 @@ export async function GET(request: NextRequest) {
return new NextResponse('Missing required query parameter: participantName', { status: 400 });
}
// Generate participant token
if (!randomParticipantPostfix) {
randomParticipantPostfix = randomString(4);
}
@ -42,10 +44,15 @@ export async function GET(request: NextRequest) {
identity: `${participantName}__${randomParticipantPostfix}`,
name: participantName,
metadata,
attributes: {
language,
}
},
roomName,
);
console.info("token:", participantToken);
// Return connection details
const data: ConnectionDetails = {
serverUrl: livekitServerUrl,
@ -75,8 +82,14 @@ function createParticipantToken(userInfo: AccessTokenOptions, roomName: string)
canPublish: true,
canPublishData: true,
canSubscribe: true,
canUpdateOwnMetadata: true,
};
at.addGrant(grant);
at.roomConfig = new RoomConfiguration({
agents: [new RoomAgentDispatch({
agentName: "translator",
})],
})
return at.toJwt();
}

View File

@ -44,6 +44,8 @@ function Tabs(props: React.PropsWithChildren<{}>) {
function DemoMeetingTab(props: { label: string }) {
const router = useRouter();
const [e2ee, setE2ee] = useState(false);
// TODO(dz): we need to set this to the default language of the browser
const [language, setLanguage] = useState("en")
const [sharedPassphrase, setSharedPassphrase] = useState(randomString(64));
const startMeeting = () => {
if (e2ee) {
@ -60,6 +62,12 @@ function DemoMeetingTab(props: { label: string }) {
</button>
<div style={{ display: 'flex', flexDirection: 'column', gap: '1rem' }}>
<div style={{ display: 'flex', flexDirection: 'row', gap: '1rem' }}>
<select
id="language"
onChange={(ev) => setLanguage(ev.target.value)}
>
</select>
<input
id="use-e2ee"
type="checkbox"

View File

@ -6,13 +6,12 @@ import { DebugMode } from '@/lib/Debug';
import { KeyboardShortcuts } from '@/lib/KeyboardShortcuts';
import { RecordingIndicator } from '@/lib/RecordingIndicator';
import { SettingsMenu } from '@/lib/SettingsMenu';
import { ConnectionDetails } from '@/lib/types';
import { ConnectionDetails, LocalUserChoices } from '@/lib/types';
import { VideoConference } from './VideoConference';
import { PreJoin } from './PreJoin';
import {
formatChatMessageLinks,
LocalUserChoices,
PreJoin,
RoomContext,
VideoConference,
} from '@livekit/components-react';
import {
ExternalE2EEKeyProvider,
@ -43,6 +42,7 @@ export function PageClientImpl(props: {
const [preJoinChoices, setPreJoinChoices] = React.useState<LocalUserChoices | undefined>(
undefined,
);
const preJoinDefaults = React.useMemo(() => {
return {
username: '',
@ -50,6 +50,7 @@ export function PageClientImpl(props: {
audioEnabled: true,
};
}, []);
const [connectionDetails, setConnectionDetails] = React.useState<ConnectionDetails | undefined>(
undefined,
);
@ -59,6 +60,9 @@ export function PageClientImpl(props: {
const url = new URL(CONN_DETAILS_ENDPOINT, window.location.origin);
url.searchParams.append('roomName', props.roomName);
url.searchParams.append('participantName', values.username);
if (values.language) {
url.searchParams.append('language', values.language);
}
if (props.region) {
url.searchParams.append('region', props.region);
}

View File

@ -0,0 +1,542 @@
import type {
CreateLocalTracksOptions,
LocalAudioTrack,
LocalTrack,
LocalVideoTrack,
TrackProcessor,
} from 'livekit-client';
import {
createLocalAudioTrack,
createLocalTracks,
createLocalVideoTrack,
facingModeFromLocalTrack,
Track,
VideoPresets,
Mutex,
} from 'livekit-client';
import * as React from 'react';
import { MediaDeviceMenu, ParticipantPlaceholder } from '@livekit/components-react';
import { TrackToggle } from '@livekit/components-react';
import { log } from '@livekit/components-core';
import { useMediaDevices, usePersistentUserChoices } from '@livekit/components-react/hooks';
import { LocalUserChoices } from '@/lib/types';
/**
 * Props for the PreJoin component.
 * @public
 */
export interface PreJoinProps
  extends Omit<React.HTMLAttributes<HTMLDivElement>, 'onSubmit' | 'onError'> {
  /** This function is called with the `LocalUserChoices` if validation is passed. */
  onSubmit?: (values: LocalUserChoices) => void;
  /**
   * Provide your custom validation function. Only if validation is successful the user choices are past to the onSubmit callback.
   */
  onValidate?: (values: LocalUserChoices) => boolean;
  /** Called when preview-track creation fails (e.g. permission denied). */
  onError?: (error: Error) => void;
  /** Prefill the input form with initial values. */
  defaults?: Partial<LocalUserChoices>;
  /** Display a debug window for your convenience. */
  debug?: boolean;
  /** Label for the submit button. */
  joinLabel?: string;
  /** Label for the microphone toggle. */
  micLabel?: string;
  /** Label for the camera toggle. */
  camLabel?: string;
  /** Placeholder for the username input. */
  userLabel?: string;
  /** Label for the language selector added by this fork. */
  languageLabel?: string;
  /**
   * If true, user choices are persisted across sessions.
   * @defaultValue true
   * @alpha
   */
  persistUserChoices?: boolean;
  /** Optional processor (e.g. background blur) applied to the camera preview track. */
  videoProcessor?: TrackProcessor<Track.Kind.Video>;
}
/**
 * Creates local preview tracks (microphone and/or camera) for use before
 * joining a room. Tracks are re-created whenever `options` changes (compared
 * via its serialized form); a Mutex serializes create/stop cycles so a
 * re-render that arrives mid-creation cannot leak a live track.
 * @public
 */
export function usePreviewTracks(
  options: CreateLocalTracksOptions,
  onError?: (err: Error) => void,
) {
  const [tracks, setTracks] = React.useState<LocalTrack[]>();

  // Guards against overlapping async track creation across effect re-runs.
  const trackLock = React.useMemo(() => new Mutex(), []);

  React.useEffect(() => {
    let needsCleanup = false;
    let localTracks: Array<LocalTrack> = [];
    trackLock.lock().then(async (unlock) => {
      try {
        if (options.audio || options.video) {
          localTracks = await createLocalTracks(options);

          if (needsCleanup) {
            // The effect was already cleaned up while the tracks were being
            // created: stop them immediately instead of surfacing them.
            localTracks.forEach((tr) => tr.stop());
          } else {
            setTracks(localTracks);
          }
        }
      } catch (e: unknown) {
        if (onError && e instanceof Error) {
          onError(e);
        } else {
          log.error(e);
        }
      } finally {
        unlock();
      }
    });

    return () => {
      needsCleanup = true;
      localTracks.forEach((track) => {
        track.stop();
      });
    };
    // The options object is serialized (with a replacer for non-serializable
    // fields like processors) to give a stable dependency value.
  }, [JSON.stringify(options, roomOptionsStringifyReplacer), onError, trackLock]);

  return tracks;
}
/**
 * Manages a single preview track (camera or microphone) for a given device id:
 * creates it lazily when enabled, mutes/unmutes on toggle, and switches the
 * underlying device when the selection changes.
 * @public
 * @deprecated use `usePreviewTracks` instead
 */
export function usePreviewDevice<T extends LocalVideoTrack | LocalAudioTrack>(
  enabled: boolean,
  deviceId: string,
  kind: 'videoinput' | 'audioinput',
) {
  const [deviceError, setDeviceError] = React.useState<Error | null>(null);
  // Prevents a second create while an async create is still in flight.
  const [isCreatingTrack, setIsCreatingTrack] = React.useState<boolean>(false);

  const devices = useMediaDevices({ kind });
  const [selectedDevice, setSelectedDevice] = React.useState<MediaDeviceInfo | undefined>(
    undefined,
  );

  const [localTrack, setLocalTrack] = React.useState<T>();
  const [localDeviceId, setLocalDeviceId] = React.useState<string>(deviceId);

  // Keep the local device id in sync with the prop.
  React.useEffect(() => {
    setLocalDeviceId(deviceId);
  }, [deviceId]);

  // Create a preview track for the requested device.
  const createTrack = async (deviceId: string, kind: 'videoinput' | 'audioinput') => {
    try {
      const track =
        kind === 'videoinput'
          ? await createLocalVideoTrack({
              deviceId,
              resolution: VideoPresets.h720.resolution,
            })
          : await createLocalAudioTrack({ deviceId });

      // The browser may have picked a different physical device than requested;
      // adopt the actual id so the UI reflects reality.
      const newDeviceId = await track.getDeviceId(false);
      if (newDeviceId && deviceId !== newDeviceId) {
        prevDeviceId.current = newDeviceId;
        setLocalDeviceId(newDeviceId);
      }
      setLocalTrack(track as T);
    } catch (e) {
      if (e instanceof Error) {
        setDeviceError(e);
      }
    }
  };

  const switchDevice = async (track: LocalVideoTrack | LocalAudioTrack, id: string) => {
    await track.setDeviceId(id);
    prevDeviceId.current = id;
  };

  const prevDeviceId = React.useRef(localDeviceId);

  // Lazily create the track the first time the preview is enabled.
  React.useEffect(() => {
    if (enabled && !localTrack && !deviceError && !isCreatingTrack) {
      log.debug('creating track', kind);
      setIsCreatingTrack(true);
      createTrack(localDeviceId, kind).finally(() => {
        setIsCreatingTrack(false);
      });
    }
  }, [enabled, localTrack, deviceError, isCreatingTrack]);

  // Mute when disabled, switch device when the selection changed, otherwise unmute.
  React.useEffect(() => {
    if (!localTrack) {
      return;
    }
    if (!enabled) {
      log.debug(`muting ${kind} track`);
      localTrack.mute().then(() => log.debug(localTrack.mediaStreamTrack));
    } else if (selectedDevice?.deviceId && prevDeviceId.current !== selectedDevice?.deviceId) {
      log.debug(`switching ${kind} device from`, prevDeviceId.current, selectedDevice.deviceId);
      switchDevice(localTrack, selectedDevice.deviceId);
    } else {
      log.debug(`unmuting local ${kind} track`);
      localTrack.unmute();
    }
  }, [localTrack, selectedDevice, enabled, kind]);

  // Stop and mute the track on unmount.
  React.useEffect(() => {
    return () => {
      if (localTrack) {
        log.debug(`stopping local ${kind} track`);
        localTrack.stop();
        localTrack.mute();
      }
    };
  }, []);

  // Resolve the MediaDeviceInfo object for the active device id.
  React.useEffect(() => {
    setSelectedDevice(devices?.find((dev) => dev.deviceId === localDeviceId));
  }, [localDeviceId, devices]);

  return {
    selectedDevice,
    localTrack,
    deviceError,
  };
}
/**
 * The `PreJoin` prefab component is normally presented to the user before he enters a room.
 * This component allows the user to check and select the preferred media device (camera und microphone).
 * On submit the user decisions are returned, which can then be passed on to the `LiveKitRoom` so that the user enters the room with the correct media devices.
 *
 * Forked from `@livekit/components-react` to add a language selector; the
 * chosen language is returned as part of the extended `LocalUserChoices`
 * (see `@/lib/types`) and persisted separately under `lk-user-language`.
 *
 * @remarks
 * This component is independent of the `LiveKitRoom` component and should not be nested within it.
 * Because it only accesses the local media tracks this component is self-contained and works without connection to the LiveKit server.
 *
 * @example
 * ```tsx
 * <PreJoin />
 * ```
 * @public
 */
export function PreJoin({
  defaults = {},
  onValidate,
  onSubmit,
  onError,
  debug,
  joinLabel = 'Join Room',
  micLabel = 'Microphone',
  camLabel = 'Camera',
  userLabel = 'Username',
  languageLabel = 'Language',
  persistUserChoices = true,
  videoProcessor,
  ...htmlProps
}: PreJoinProps) {
  // The browser language is only discoverable client-side; start with 'en' so
  // SSR and the first client render agree, then update after mount.
  const [browserLanguage, setBrowserLanguage] = React.useState<string>('en');
  React.useEffect(() => {
    setBrowserLanguage(getBrowserLanguage());
  }, []);

  const {
    userChoices: initialUserChoices,
    saveAudioInputDeviceId,
    saveAudioInputEnabled,
    saveVideoInputDeviceId,
    saveVideoInputEnabled,
    saveUsername,
  } = usePersistentUserChoices({
    defaults,
    preventSave: !persistUserChoices,
    preventLoad: !persistUserChoices,
  });

  // Cast initialUserChoices to our extended LocalUserChoices type;
  // the library hook does not know about the `language` field.
  const extendedInitialChoices = initialUserChoices as unknown as LocalUserChoices;

  const [userChoices, setUserChoices] = React.useState({
    ...initialUserChoices,
    language: extendedInitialChoices.language || browserLanguage,
  });

  // Initialize device settings
  const [audioEnabled, setAudioEnabled] = React.useState<boolean>(userChoices.audioEnabled);
  const [videoEnabled, setVideoEnabled] = React.useState<boolean>(userChoices.videoEnabled);
  const [audioDeviceId, setAudioDeviceId] = React.useState<string>(userChoices.audioDeviceId);
  const [videoDeviceId, setVideoDeviceId] = React.useState<string>(userChoices.videoDeviceId);
  const [username, setUsername] = React.useState(userChoices.username);
  const [language, setLanguage] = React.useState(userChoices.language || browserLanguage);

  // use browser defaults if we can discover it
  React.useEffect(() => {
    if (browserLanguage && !extendedInitialChoices.language) {
      setLanguage(browserLanguage);
    }
  }, [browserLanguage, extendedInitialChoices.language]);

  // Save user choices to persistent storage.
  React.useEffect(() => {
    saveAudioInputEnabled(audioEnabled);
  }, [audioEnabled, saveAudioInputEnabled]);
  React.useEffect(() => {
    saveVideoInputEnabled(videoEnabled);
  }, [videoEnabled, saveVideoInputEnabled]);
  React.useEffect(() => {
    saveAudioInputDeviceId(audioDeviceId);
  }, [audioDeviceId, saveAudioInputDeviceId]);
  React.useEffect(() => {
    saveVideoInputDeviceId(videoDeviceId);
  }, [videoDeviceId, saveVideoInputDeviceId]);
  React.useEffect(() => {
    saveUsername(username);
  }, [username, saveUsername]);

  // Save language preference to local storage; the library's persistence hook
  // has no slot for the language field, so it is stored under its own key.
  React.useEffect(() => {
    if (persistUserChoices) {
      try {
        localStorage.setItem('lk-user-language', language);
      } catch (e) {
        console.warn('Failed to save language preference to local storage', e);
      }
    }
  }, [language, persistUserChoices]);

  // NOTE(review): the preview uses the initially persisted device ids rather
  // than the audioDeviceId/videoDeviceId state; device switches are applied to
  // the live track via MediaDeviceMenu below (matches upstream) — confirm.
  const tracks = usePreviewTracks(
    {
      audio: audioEnabled ? { deviceId: initialUserChoices.audioDeviceId } : false,
      video: videoEnabled
        ? { deviceId: initialUserChoices.videoDeviceId, processor: videoProcessor }
        : false,
    },
    onError,
  );

  const videoEl = React.useRef(null);

  const videoTrack = React.useMemo(
    () => tracks?.filter((track) => track.kind === Track.Kind.Video)[0] as LocalVideoTrack,
    [tracks],
  );

  const facingMode = React.useMemo(() => {
    if (videoTrack) {
      const { facingMode } = facingModeFromLocalTrack(videoTrack);
      return facingMode;
    } else {
      return 'undefined';
    }
  }, [videoTrack]);

  const audioTrack = React.useMemo(
    () => tracks?.filter((track) => track.kind === Track.Kind.Audio)[0] as LocalAudioTrack,
    [tracks],
  );

  // Attach the camera preview to the <video> element while it exists.
  React.useEffect(() => {
    if (videoEl.current && videoTrack) {
      videoTrack.unmute();
      videoTrack.attach(videoEl.current);
    }

    return () => {
      videoTrack?.detach();
    };
  }, [videoTrack]);

  const [isValid, setIsValid] = React.useState<boolean>();

  // Default validation only requires a non-empty username.
  const handleValidation = React.useCallback(
    (values: LocalUserChoices) => {
      if (typeof onValidate === 'function') {
        return onValidate(values);
      } else {
        return values.username !== '';
      }
    },
    [onValidate],
  );

  // Re-assemble the LocalUserChoices object (and re-validate) whenever any
  // individual choice changes.
  React.useEffect(() => {
    const newUserChoices = {
      username,
      videoEnabled,
      videoDeviceId,
      audioEnabled,
      audioDeviceId,
      language,
    };
    setUserChoices(newUserChoices);
    setIsValid(handleValidation(newUserChoices));
  }, [username, videoEnabled, handleValidation, audioEnabled, audioDeviceId, videoDeviceId, language]);

  function handleSubmit(event: React.FormEvent) {
    event.preventDefault();
    if (handleValidation(userChoices)) {
      if (typeof onSubmit === 'function') {
        onSubmit(userChoices);
      }
    } else {
      log.warn('Validation failed with: ', userChoices);
    }
  }

  return (
    <div className="lk-prejoin" {...htmlProps}>
      <div className="lk-video-container">
        {videoTrack && (
          <video ref={videoEl} width="1280" height="720" data-lk-facing-mode={facingMode} />
        )}
        {(!videoTrack || !videoEnabled) && (
          <div className="lk-camera-off-note">
            <ParticipantPlaceholder />
          </div>
        )}
      </div>
      <div className="lk-button-group-container">
        <div className="lk-button-group audio">
          <TrackToggle
            initialState={audioEnabled}
            source={Track.Source.Microphone}
            onChange={(enabled) => setAudioEnabled(enabled)}
          >
            {micLabel}
          </TrackToggle>
          <div className="lk-button-group-menu">
            <MediaDeviceMenu
              initialSelection={audioDeviceId}
              kind="audioinput"
              disabled={!audioTrack}
              tracks={{ audioinput: audioTrack }}
              onActiveDeviceChange={(_, id) => setAudioDeviceId(id)}
            />
          </div>
        </div>
        <div className="lk-button-group video">
          <TrackToggle
            initialState={videoEnabled}
            source={Track.Source.Camera}
            onChange={(enabled) => setVideoEnabled(enabled)}
          >
            {camLabel}
          </TrackToggle>
          <div className="lk-button-group-menu">
            <MediaDeviceMenu
              initialSelection={videoDeviceId}
              kind="videoinput"
              disabled={!videoTrack}
              tracks={{ videoinput: videoTrack }}
              onActiveDeviceChange={(_, id) => setVideoDeviceId(id)}
            />
          </div>
        </div>
      </div>
      <form className="lk-username-container">
        <input
          className="lk-form-control"
          id="username"
          name="username"
          type="text"
          defaultValue={username}
          placeholder={userLabel}
          onChange={(inputEl) => setUsername(inputEl.target.value)}
          autoComplete="off"
        />
        {/* Language selector added by this fork. */}
        <div className="lk-form-control-wrapper">
          <label htmlFor="language" className="lk-form-label">
            {languageLabel}
          </label>
          <select
            className="lk-form-control"
            id="language"
            name="language"
            value={language}
            onChange={(e) => setLanguage(e.target.value)}
          >
            {availableLanguages.map((lang) => (
              <option key={lang.code} value={lang.code}>
                {lang.name}
              </option>
            ))}
          </select>
        </div>
        <button
          className="lk-button lk-join-button"
          type="submit"
          onClick={handleSubmit}
          disabled={!isValid}
        >
          {joinLabel}
        </button>
      </form>
      {debug && (
        <>
          <strong>User Choices:</strong>
          <ul className="lk-list" style={{ overflow: 'hidden', maxWidth: '15rem' }}>
            <li>Username: {`${userChoices.username}`}</li>
            <li>Video Enabled: {`${userChoices.videoEnabled}`}</li>
            <li>Audio Enabled: {`${userChoices.audioEnabled}`}</li>
            <li>Video Device: {`${userChoices.videoDeviceId}`}</li>
            <li>Audio Device: {`${userChoices.audioDeviceId}`}</li>
            <li>Language: {`${userChoices.language}`}</li>
          </ul>
        </>
      )}
    </div>
  );
}
/**
 * JSON.stringify replacer that collapses non-serializable room-option values
 * into stable string markers, so serialized options can be compared as a
 * React dependency. (Copied because `@livekit/components-react` does not
 * export it.)
 */
function roomOptionsStringifyReplacer(key: string, val: unknown) {
  switch (key) {
    case 'processor':
      // Represent a track processor by its name only.
      if (val && typeof val === 'object' && 'name' in val) {
        return val.name;
      }
      break;
    case 'e2ee':
      // Any truthy e2ee config collapses to a fixed marker.
      if (val) {
        return 'e2ee-enabled';
      }
      break;
  }
  return val;
}
/**
 * Resolve the user's preferred language as a two-character code.
 * Resolution order: saved preference in local storage (if still supported),
 * then the browser's language, then 'en'.
 */
export function getBrowserLanguage(): string {
  // Server-side rendering: no window/navigator/localStorage available.
  if (typeof window === 'undefined') {
    return 'en';
  }

  const isSupported = (code: string) => availableLanguages.some((lang) => lang.code === code);

  // A previously saved choice wins, as long as it is still a supported language.
  try {
    const savedLanguage = localStorage.getItem('lk-user-language');
    if (savedLanguage && isSupported(savedLanguage)) {
      return savedLanguage;
    }
  } catch (e) {
    console.warn('Failed to read language preference from local storage', e);
  }

  // Otherwise fall back to the browser locale's primary two-letter subtag.
  const browserLang = navigator.language.substring(0, 2).toLowerCase();
  return isSupported(browserLang) ? browserLang : 'en';
}
/**
 * Languages offered in the PreJoin language picker. `code` is the two-letter
 * ISO 639-1 code sent to the server; `name` is the display label, given as
 * the language's own name (endonym), consistent with Español/Français/Deutsch.
 */
export const availableLanguages = [
  { code: 'en', name: 'English' },
  { code: 'es', name: 'Español' },
  { code: 'fr', name: 'Français' },
  { code: 'de', name: 'Deutsch' },
  // Fixed for consistency: these were English exonyms ('Japanese', 'Chinese')
  // while every other entry used the native name.
  { code: 'ja', name: '日本語' },
  { code: 'zh', name: '中文' },
];

View File

@ -0,0 +1,60 @@
import { getTrackReferenceId } from '@livekit/components-core';
import { Track, ParticipantKind } from 'livekit-client';
import * as React from 'react';
import { useLocalParticipant, useTracks } from '@livekit/components-react/hooks';
import { AudioTrack, TrackReference } from '@livekit/components-react';
/**
 * Translation-aware replacement for the library's RoomAudioRenderer.
 *
 * Remote audio tracks are split into two groups based on the local user's
 * `language` attribute:
 *  - "matching" tracks (same-language participants, or agent tracks whose
 *    name ends with `-<language>`) play at full volume;
 *  - original speech from other-language human participants is ducked to 0.4;
 *  - agent tracks for other languages are not rendered at all.
 */
export function RoomAudioRenderer() {
  const tracks = useTracks(
    [Track.Source.Microphone, Track.Source.ScreenShareAudio, Track.Source.Unknown],
    {
      updateOnlyOn: [],
      onlySubscribed: true,
    },
  ).filter((ref) => !ref.participant.isLocal && ref.publication.kind === Track.Kind.Audio);

  const {localParticipant} = useLocalParticipant();
  const currentLanguage = localParticipant?.attributes?.language;

  // we don't have a language set so we don't know how to handle the multiple audio tracks
  // this should not happen
  if (!currentLanguage) {
    return null;
  }

  const matchingTracks: TrackReference[] = [];
  const originalTracks: TrackReference[] = [];
  for (const track of tracks) {
    // Assumes the translator agent suffixes its per-language tracks with
    // `-<code>` — TODO(review): confirm against the agent implementation.
    if (track.participant.attributes?.language === currentLanguage ||
      (track.participant.kind === ParticipantKind.AGENT && track.publication.trackName.endsWith(`-${currentLanguage}`))
    ) {
      matchingTracks.push(track);
    } else if (track.participant.kind !== ParticipantKind.AGENT) {
      originalTracks.push(track);
    }
  }

  return (
    <div style={{ display: 'none' }}>
      {/* Same-language audio at full volume. */}
      {matchingTracks.map((trackRef) => (
        <AudioTrack
          key={getTrackReferenceId(trackRef)}
          trackRef={trackRef}
          volume={1.0}
          muted={false}
        />
      ))}
      {/* Original speech from other-language participants, ducked under the translation. */}
      {originalTracks.map((trackRef) => (
        <AudioTrack
          key={getTrackReferenceId(trackRef)}
          trackRef={trackRef}
          volume={0.4}
          muted={false}
        />
      ))}
    </div>
  );
}

View File

@ -0,0 +1,179 @@
import * as React from 'react';
import { useEnsureRoom, useLocalParticipant } from '@livekit/components-react';
/** A single caption entry accumulated from a `lk.transcription` text stream. */
export interface Transcript {
  /** Stream id; used to merge incremental chunks into one entry. */
  id: string;
  /** Text received so far — chunks are appended as they arrive. */
  text: string;
  /** True when the stream was marked as a translation (`translated` attribute). */
  isTranslation: boolean;
  /** Identity of the participant the transcript belongs to, when known. */
  participantId?: string;
  /** Last-update time in ms since epoch; used for newest-first ordering. */
  timestamp: number;
  /** Set once the stream reports the transcription as final (or ends). */
  complete?: boolean;
}

/** NOTE(review): currently empty and unused — `TranscriptDisplay` takes no props. */
export interface TranscriptDisplayProps {
}
/**
 * TranscriptDisplay component shows captions of what users are saying.
 * It displays up to two different transcripts (original and translation)
 * and removes them after 5 seconds of no changes or when new transcripts arrive.
 *
 * Transcript chunks arrive over the `lk.transcription` text-stream topic and
 * are accumulated per stream id in a ref, so incoming chunks do not force the
 * stream handler to re-subscribe.
 */
export function TranscriptDisplay() {
  const [visibleTranscripts, setVisibleTranscripts] = React.useState<Transcript[]>([]);
  // Timer that clears the captions after a quiet period.
  const timeoutRef = React.useRef<NodeJS.Timeout | null>(null);
  // All in-flight transcripts keyed by stream id.
  const transcriptsRef = React.useRef<Record<string, Transcript>>({});
  const room = useEnsureRoom();
  const {localParticipant} = useLocalParticipant();
  const currentLanguage = localParticipant?.attributes?.language;

  // Recompute the (at most two) visible captions and restart the clear timer.
  const updateTranscriptState = React.useCallback(() => {
    const allTranscripts = Object.values(transcriptsRef.current);
    // Sort by timestamp (newest first) and take the most recent 2
    // One original and one translation if available
    const sortedTranscripts = allTranscripts
      .sort((a, b) => b.timestamp - a.timestamp);
    // Find the most recent original transcript
    const originalTranscript = sortedTranscripts.find(t => !t.isTranslation);
    // Find the most recent translation transcript
    const translationTranscript = sortedTranscripts.find(t => t.isTranslation);
    // Combine them into the visible transcripts array
    const newVisibleTranscripts: Transcript[] = [];
    if (originalTranscript) newVisibleTranscripts.push(originalTranscript);
    if (translationTranscript) newVisibleTranscripts.push(translationTranscript);
    setVisibleTranscripts(newVisibleTranscripts);
    // Reset the timeout
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
    }
    // Set timeout to clear transcripts after 5 seconds
    timeoutRef.current = setTimeout(() => {
      setVisibleTranscripts([]);
      // Also clear the transcripts reference
      transcriptsRef.current = {};
    }, 5000);
  }, []);

  // Subscribe to the transcription text-stream topic for the lifetime of the room.
  React.useEffect(() => {
    if (room) {
      room.registerTextStreamHandler('lk.transcription', async (reader, participantInfo) => {
        const info = reader.info;
        const isTranslation = info.attributes?.translated === "true";
        // ignore translations for other languages
        // if (isTranslation && info.attributes?.language !== currentLanguage) {
        //   return;
        // }
        const id = info.id;
        const participantId = participantInfo?.identity;
        const isFinal = info.attributes?.["lk.transcription_final"] === "true";
        console.log("transcript", id, isFinal);
        // Create or update the transcript in our reference object
        if (!transcriptsRef.current[id]) {
          transcriptsRef.current[id] = {
            id,
            text: '',
            isTranslation,
            participantId,
            timestamp: Date.now(),
          };
        }
        try {
          // Append chunks as they stream in; each chunk refreshes the timestamp
          // so the auto-clear timer restarts.
          for await (const chunk of reader) {
            // Update the transcript with the new chunk
            if (chunk) {
              const transcript = transcriptsRef.current[id];
              transcript.text += chunk;
              transcript.timestamp = Date.now();
              transcript.complete = isFinal;
              updateTranscriptState();
            }
          }
          // Stream ended: mark the transcript complete (unless it was cleared).
          if (transcriptsRef.current[id]) {
            transcriptsRef.current[id].complete = true;
            updateTranscriptState();
          }
        } catch (e) {
          console.error('Error processing transcript stream:', e);
        }
      });
      // NOTE(review): this effect re-runs when currentLanguage changes and
      // re-registers the handler — confirm the SDK allows registering the same
      // topic again after unregister.
      return () => {
        room.unregisterTextStreamHandler('lk.transcription');
        if (timeoutRef.current) {
          clearTimeout(timeoutRef.current);
        }
      };
    }
  }, [room, currentLanguage, updateTranscriptState]);

  // Safety net: clear any pending timer on unmount.
  React.useEffect(() => {
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  // Without a language attribute captions are not meaningful; render nothing.
  if (!currentLanguage) {
    return null;
  }

  if (visibleTranscripts.length === 0) {
    return null;
  }

  return (
    <div className="lk-transcript-container">
      {visibleTranscripts.map((transcript) => (
        <div
          key={transcript.id}
          className={`lk-transcript ${transcript.isTranslation ? 'lk-transcript-translation' : 'lk-transcript-original'}`}
        >
          {transcript.text}
        </div>
      ))}
      <style jsx>{`
        .lk-transcript-container {
          position: absolute;
          bottom: 80px;
          left: 20%;
          right: 20%;
          display: flex;
          flex-direction: column;
          align-items: center;
          z-index: 10;
        }
        .lk-transcript {
          background-color: rgba(0, 0, 0, 0.7);
          color: white;
          padding: 8px 16px;
          margin-bottom: 8px;
          border-radius: 4px;
          max-width: 100%;
          text-align: center;
          font-size: 1rem;
          line-height: 1.5;
        }
        .lk-transcript-translation {
          font-style: italic;
          background-color: rgba(0, 0, 0, 0.6);
        }
      `}</style>
    </div>
  );
}

View File

@ -0,0 +1,176 @@
import * as React from 'react';
import type {
MessageDecoder,
MessageEncoder,
TrackReferenceOrPlaceholder,
WidgetState,
} from '@livekit/components-core';
import { isEqualTrackRef, isTrackReference, isWeb, log } from '@livekit/components-core';
import { ParticipantKind, RoomEvent, Track } from 'livekit-client';
import { RoomAudioRenderer } from './RoomAudioRenderer';
import { TranscriptDisplay } from './TranscriptDisplay';
import {
CarouselLayout,
ConnectionStateToast,
FocusLayout,
FocusLayoutContainer,
GridLayout,
LayoutContextProvider,
ParticipantTile,
useCreateLayoutContext,
Chat,
ControlBar,
MessageFormatter,
} from '@livekit/components-react';
import { usePinnedTracks, useTracks } from '@livekit/components-react/hooks';
/**
 * Props for the forked VideoConference component.
 *
 * NOTE(review): the chat* props are currently unused — this fork renders no
 * Chat widget and disables the chat control; they are kept for API
 * compatibility with the upstream component.
 * @public
 */
export interface VideoConferenceProps extends React.HTMLAttributes<HTMLDivElement> {
  chatMessageFormatter?: MessageFormatter;
  chatMessageEncoder?: MessageEncoder;
  chatMessageDecoder?: MessageDecoder;
  /** @alpha */
  SettingsComponent?: React.ComponentType;
}
/**
 * The `VideoConference` ready-made component is your drop-in solution for a classic video conferencing application.
 * It provides functionality such as focusing on one participant, grid view with pagination to handle large numbers
 * of participants, basic non-persistent chat, screen sharing, and more.
 *
 * Forked from `@livekit/components-react` for the translation demo: agent
 * participants are hidden from the video layout, the custom RoomAudioRenderer
 * mixes translated audio, and TranscriptDisplay renders live captions.
 *
 * @remarks
 * The component is implemented with other LiveKit components like `FocusContextProvider`,
 * `GridLayout`, `ControlBar`, `FocusLayoutContainer` and `FocusLayout`.
 * You can use these components as a starting point for your own custom video conferencing application.
 *
 * @example
 * ```tsx
 * <LiveKitRoom>
 *   <VideoConference />
 * </LiveKitRoom>
 * ```
 * @public
 */
export function VideoConference({
  chatMessageFormatter,
  chatMessageDecoder,
  chatMessageEncoder,
  SettingsComponent,
  ...props
}: VideoConferenceProps) {
  const [widgetState, setWidgetState] = React.useState<WidgetState>({
    showChat: false,
    unreadMessages: 0,
    showSettings: false,
  });
  // Tracks the screen-share we auto-pinned, so we can auto-unpin it when it ends.
  const lastAutoFocusedScreenShareTrack = React.useRef<TrackReferenceOrPlaceholder | null>(null);

  let tracks = useTracks(
    [
      { source: Track.Source.Camera, withPlaceholder: true },
      { source: Track.Source.ScreenShare, withPlaceholder: false },
    ],
    { updateOnlyOn: [RoomEvent.ActiveSpeakersChanged], onlySubscribed: false },
  );
  // Hide the translator agent's tiles from the video layout.
  tracks = tracks.filter((track) => track.participant.kind !== ParticipantKind.AGENT)

  const widgetUpdate = (state: WidgetState) => {
    log.debug('updating widget state', state);
    setWidgetState(state);
  };

  const layoutContext = useCreateLayoutContext();

  const screenShareTracks = tracks
    .filter(isTrackReference)
    .filter((track) => track.publication.source === Track.Source.ScreenShare);

  const focusTrack = usePinnedTracks(layoutContext)?.[0];
  const carouselTracks = tracks.filter((track) => !isEqualTrackRef(track, focusTrack));

  React.useEffect(() => {
    // If screen share tracks are published, and no pin is set explicitly, auto set the screen share.
    if (
      screenShareTracks.some((track) => track.publication.isSubscribed) &&
      lastAutoFocusedScreenShareTrack.current === null
    ) {
      log.debug('Auto set screen share focus:', { newScreenShareTrack: screenShareTracks[0] });
      layoutContext.pin.dispatch?.({ msg: 'set_pin', trackReference: screenShareTracks[0] });
      lastAutoFocusedScreenShareTrack.current = screenShareTracks[0];
    } else if (
      lastAutoFocusedScreenShareTrack.current &&
      !screenShareTracks.some(
        (track) =>
          track.publication.trackSid ===
          lastAutoFocusedScreenShareTrack.current?.publication?.trackSid,
      )
    ) {
      // The auto-focused screen share has ended; clear the pin.
      log.debug('Auto clearing screen share focus.');
      layoutContext.pin.dispatch?.({ msg: 'clear_pin' });
      lastAutoFocusedScreenShareTrack.current = null;
    }
    // If the pinned entry was a placeholder that gained a real track, re-pin the real track.
    if (focusTrack && !isTrackReference(focusTrack)) {
      const updatedFocusTrack = tracks.find(
        (tr) =>
          tr.participant.identity === focusTrack.participant.identity &&
          tr.source === focusTrack.source,
      );
      if (updatedFocusTrack !== focusTrack && isTrackReference(updatedFocusTrack)) {
        layoutContext.pin.dispatch?.({ msg: 'set_pin', trackReference: updatedFocusTrack });
      }
    }
  }, [
    // Serialized sid/subscription pairs give a stable dependency for the track list.
    screenShareTracks
      .map((ref) => `${ref.publication.trackSid}_${ref.publication.isSubscribed}`)
      .join(),
    focusTrack?.publication?.trackSid,
    tracks,
  ]);

  return (
    <div className="lk-video-conference" {...props}>
      {isWeb() && (
        <LayoutContextProvider
          value={layoutContext}
          onWidgetChange={widgetUpdate}
        >
          {/* Language-aware audio mixing (translated vs. original tracks). */}
          <RoomAudioRenderer />
          <div className="lk-video-conference-inner">
            {!focusTrack ? (
              <div className="lk-grid-layout-wrapper">
                <GridLayout tracks={tracks}>
                  <ParticipantTile />
                </GridLayout>
              </div>
            ) : (
              <div className="lk-focus-layout-wrapper">
                <FocusLayoutContainer>
                  <CarouselLayout tracks={carouselTracks}>
                    <ParticipantTile />
                  </CarouselLayout>
                  {focusTrack && <FocusLayout trackRef={focusTrack} />}
                </FocusLayoutContainer>
              </div>
            )}
            {/* Live captions for original + translated speech. */}
            <TranscriptDisplay />
            <ControlBar controls={{ chat: false, settings: !!SettingsComponent }} />
          </div>
          {SettingsComponent && (
            <div
              className="lk-settings-menu-modal"
              style={{ display: widgetState.showSettings ? 'block' : 'none' }}
            >
              <SettingsComponent />
            </div>
          )}
        </LayoutContextProvider>
      )}
      <ConnectionStateToast />
    </div>
  );
}

View File

@ -1,5 +1,15 @@
import { LocalAudioTrack, LocalVideoTrack, videoCodecs } from 'livekit-client';
import { VideoCodec } from 'livekit-client';
import { LocalUserChoices as LiveKitLocalUserChoices } from '@livekit/components-core';
// Extend the LocalUserChoices type with our additional properties
export interface LocalUserChoices extends LiveKitLocalUserChoices {
  /**
   * The language code selected by the user (two-letter code, e.g. 'en').
   * Forwarded as the `language` query parameter to the connection-details
   * endpoint so the translator agent knows the user's language.
   * @defaultValue 'en'
   */
  language?: string;
}
export interface SessionProps {
roomName: string;

View File

@ -14,9 +14,11 @@
},
"dependencies": {
"@datadog/browser-logs": "^5.23.3",
"@livekit/components-core": "^0.12.9",
"@livekit/components-react": "2.9.13",
"@livekit/components-styles": "1.1.6",
"@livekit/krisp-noise-filter": "0.3.4",
"@livekit/protocol": "^1.39.3",
"@livekit/track-processors": "^0.5.4",
"livekit-client": "2.15.2",
"livekit-server-sdk": "2.13.1",

30
pnpm-lock.yaml generated
View File

@ -11,6 +11,9 @@ importers:
'@datadog/browser-logs':
specifier: ^5.23.3
version: 5.35.1
'@livekit/components-core':
specifier: ^0.12.9
version: 0.12.9(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))(tslib@2.8.1)
'@livekit/components-react':
specifier: 2.9.13
version: 2.9.13(@livekit/krisp-noise-filter@0.3.4(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22)))(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(tslib@2.8.1)
@ -20,6 +23,9 @@ importers:
'@livekit/krisp-noise-filter':
specifier: 0.3.4
version: 0.3.4(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))
'@livekit/protocol':
specifier: ^1.39.3
version: 1.39.3
'@livekit/track-processors':
specifier: ^0.5.4
version: 0.5.4(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))
@ -442,6 +448,13 @@ packages:
livekit-client: ^2.13.3
tslib: ^2.6.2
'@livekit/components-core@0.12.9':
resolution: {integrity: sha512-bwrZsHf6GaHIO+lLyA6Yps1STTX9YIeL3ixwt+Ufi88OgkNYdp41Ug8oeVDlf7tzdxa+r3Xkfaj/qvIG84Yo6A==}
engines: {node: '>=18'}
peerDependencies:
livekit-client: ^2.13.3
tslib: ^2.6.2
'@livekit/components-react@2.9.13':
resolution: {integrity: sha512-iiTDZhokK5c1m4HXdNSxzIRzurBYAKZDjrR2dIYXxBsYBWUqEgPGeocAYCkPsT0VcCST6Z73p8ySR8X5SotXGg==}
engines: {node: '>=18'}
@ -467,9 +480,6 @@ packages:
'@livekit/mutex@1.1.1':
resolution: {integrity: sha512-EsshAucklmpuUAfkABPxJNhzj9v2sG7JuzFDL4ML1oJQSV14sqrpTYnsaOudMAw9yOaW53NU3QQTlUQoRs4czw==}
'@livekit/protocol@1.39.2':
resolution: {integrity: sha512-kYbIO/JlC6cylSxd4WJrBps9+zoZ9gifL7t3iW9whT8rbo5jHx03I4dwBLhzOonVyX+memSEO90m/ymNoT+aAw==}
'@livekit/protocol@1.39.3':
resolution: {integrity: sha512-hfOnbwPCeZBEvMRdRhU2sr46mjGXavQcrb3BFRfG+Gm0Z7WUSeFdy5WLstXJzEepz17Iwp/lkGwJ4ZgOOYfPuA==}
@ -2593,6 +2603,14 @@ snapshots:
rxjs: 7.8.2
tslib: 2.8.1
'@livekit/components-core@0.12.9(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))(tslib@2.8.1)':
dependencies:
'@floating-ui/dom': 1.6.13
livekit-client: 2.15.2(@types/dom-mediacapture-record@1.0.22)
loglevel: 1.9.1
rxjs: 7.8.2
tslib: 2.8.1
'@livekit/components-react@2.9.13(@livekit/krisp-noise-filter@0.3.4(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22)))(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(tslib@2.8.1)':
dependencies:
'@livekit/components-core': 0.12.8(livekit-client@2.15.2(@types/dom-mediacapture-record@1.0.22))(tslib@2.8.1)
@ -2614,10 +2632,6 @@ snapshots:
'@livekit/mutex@1.1.1': {}
'@livekit/protocol@1.39.2':
dependencies:
'@bufbuild/protobuf': 1.10.0
'@livekit/protocol@1.39.3':
dependencies:
'@bufbuild/protobuf': 1.10.0
@ -3977,7 +3991,7 @@ snapshots:
livekit-server-sdk@2.13.1:
dependencies:
'@bufbuild/protobuf': 1.10.0
'@livekit/protocol': 1.39.2
'@livekit/protocol': 1.39.3
camelcase-keys: 9.1.3
jose: 5.9.3

View File

@ -65,3 +65,15 @@ h2 a {
h2 a {
text-decoration: none;
}
/* Wrapper around the PreJoin language selector (label + <select>). */
.lk-form-control-wrapper {
  margin-top: 10px;
  width: 100%;
}

/* Label displayed above the language <select>. */
.lk-form-label {
  display: block;
  margin-bottom: 5px;
  font-size: 0.9rem;
  color: #666;
}