frontend (test): add Smart Mosaic layout helper functions and fake participant management

- Implemented helper functions for configuring Smart Mosaic layout, including setting participant count and waiting for participant visibility.
- Created a new file for managing fake participants, allowing for joining and disconnecting from LiveKit rooms using both CLI and browser-based methods.
- Introduced interfaces for browser-based fake participant options to streamline participant creation with audio and video assets.
This commit is contained in:
Carlos Santos 2025-12-02 21:02:40 +01:00
parent f930bf1447
commit 1663b008ed
24 changed files with 1841 additions and 1 deletions

View File

@ -0,0 +1,330 @@
#!/bin/bash
# =============================================================================
# Audio Generation Script for Smart Mosaic Layout Tests
# =============================================================================
# This script generates test audio files from a base audio file (base.wav)
# for testing the Smart Mosaic layout speaker detection functionality.
#
# Requirements:
# - ffmpeg 7.0+ (optimized for this version)
# - GNU grep (the version probe below uses `grep -oP`, a GNU extension;
#   on BSD/macOS grep this will fail and, with `set -e`, abort the script)
# - base.wav file with continuous speech audio in the same directory
#
# IMPORTANT: This script generates WAV files for best compatibility with
# Chrome's fake audio capture (--use-file-for-fake-audio-capture).
# WAV format ensures proper audio device simulation and VAD detection.
#
# Usage:
# chmod +x generate-test-audio.sh
# ./generate-test-audio.sh
# =============================================================================
# Abort on the first failing command.
set -e
# Resolve the directory containing this script so it can be run from anywhere.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BASE_AUDIO="$SCRIPT_DIR/base.wav"
OUTPUT_DIR="$SCRIPT_DIR"
# Audio settings
SAMPLE_RATE=48000
CHANNELS=1
# WAV encoding settings for Chrome fake audio capture compatibility
# PCM 16-bit is the most compatible format for Chrome's fake devices
# NOTE: deliberately left unquoted at every call site so the options word-split
# into separate ffmpeg arguments.
WAV_OPTS="-c:a pcm_s16le -ar ${SAMPLE_RATE} -ac ${CHANNELS}"
# Check ffmpeg version (major version only; informational — no minimum is enforced)
FFMPEG_VERSION=$(ffmpeg -version | head -n1 | grep -oP 'ffmpeg version \K[0-9]+')
echo "🔧 Detected ffmpeg major version: $FFMPEG_VERSION"
# Check if base audio exists
if [ ! -f "$BASE_AUDIO" ]; then
echo "❌ Error: base.wav not found in $SCRIPT_DIR"
echo "Please provide a base.wav file with continuous speech audio."
exit 1
fi
echo ""
echo "🎵 Generating test audio files from base.wav..."
echo " Output directory: $OUTPUT_DIR"
echo " Sample rate: ${SAMPLE_RATE}Hz, Channels: ${CHANNELS}"
echo " Codec: PCM 16-bit (WAV) for Chrome fake audio compatibility"
echo ""
# -----------------------------------------------------------------------------
# 1. continuous_speech.wav (30s)
# Continuous speech audio for participants who speak constantly
# -----------------------------------------------------------------------------
echo "1⃣ Generating continuous_speech.wav (30s of continuous speech)..."
# Plain trim (-t 30) + resample of the base recording.
ffmpeg -y -i "$BASE_AUDIO" -t 30 -af "aresample=${SAMPLE_RATE}" $WAV_OPTS "$OUTPUT_DIR/continuous_speech.wav" 2>/dev/null
echo " ✅ continuous_speech.wav created"
# -----------------------------------------------------------------------------
# 2. complete_silence.wav (30s)
# Complete digital silence using aevalsrc with explicit zero expression
# This generates samples with value exactly 0.0 - guaranteed no VAD trigger
# -----------------------------------------------------------------------------
echo "2⃣ Generating complete_silence.wav (30s of TRUE digital silence)..."
ffmpeg -y -f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
$WAV_OPTS "$OUTPUT_DIR/complete_silence.wav" 2>/dev/null
echo " ✅ complete_silence.wav created"
# -----------------------------------------------------------------------------
# 3. speech_5s_then_silence.wav (30s)
# 5s speech, then 25s TRUE silence
# Uses amix to combine speech with silence background for clean transitions
# The 30s zero-sample bed (input 1) fixes the total duration via duration=first;
# volume=2 compensates amix's per-input attenuation.
# -----------------------------------------------------------------------------
echo "3⃣ Generating speech_5s_then_silence.wav (5s speech + 25s TRUE silence)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:5,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE}[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speech_5s_then_silence.wav" 2>/dev/null
echo " ✅ speech_5s_then_silence.wav created"
# -----------------------------------------------------------------------------
# 4. silence_5s_then_speech.wav (30s)
# 5s TRUE silence, then 25s speech
# atrim keeps 25s of speech, adelay=5s shifts it so it occupies 5s..30s.
# -----------------------------------------------------------------------------
echo "4⃣ Generating silence_5s_then_speech.wav (5s TRUE silence + 25s speech)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:25,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=5s:all=1[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/silence_5s_then_speech.wav" 2>/dev/null
echo " ✅ silence_5s_then_speech.wav created"
# -----------------------------------------------------------------------------
# 5. speech_gap_speech.wav (30s)
# 5s speech, 10s TRUE silence, 15s speech - for testing speaker re-activation
# [s1] = first 5s of speech at t=0; [s2] = 15s of speech delayed to t=15s;
# three-way amix needs volume=3 to compensate the attenuation.
# -----------------------------------------------------------------------------
echo "5⃣ Generating speech_gap_speech.wav (5s speech + 10s TRUE gap + 15s speech)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:5,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE}[s1];
[0:a]atrim=5:20,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=15s:all=1[s2];
[1:a][s1][s2]amix=inputs=3:duration=first:dropout_transition=0,volume=3[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speech_gap_speech.wav" 2>/dev/null
echo " ✅ speech_gap_speech.wav created"
# -----------------------------------------------------------------------------
# 6-11. Sequential speaker audio files (for rotation tests)
# Each speaker has a unique time window for speech with TRUE silence elsewhere.
# Every file is built the same way: a 3s speech clip (atrim=0:3) delayed by
# adelay into its window, mixed over a 30s zero-sample silence bed.
# -----------------------------------------------------------------------------
echo "6⃣ Generating sequential speaker audio files (A through F)..."
# Speaker A: speaks 0-3s, then TRUE silence (no adelay needed)
echo " → speaker_seq_A.wav (speaks at 0-3s)"
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:3,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE}[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speaker_seq_A.wav" 2>/dev/null
# Speaker B: TRUE silence 0-5s, speaks 5-8s, then TRUE silence
echo " → speaker_seq_B.wav (speaks at 5-8s)"
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:3,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=5s:all=1[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speaker_seq_B.wav" 2>/dev/null
# Speaker C: TRUE silence 0-10s, speaks 10-13s, then TRUE silence
echo " → speaker_seq_C.wav (speaks at 10-13s)"
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:3,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=10s:all=1[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speaker_seq_C.wav" 2>/dev/null
# Speaker D: TRUE silence 0-15s, speaks 15-18s, then TRUE silence
echo " → speaker_seq_D.wav (speaks at 15-18s)"
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:3,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=15s:all=1[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speaker_seq_D.wav" 2>/dev/null
# Speaker E: TRUE silence 0-20s, speaks 20-23s, then TRUE silence
echo " → speaker_seq_E.wav (speaks at 20-23s)"
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:3,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=20s:all=1[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speaker_seq_E.wav" 2>/dev/null
# Speaker F: TRUE silence 0-25s, speaks 25-28s, then TRUE silence
echo " → speaker_seq_F.wav (speaks at 25-28s)"
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:3,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=25s:all=1[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/speaker_seq_F.wav" 2>/dev/null
echo " ✅ Sequential speaker files created (A-F)"
# -----------------------------------------------------------------------------
# 12. simultaneous_then_solo.wav (30s)
# 15s speech then 15s TRUE silence (atrim=0:15 over the silence bed)
# Used for the "simultaneous speech" test (this participant continues speaking)
# -----------------------------------------------------------------------------
echo "7⃣ Generating simultaneous_then_solo.wav (15s speech + 15s TRUE silence)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:15,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE}[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/simultaneous_then_solo.wav" 2>/dev/null
echo " ✅ simultaneous_then_solo.wav created"
# -----------------------------------------------------------------------------
# 13. simultaneous_then_stop.wav (30s)
# 5s speech then 25s TRUE silence (same recipe with atrim=0:5)
# Used for participants who stop speaking after simultaneous period
# -----------------------------------------------------------------------------
echo "8⃣ Generating simultaneous_then_stop.wav (5s speech + 25s TRUE silence)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:5,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE}[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/simultaneous_then_stop.wav" 2>/dev/null
echo " ✅ simultaneous_then_stop.wav created"
# -----------------------------------------------------------------------------
# 14. low_volume_speech.wav (30s)
# Continuous speech at 10% volume - below the audioLevel threshold (0.15)
# Used to test that participants with low audio levels are filtered out
# FIX: the previous command generated pink noise into ambient_pink_noise.wav,
# which matched neither this header, nor the progress message, nor the summary.
# Generate the intended file: the base speech attenuated to 10% volume.
# -----------------------------------------------------------------------------
echo "9⃣ Generating low_volume_speech.wav (30s speech at 10% volume)..."
ffmpeg -y -i "$BASE_AUDIO" -t 30 \
-af "volume=0.1,aresample=${SAMPLE_RATE}" \
$WAV_OPTS "$OUTPUT_DIR/low_volume_speech.wav" 2>/dev/null
echo " ✅ low_volume_speech.wav created"
# -----------------------------------------------------------------------------
# 15. brief_sound_1s_at_5s.wav (30s)
# Only 1 second of speech, starting at the 5s mark (adelay=5000 ms), with
# TRUE silence everywhere else.
# Used to test minimum speaking duration filter (should be filtered out)
# FIX: the echo messages previously referred to "brief_sound_1s.wav" even
# though the file written (and listed in the summary) is brief_sound_1s_at_5s.wav.
# -----------------------------------------------------------------------------
echo "🔟 Generating brief_sound_1s_at_5s.wav (5s silence + 1s speech + 24s silence)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:1,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=5000|5000[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/brief_sound_1s_at_5s.wav" 2>/dev/null
echo " ✅ brief_sound_1s_at_5s.wav created"
# -----------------------------------------------------------------------------
# 16. brief_cough_at_5s.wav (30s)
# Only 0.5 seconds of sound (simulating a cough) at the 5s mark, with TRUE
# silence everywhere else.
# Used to test that very brief sounds are filtered out
# FIX: the echo messages previously referred to "brief_cough.wav" even though
# the file written (and listed in the summary) is brief_cough_at_5s.wav.
# -----------------------------------------------------------------------------
echo "1⃣1⃣ Generating brief_cough_at_5s.wav (5s silence + 0.5s sound + 24.5s silence)..."
ffmpeg -y \
-i "$BASE_AUDIO" \
-f lavfi -i "aevalsrc=0:c=mono:s=${SAMPLE_RATE}:d=30" \
-filter_complex "
[0:a]atrim=0:0.5,asetpts=PTS-STARTPTS,aresample=${SAMPLE_RATE},adelay=5000|5000[speech];
[1:a][speech]amix=inputs=2:duration=first:dropout_transition=0,volume=2[out]
" \
-map "[out]" -t 30 $WAV_OPTS "$OUTPUT_DIR/brief_cough_at_5s.wav" 2>/dev/null
echo " ✅ brief_cough_at_5s.wav created"
# -----------------------------------------------------------------------------
# Verify silence in generated files
# -----------------------------------------------------------------------------
echo ""
echo "🔍 Verifying silence quality in generated files..."
# Print the RMS level of a 1-second window that is expected to be silent.
#   $1 = file path
#   $2 = start (in seconds) of the expected-silence window
verify_silence() {
local file=$1
local expected_silence_start=$2
# FIX: atrim's start/end options take literal timestamps, not arithmetic
# expressions — "${start}+1" was passed through unevaluated. Compute the
# window end in the shell instead.
local window_end=$((expected_silence_start + 1))
# Check RMS level in silence portion (should be exactly 0 or very close)
local rms=$(ffmpeg -i "$file" -af "atrim=${expected_silence_start}:${window_end},astats=metadata=1:reset=1" -f null - 2>&1 | grep "RMS level" | head -1 | grep -oP '[-0-9.]+' | head -1)
if [ -n "$rms" ]; then
echo " $file: RMS at ${expected_silence_start}s = ${rms}dB"
fi
}
# Verify a few key files (spot checks, not exhaustive)
verify_silence "$OUTPUT_DIR/complete_silence.wav" 15
verify_silence "$OUTPUT_DIR/speech_5s_then_silence.wav" 10
verify_silence "$OUTPUT_DIR/speaker_seq_B.wav" 2
# -----------------------------------------------------------------------------
# Summary
# FIX: the listing previously named brief_sound_1s.wav and brief_cough.wav,
# but the files actually written above are brief_sound_1s_at_5s.wav and
# brief_cough_at_5s.wav (sound placed at the 5s mark).
# -----------------------------------------------------------------------------
echo ""
echo "============================================================================="
echo "✅ Audio generation complete! (WAV format for Chrome fake audio capture)"
echo "============================================================================="
echo ""
echo "Generated files:"
echo " 📁 $OUTPUT_DIR/"
echo " ├── continuous_speech.wav (30s continuous speech)"
echo " ├── complete_silence.wav (30s TRUE digital silence - aevalsrc=0)"
echo " ├── speech_5s_then_silence.wav (5s speech + 25s TRUE silence)"
echo " ├── silence_5s_then_speech.wav (5s TRUE silence + 25s speech)"
echo " ├── speech_gap_speech.wav (5s speech + 10s gap + 15s speech)"
echo " ├── speaker_seq_A.wav (speaks at 0-3s)"
echo " ├── speaker_seq_B.wav (speaks at 5-8s)"
echo " ├── speaker_seq_C.wav (speaks at 10-13s)"
echo " ├── speaker_seq_D.wav (speaks at 15-18s)"
echo " ├── speaker_seq_E.wav (speaks at 20-23s)"
echo " ├── speaker_seq_F.wav (speaks at 25-28s)"
echo " ├── simultaneous_then_solo.wav (15s speech + 15s silence)"
echo " ├── simultaneous_then_stop.wav (5s speech + 25s silence)"
echo " ├── low_volume_speech.wav (30s speech at 10% volume - below threshold)"
echo " ├── brief_sound_1s_at_5s.wav (1s speech at 5s - too short)"
echo " └── brief_cough_at_5s.wav (0.5s sound at 5s - simulates cough)"
echo ""
echo "Key features of this version:"
echo " • WAV format (PCM 16-bit) for Chrome fake audio capture compatibility"
echo " • Uses aevalsrc=0 for TRUE digital silence (samples = 0.0)"
echo " • amix filter for clean speech/silence transitions"
echo " • adelay for precise speech timing"
echo " • 48kHz sample rate, mono channel"
echo ""
echo "Usage in tests:"
echo " await joinBrowserFakeParticipant(browser, roomId, 'speaker1', {"
echo " audioFile: 'continuous_speech.wav'"
echo " });"
echo ""

View File

@ -0,0 +1,918 @@
import { expect, test } from '@playwright/test';
import { MEET_TESTAPP_URL } from '../config';
import {
configureLayoutMode,
createTestRoom,
deleteAllRecordings,
deleteAllRooms,
getVisibleParticipantsCount,
getVisibleParticipantNames,
joinRoomAs,
prepareForJoiningRoom,
waitForElementInIframe,
waitForParticipantCount,
waitForParticipantVisible,
waitForParticipantSwap,
muteAudio
} from '../helpers/function-helpers';
import {
disconnectAllFakeParticipants,
disconnectAllBrowserFakeParticipants,
joinBrowserFakeParticipant,
joinFakeParticipant,
disconnectBrowserFakeParticipant
} from '../helpers/participant.helper';
test.describe('Custom Layout Tests', () => {
let subscribedToAppErrors = false;
let roomId: string;
let participantName: string;
test.beforeEach(async ({ page }) => {
  // Fresh room per test so state never leaks between tests.
  roomId = await createTestRoom('smart-mosaic-test-room');
  // Forward browser console output to the runner; attach the listener once.
  if (!subscribedToAppErrors) {
    subscribedToAppErrors = true;
    page.on('console', (msg) => {
      const kind = msg.type();
      let tag = 'LOG';
      if (kind === 'error') {
        tag = 'ERROR';
      } else if (kind === 'warning') {
        tag = 'WARNING';
      }
      console.log(`[${tag}]`, msg.text());
    });
  }
  // Randomized local identity to avoid collisions across parallel runs.
  participantName = `Local-${Math.random().toString(36).substring(2, 7)}`;
});
test.afterEach(async () => {
  // Tear down both flavors of fake participants (browser-based and CLI-based)
  // in parallel after every test.
  const cleanups = [disconnectAllBrowserFakeParticipants(), disconnectAllFakeParticipants()];
  await Promise.all(cleanups);
});
test.afterAll(async ({ browser }) => {
  // Use a throwaway context/page for the API cleanup so test pages stay untouched.
  const tempContext = await browser.newContext();
  const tempPage = await tempContext.newPage();
  try {
    await deleteAllRooms(tempPage);
    await deleteAllRecordings(tempPage);
  } finally {
    // FIX: previously the context was closed before its page (page.close()
    // after context.close() is at best a no-op), and a cleanup failure left
    // the context open. Close the page first, then its owning context.
    await tempPage.close();
    await tempContext.close();
  }
});
// =========================================================================
// SMART MOSAIC LAYOUT TESTS
// These tests verify that the Smart Mosaic layout correctly displays
// participants based on their speaking activity, showing only the most
// recent active speakers up to the configured limit.
// =========================================================================
test.describe('Smart Mosaic Layout - Speaker Priority', () => {
test('should display only local participant and the single active remote speaker when limit is set to 1 and one remote is muted', async ({
  page
}) => {
  // Scenario: 3 participants (local + remote A speaking + remote B muted), limit = 1
  // Expected: Grid shows local + remote A only (2 participants total)
  // Audio: Remote A uses continuous_speech.wav, Remote B has no audio
  // (comment previously said .ogg; the generator script only produces WAV files)
  // Local participant joins the room
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  // Wait for session to be ready
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Configure Smart Mosaic layout with limit = 1
  await configureLayoutMode(page, 'smart-mosaic', 1);
  // Join fake participant A (speaking with continuous audio)
  await Promise.all([
    joinBrowserFakeParticipant(roomId, 'RemoteA-Speaker', {
      audioFile: 'continuous_speech.wav'
    }),
    // Join fake participant B (muted/silent - no audio; CLI-based participant)
    joinFakeParticipant(roomId, 'RemoteB-Silent')
  ]);
  // Wait for participants to appear and speaker detection to process
  await waitForParticipantVisible(page, 'RemoteA-Speaker');
  // Verify the grid shows exactly 2 participants (local + 1 remote speaker)
  const participantCount = await getVisibleParticipantsCount(page);
  expect(participantCount).toBe(2);
  // Verify the visible participants are local and RemoteA (the speaker)
  const visibleIdentities = await getVisibleParticipantNames(page);
  // NOTE(review): local-participant name assertion is disabled — confirm how
  // the local tile renders its name before re-enabling.
  // expect(visibleIdentities).toContain(participantName); // Local participant
  expect(visibleIdentities).toContain('RemoteA-Speaker'); // Active speaker
  expect(visibleIdentities).not.toContain('RemoteB-Silent'); // Silent participant should NOT be visible
});
test('should reorder two remote participants based on alternating speech activity while keeping local participant always visible', async ({
  page
}) => {
  // Scenario: 3 participants, A speaks first (0-5s), then B speaks (5s onwards)
  // Expected: Initially A is prioritized, after B speaks, B becomes prioritized
  // Audio: A uses speech_5s_then_silence.wav, B uses silence_5s_then_speech.wav
  // Local participant joins the room
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  // Wait for session to be ready
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Configure Smart Mosaic layout with limit = 1
  // Only 1 remote participant should be visible at a time (plus local)
  await configureLayoutMode(page, 'smart-mosaic', 1);
  // Join fake participant A (speaks first 5s, then silent)
  // Join fake participant B (silent first 5s, then speaks)
  // Use browser-based fake participant to ensure VAD triggers correctly (lk CLI always send active speakers events when using audio files)
  await joinBrowserFakeParticipant(roomId, 'RemoteA-SpeaksFirst', {
    audioFile: 'speech_5s_then_silence.wav'
  });
  await waitForParticipantVisible(page, 'RemoteA-SpeaksFirst');
  await joinBrowserFakeParticipant(roomId, 'RemoteB-SpeaksLater', {
    audioFile: 'silence_5s_then_speech.wav'
  });
  // Verify that RemoteA is visible (he's speaking in first 5s)
  let [visibleIdentities, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  expect(visibleIdentities).toContain('RemoteA-SpeaksFirst');
  expect(visibleIdentities).not.toContain('RemoteB-SpeaksLater');
  // Verify we have exactly 2 participants visible (local + 1 remote)
  // FIX: this assertion was accidentally duplicated on two consecutive lines.
  expect(participantCount).toBe(2);
  // Wait for the speech transition (A stops at 5s, B starts at 5s)
  // Wait additional time for B to start speaking and be detected
  await waitForParticipantVisible(page, 'RemoteB-SpeaksLater');
  // Verify that RemoteB is now visible (he started speaking)
  [visibleIdentities, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  expect(visibleIdentities).toContain('RemoteB-SpeaksLater');
  expect(visibleIdentities).not.toContain('RemoteA-SpeaksFirst');
  // Verify still exactly 2 participants visible (local + 1 remote)
  expect(participantCount).toBe(2);
  // Verify local participant remained visible throughout
  // The local participant should always be visible regardless of speaking state
  expect(visibleIdentities.length).toBe(2); // Local + current active speaker
});
test('should rotate three remote participants by most recent speaker order with limit of 2 visible remotes', async ({
  page
}) => {
  // Scenario: 4 participants with limit = 2, speaking order A → B → C
  // Expected: After rotation, grid shows local + B + C (last 2 speakers)
  // Audio: A speaks 0-3s, B speaks 5-8s, C speaks 10-13s
  // Local moderator joins and waits for the session UI.
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Smart Mosaic with limit = 2: two remotes visible at a time (plus local = 3 total).
  await configureLayoutMode(page, 'smart-mosaic', 2);
  // Three browser-based fakes with staggered speech windows:
  //   speaker_seq_A.wav → 0-3s, speaker_seq_B.wav → 5-8s, speaker_seq_C.wav → 10-13s
  await joinBrowserFakeParticipant(roomId, 'RemoteA-First', {
    audioFile: 'speaker_seq_A.wav'
  });
  await joinBrowserFakeParticipant(roomId, 'RemoteB-Second', {
    audioFile: 'speaker_seq_B.wav'
  });
  await joinBrowserFakeParticipant(roomId, 'RemoteC-Third', {
    audioFile: 'speaker_seq_C.wav'
  });
  // A speaks first (0-3s) — wait for it to surface in the grid.
  await waitForParticipantVisible(page, 'RemoteA-First');
  // Early state: A plus one other remote fill the two slots.
  const [earlyNames, earlyCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  console.log('After A starts speaking:', earlyNames);
  expect(earlyNames).toContain('RemoteA-First');
  expect(earlyCount).toBe(3); // Local + 2 remotes
  // Key assertion: when C starts speaking (10-13s) it must replace A, the
  // oldest speaker.
  await waitForParticipantSwap(page, 'RemoteC-Third', 'RemoteA-First', 30000);
  // Final state: the two most recent speakers (B and C) are visible.
  const [finalNames, finalCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  console.log('After C speaks (rotation complete):', finalNames);
  expect(finalCount).toBe(3); // Local + 2 remotes
  expect(finalNames).toContain('RemoteB-Second');
  expect(finalNames).toContain('RemoteC-Third');
  expect(finalNames).not.toContain('RemoteA-First'); // A was rotated out
});
test('should display local and three most active remote speakers while ignoring the silent participant when limit is 3', async ({
  page
}) => {
  // Scenario: 5 participants (local + A, B, C speaking + D always silent), limit = 3
  // Expected: Grid shows local + A + B + C, D is never shown
  // Audio: A, B, C use continuous_speech.wav, D has no audio
  // Local moderator joins and waits for the session UI.
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Smart Mosaic with limit = 3: three remotes visible (plus local = 4 total).
  await configureLayoutMode(page, 'smart-mosaic', 3);
  // Silent participant joins first via lk CLI (no audio, so no VAD triggers).
  await joinFakeParticipant(roomId, 'RemoteD-Silent');
  // Three browser-based fakes with continuous speech — all become active speakers.
  const speakers = ['RemoteA-Speaker', 'RemoteB-Speaker', 'RemoteC-Speaker'];
  await Promise.all(
    speakers.map((name) =>
      joinBrowserFakeParticipant(roomId, name, {
        audioFile: 'continuous_speech.wav'
      })
    )
  );
  // Wait until speaker detection has surfaced every speaker.
  await Promise.all(speakers.map((name) => waitForParticipantVisible(page, name)));
  // Grid should hold exactly local + 3 active speakers.
  const [visibleIdentities, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  expect(participantCount).toBe(4);
  // All three speakers are visible...
  for (const name of speakers) {
    expect(visibleIdentities).toContain(name);
  }
  // ...and the silent participant is excluded: limit is 3 and all slots are
  // taken by active speakers.
  expect(visibleIdentities).not.toContain('RemoteD-Silent');
});
test('should handle simultaneous speech from multiple participants and correctly reorder when only one continues speaking', async ({
  page
}) => {
  // Scenario: 3 remote participants + local, limit = 2
  // All speak simultaneously at the start, then only A continues speaking
  // Expected: Initially the first two joiners are visible (all speaking)
  // After 5s, only A continues → A should remain visible as active speaker
  // Audio: A uses continuous_speech.wav (30s continuous speech)
  //        B, C use simultaneous_then_stop.wav (5s speech then silence)
  // NOTE(review): scenario text previously claimed A used
  // simultaneous_then_solo.wav (15s speech), but the code below joins A with
  // continuous_speech.wav — comments updated to match the code; confirm intent.
  // Local participant joins the room
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  // Wait for session to be ready
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Configure Smart Mosaic layout with limit = 2
  // Only 2 remote participants visible at a time
  await configureLayoutMode(page, 'smart-mosaic', 2);
  // Join two browser-based fake participants first
  // A continues speaking, B stops after 5s
  await Promise.all([
    joinBrowserFakeParticipant(roomId, 'RemoteA-ContinuesSpeaking', {
      audioFile: 'continuous_speech.wav'
    }),
    joinBrowserFakeParticipant(roomId, 'RemoteB-StopsSpeaking', {
      audioFile: 'simultaneous_then_stop.wav' //5s speech + 25s silence
    })
  ]);
  // Wait for simultaneous speech period (first 5s - all speaking)
  await Promise.all([
    waitForParticipantVisible(page, 'RemoteA-ContinuesSpeaking'),
    waitForParticipantVisible(page, 'RemoteB-StopsSpeaking')
  ]);
  // C joins last, after the two slots are already filled
  await joinBrowserFakeParticipant(roomId, 'RemoteC-StopsSpeaking', {
    audioFile: 'simultaneous_then_stop.wav' //5s speech + 25s silence
  });
  let [visibleIdentities, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  // During simultaneous speech, we should see exactly 3 participants (local + 2 remotes)
  expect(participantCount).toBe(3);
  // A and B occupy the two remote slots; late-joining C is not shown
  expect(visibleIdentities).toContain('RemoteA-ContinuesSpeaking');
  expect(visibleIdentities).toContain('RemoteB-StopsSpeaking');
  expect(visibleIdentities).not.toContain('RemoteC-StopsSpeaking');
  // Wait for B and C to stop speaking (after 5s mark)
  // Only A continues speaking, so A should remain as priority speaker
  await page.waitForTimeout(6000); // Wait until ~11s mark (well past the 5s cutoff)
  [visibleIdentities, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  console.log('After B and C stop speaking (~11s):', visibleIdentities);
  // A should definitely be visible (still speaking)
  expect(visibleIdentities).toContain('RemoteA-ContinuesSpeaking');
  // Verify participant count is still 3 (local + 2 remotes)
  // Even though only A is speaking, the layout maintains 2 remotes
  expect(participantCount).toBe(3);
});
test('should not reorder layout continuously when smart mosaic limit is reached and multiple participants speak intermittently', async ({
  page
}) => {
  // Scenario: 2 remote participants + local, limit = 1
  // Participants A and B speak continuously
  // Expected: Layout stabilizes showing local + 1 most recent active speakers
  // Audio: A and B use continuous_speech.wav
  // Local moderator joins and waits for the session UI.
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Smart Mosaic with limit = 1: a single remote slot.
  await configureLayoutMode(page, 'smart-mosaic', 1);
  // A joins first with continuous speech and claims the remote slot.
  await joinBrowserFakeParticipant(roomId, 'RemoteA-Continuous', {
    audioFile: 'continuous_speech.wav'
  });
  await waitForParticipantVisible(page, 'RemoteA-Continuous');
  // Baseline: local + A.
  const [initialCount, initialNames] = await Promise.all([
    getVisibleParticipantsCount(page),
    getVisibleParticipantNames(page)
  ]);
  expect(initialCount).toBe(2);
  expect(initialNames).toContain('RemoteA-Continuous');
  // B joins, also speaking continuously, competing for the single slot.
  await joinBrowserFakeParticipant(roomId, 'RemoteB-Continuous', {
    audioFile: 'continuous_speech.wav'
  });
  // Poll 20 times at 50ms intervals: the layout must stay stable on A and
  // never flip to B.
  let probes = 0;
  while (probes < 20) {
    const [count, names] = await Promise.all([
      getVisibleParticipantsCount(page),
      getVisibleParticipantNames(page)
    ]);
    expect(names).toContain('RemoteA-Continuous');
    expect(names).not.toContain('RemoteB-Continuous');
    expect(count).toBe(2);
    await page.waitForTimeout(50);
    probes += 1;
  }
});
test('should immediately prioritize a newly joined participant who starts speaking over existing silent participants', async ({
  page
}) => {
  // Scenario: Local + 2 silent participants (A, B) in room, limit = 1
  // New participant C joins, stays silent for 5s, then starts speaking
  // Expected: once C speaks, C appears in the grid, replacing a silent participant
  // Audio: A, B are silent (lk CLI), C uses silence_5s_then_speech.wav
  // NOTE(review): the comment previously said C used continuous_speech.wav and
  // "starts speaking immediately"; the code joins C with
  // silence_5s_then_speech.wav — which is why the first assertion below
  // expects C to NOT be visible yet. Confirm which behavior was intended.
  // Local participant joins the room
  await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
  await joinRoomAs('moderator', participantName, page);
  // Wait for session to be ready
  await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
  await muteAudio(page); // Mute local to avoid interference
  // Configure Smart Mosaic layout with limit = 1
  // Only 1 remote participant visible at a time
  await configureLayoutMode(page, 'smart-mosaic', 1);
  // Join two silent participants using lk CLI (no VAD triggers)
  await Promise.all([
    joinFakeParticipant(roomId, 'RemoteA-Silent'),
    joinFakeParticipant(roomId, 'RemoteB-Silent')
  ]);
  // Wait for silent participants to appear
  await page.waitForTimeout(2000);
  // New participant C joins (silent for its first 5s of audio)
  await joinBrowserFakeParticipant(roomId, 'RemoteC-NewSpeaker', {
    audioFile: 'silence_5s_then_speech.wav'
  });
  await page.waitForTimeout(2000);
  let [visibleNames, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  // C has not spoken yet, so it must not occupy the single remote slot
  expect(visibleNames).not.toContain('RemoteC-NewSpeaker');
  // Wait for speaker detection to process (C starts speaking after 5s)
  await waitForParticipantVisible(page, 'RemoteC-NewSpeaker');
  [visibleNames, participantCount] = await Promise.all([
    getVisibleParticipantNames(page),
    getVisibleParticipantsCount(page)
  ]);
  // Verify C is now visible (speaking has priority)
  expect(visibleNames).toContain('RemoteC-NewSpeaker');
  expect(participantCount).toBe(2); // Local + 1 remote
});
});
test.describe('Smart Mosaic Layout - Participant Join/Leave Handling', () => {
    test('should update visible participants correctly when a visible speaker leaves the room', async ({
        page
    }) => {
        // Scenario: Local + 3 remote participants (A, B, C) with limit = 2
        // A and B are visible speakers, C joined via lk CLI
        // A leaves the room → B must remain visible and A must disappear.
        // The assertions keep the total at 3 (local + 2 remotes), i.e. the freed
        // slot is re-filled; the explicit check on C is currently disabled below.
        // Audio: A and B use continuous_speech.wav, C publishes the lk CLI demo

        // Step 1: Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Step 2: Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });

        // Step 3: Configure Smart Mosaic layout with limit = 2
        await configureLayoutMode(page, 'smart-mosaic', 2);

        // Step 4: Join two speaking browser participants plus one CLI participant
        await Promise.all([
            joinBrowserFakeParticipant(roomId, 'RemoteA-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteB-Speaker', {
                audioFile: 'continuous_speech.wav'
            })
        ]);
        await joinFakeParticipant(roomId, 'RemoteC-Silent');

        // Step 5: Wait for speaker detection to surface both speakers
        await Promise.all([
            waitForParticipantVisible(page, 'RemoteA-Speaker'),
            waitForParticipantVisible(page, 'RemoteB-Speaker')
        ]);

        // Verify A and B are visible
        let [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('Before A leaves, visible participants:', visibleNames);
        expect(visibleNames).toContain('RemoteA-Speaker');
        expect(visibleNames).toContain('RemoteB-Speaker');
        expect(participantCount).toBe(3); // Local + 2 remotes

        // Step 6: Disconnect participant A (visible speaker)
        await disconnectBrowserFakeParticipant(roomId, 'RemoteA-Speaker');

        // Step 7: Wait for layout to update
        await page.waitForTimeout(1000);
        [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('After A leaves, visible participants:', visibleNames);

        // Step 8: Verify B remains visible and A is gone
        expect(visibleNames).toContain('RemoteB-Speaker');
        // NOTE(review): the check that C fills the freed slot is disabled, yet the
        // count assertion below implies some participant does — confirm intended
        // behavior before re-enabling.
        // expect(visibleNames).toContain('RemoteC-Silent');
        expect(visibleNames).not.toContain('RemoteA-Speaker');
        expect(participantCount).toBe(3); // Local + 2 remotes
    });

    test('should update visible participants correctly when a silent participant joins the room', async ({
        page
    }) => {
        // Scenario: Local + 2 remote participants (A speaking, B silent) with limit = 1
        // A is the visible speaker, B is silent
        // C joins as another silent participant → should NOT appear in the grid
        // Audio: A uses continuous_speech.wav, B and C join via the lk CLI

        // Step 1: Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Step 2: Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });

        // Step 3: Configure Smart Mosaic layout with limit = 1
        await configureLayoutMode(page, 'smart-mosaic', 1);

        // Step 4: Join two remote participants
        await joinBrowserFakeParticipant(roomId, 'RemoteA-Speaker', {
            audioFile: 'continuous_speech.wav'
        });
        await joinFakeParticipant(roomId, 'RemoteB-Silent');

        // Step 5: Wait for speaker detection to process
        await page.waitForTimeout(2000);

        // Verify A is visible
        let [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('Before C joins, visible participants:', visibleNames);
        expect(visibleNames).toContain('RemoteA-Speaker');
        expect(participantCount).toBe(2); // Local + 1 remote

        // Step 6: Join new silent participant C
        await joinFakeParticipant(roomId, 'RemoteC-Silent');

        // Step 7: Wait for layout to update
        await page.waitForTimeout(3000);
        [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('After C joins, visible participants:', visibleNames);

        // Step 8: Verify the grid is unchanged — A stays and the count stays at limit
        expect(visibleNames).toContain('RemoteA-Speaker');
        expect(participantCount).toBe(2); // Local + 1 remote
    });
});
test.describe('Mosaic Layout and Smart Mosaic Layout Switching', () => {
    test('should switch from Smart Mosaic to Mosaic layout and display all participants', async ({ page }) => {
        // Scenario: Start in Smart Mosaic layout with limit = 2, switch to Mosaic
        // Expected: After switching, all participants become visible in the grid
        // Audio: Participants A, B, C, D use continuous_speech.wav

        // Step 1: Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Step 2: Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });

        // Step 3: Join four browser-based fake participants with continuous speech audio
        await Promise.all([
            joinBrowserFakeParticipant(roomId, 'RemoteA-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteB-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteC-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteD-Speaker', {
                audioFile: 'continuous_speech.wav'
            })
        ]);

        // Step 4: Wait for all participants to appear (layout not yet restricted)
        await waitForParticipantCount(page, 5); // Local + 4 remotes

        // Step 5: Configure Smart Mosaic layout with limit = 2
        await configureLayoutMode(page, 'smart-mosaic', 2);

        // Verify only 2 remote participants are visible
        let [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('In Smart Mosaic layout, visible participants:', visibleNames);
        expect(participantCount).toBe(3); // Local + 2 remotes

        // Step 6: Switch to Mosaic layout (all participants visible)
        await configureLayoutMode(page, 'mosaic');

        // Step 7: Wait for layout to update
        await page.waitForTimeout(3000);
        [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('After switching to Mosaic, visible participants:', visibleNames);

        // Step 8: Verify all 4 remote participants are now visible
        expect(participantCount).toBe(5); // Local + 4 remotes
    });

    test('should switch from Mosaic to Smart Mosaic layout and maintain participant visibility based on speaking activity', async ({
        page
    }) => {
        // Scenario: Start in Mosaic layout with 4 participants visible, switch to Smart Mosaic with limit = 2
        // Expected: After switching, only the 2 most recent active speakers remain visible
        // Audio: Participants A, B, C, D use continuous_speech.wav

        // Wait for local participant to join the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });

        // Join four browser-based fake participants with continuous speech audio
        await Promise.all([
            joinBrowserFakeParticipant(roomId, 'RemoteA-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteB-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteC-Speaker', {
                audioFile: 'continuous_speech.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteD-Speaker', {
                audioFile: 'continuous_speech.wav'
            })
        ]);

        // Wait for all participants to appear in the unrestricted layout
        await waitForParticipantCount(page, 5); // Local + 4 remotes

        // Switch to Smart Mosaic layout with limit = 2
        await configureLayoutMode(page, 'smart-mosaic', 2);

        // Wait for layout to update speaker visibility
        await page.waitForTimeout(3000);
        const [visibleNames, participantCount] = await Promise.all([
            getVisibleParticipantNames(page),
            getVisibleParticipantsCount(page)
        ]);
        console.log('After switching to Smart Mosaic, visible participants:', visibleNames);

        // Verify only 2 remote participants are visible (most recent speakers)
        expect(participantCount).toBe(3); // Local + 2 remotes
    });
});
// =========================================================================
// SMART MOSAIC LAYOUT - AUDIO FILTERING TESTS
// These tests verify the hysteresis mechanisms that filter out:
// 1. Low volume audio (below audioLevel threshold of 0.15)
// 2. Brief sounds (below minimum speaking duration of 1.5s)
// =========================================================================
test.describe('Smart Mosaic Layout - Audio Level and Duration Filtering', () => {
    test('should not display participant with low volume audio below threshold', async ({ page }) => {
        // Scenario: 3 participants - Local + Remote A (lk CLI) + Remote B (low volume ~10%)
        // Expected: Remote A keeps the single slot, Remote B is filtered due to low audioLevel
        // Audio: A publishes the lk CLI demo, B uses low_volume_speech.wav (10% volume)

        // Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });

        // Configure Smart Mosaic layout with limit = 1
        await configureLayoutMode(page, 'smart-mosaic', 1);

        // Join the first remote via the lk CLI so it occupies the single slot
        await joinFakeParticipant(roomId, 'RemoteA-Silence');

        // Wait until local + 1 remote are rendered
        await waitForParticipantCount(page, 2);

        // Join the low-volume participant
        await joinBrowserFakeParticipant(roomId, 'RemoteB-LowVolume', {
            audioFile: 'low_volume_speech.wav' // 10% volume - below 0.15 threshold
        });

        // Wait additional time for B's low volume to be evaluated
        await page.waitForTimeout(4000);

        // Poll several times back-to-back (no delay between iterations)
        for (let i = 0; i < 5; i++) {
            const [visibleNames, participantCount] = await Promise.all([
                getVisibleParticipantNames(page),
                getVisibleParticipantsCount(page)
            ]);
            console.log('Visible participants with volume filtering:', visibleNames);
            // Remote B (low volume) should NOT be prioritized as active speaker
            expect(visibleNames).not.toContain('RemoteB-LowVolume');
            expect(participantCount).toBe(2); // Local + 1 remote
        }
    });

    test('should maintain layout stability when multiple participants have intermittent low-level audio', async ({
        page
    }) => {
        // Scenario: 1 seeded remote + 2 remotes with low-level pink-noise audio, limit = 1
        // Expected: Layout remains stable without constant swapping (all filtered by audioLevel threshold)
        // Audio: the two late joiners use ambient_pink_noise.wav

        // Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
        await muteAudio(page); // Mute local to avoid interference

        // Configure Smart Mosaic layout with limit = 1
        await configureLayoutMode(page, 'smart-mosaic', 1);

        // Seed the single slot with a CLI participant
        await joinFakeParticipant(roomId, 'Remote-Initial');
        await waitForParticipantCount(page, 2);

        // Join two participants whose audio stays below the speaking threshold
        await Promise.all([
            joinBrowserFakeParticipant(roomId, 'RemoteA-LowVol', {
                audioFile: 'ambient_pink_noise.wav'
            }),
            joinBrowserFakeParticipant(roomId, 'RemoteB-LowVol', {
                audioFile: 'ambient_pink_noise.wav'
            })
        ]);
        await page.waitForTimeout(3000);

        // Record initial visible participants
        const initialVisibleNames = await getVisibleParticipantNames(page);
        console.log('Initial visible participants:', initialVisibleNames);

        // Check layout stability over time - should not swap since all are below threshold
        let swapCount = 0;
        let previousNames = [...initialVisibleNames];
        for (let i = 0; i < 10; i++) {
            await page.waitForTimeout(500);
            const currentNames = await getVisibleParticipantNames(page);
            // A swap means some previously visible name disappeared
            const hasSwap = !previousNames.every((name) => currentNames.includes(name));
            if (hasSwap) {
                swapCount++;
                console.log(`Swap detected at check ${i + 1}:`, previousNames, '->', currentNames);
            }
            previousNames = [...currentNames];
        }
        console.log(`Total swaps detected: ${swapCount}`);

        // Layout should be stable - no swaps should occur since all are filtered
        expect(swapCount).toBe(0);
    });

    test('should not prioritize participant with brief sound (cough) under minimum duration', async ({ page }) => {
        // Scenario: 3 participants - Local + Remote A (lk CLI) + Remote B (brief cough only)
        // Expected: Remote A keeps the single slot, Remote B's brief cough is filtered out
        // Audio: A publishes the lk CLI demo, B uses brief_cough_at_5s.wav (0.5s sound)

        // Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
        await muteAudio(page); // Mute local to avoid interference

        // Configure Smart Mosaic layout with limit = 1
        await configureLayoutMode(page, 'smart-mosaic', 1);

        // Join the first remote via the lk CLI so it occupies the single slot
        await joinFakeParticipant(roomId, 'RemoteA-Speaker');

        // Wait until local + 1 remote are rendered
        await waitForParticipantCount(page, 2);

        // Now join participant B with brief cough sound
        await joinBrowserFakeParticipant(roomId, 'RemoteB-Cough', {
            audioFile: 'brief_cough_at_5s.wav' // 0.5s sound - below minimum duration
        });

        // Wait for the brief sound to be processed
        await page.waitForTimeout(5000);
        for (let i = 0; i < 5; i++) {
            const [visibleNames, participantCount] = await Promise.all([
                getVisibleParticipantNames(page),
                getVisibleParticipantsCount(page)
            ]);
            // B's sub-threshold cough must never displace the current remote
            expect(visibleNames).not.toContain('RemoteB-Cough');
            expect(participantCount).toBe(2); // Local + 1 remote
            await page.waitForTimeout(500);
        }
    });

    test('should not swap active speaker for participant with 1 second sound burst', async ({ page }) => {
        // Scenario: 3 participants - Local + Remote A (lk CLI) + Remote B (1s sound burst)
        // Expected: A remains visible, B's 1 second sound is filtered (< 1.5s threshold)
        // Audio: A publishes the lk CLI demo, B uses brief_sound_1s_at_5s.wav

        // Local participant joins the room
        await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
        await joinRoomAs('moderator', participantName, page);

        // Wait for session to be ready
        await waitForElementInIframe(page, 'ov-session', { state: 'visible' });

        // Configure Smart Mosaic layout with limit = 1
        await configureLayoutMode(page, 'smart-mosaic', 1);

        // Join participant A via the lk CLI so it occupies the single slot
        await joinFakeParticipant(roomId, 'RemoteA-Continuous');
        await waitForParticipantCount(page, 2);

        // Join participant B with 1 second sound burst
        await joinBrowserFakeParticipant(roomId, 'RemoteB-BriefSound', {
            audioFile: 'brief_sound_1s_at_5s.wav' // 1s sound
        });

        // Wait 5s so B's burst (at the 5s mark) has played
        await page.waitForTimeout(5000);

        // Track visible participants over time to ensure no swap occurs
        for (let i = 0; i < 5; i++) {
            const visibleNames = await getVisibleParticipantNames(page);
            // B's short burst must never appear in the grid
            expect(visibleNames).not.toContain('RemoteB-BriefSound');
            console.log(`Check ${i + 1}: Visible participants:`, visibleNames);
            await page.waitForTimeout(500);
        }
    });
});
});

View File

@ -162,7 +162,8 @@ const getDefaultRoomConfig = (): MeetRoomConfig => ({
allowAccessTo: MeetRecordingAccess.ADMIN_MODERATOR_SPEAKER
},
chat: { enabled: true },
virtualBackground: { enabled: true }
virtualBackground: { enabled: true },
e2ee: { enabled: false }
});
// Helper function to create a room for testing
@ -616,3 +617,296 @@ export const isShareLinkOverlayyHidden = async (page: Page, overlaySelector: str
console.log('❌ Overlay is still visible');
return false;
};
// ==========================================
// SMART MOSAIC LAYOUT HELPER FUNCTIONS
// ==========================================

/**
 * Opens the settings panel and applies the requested video layout.
 *
 * Flow: open the more-options menu → open layout settings → pick the layout
 * radio button → (Smart Mosaic only) set the max-participants slider → close
 * the panel.
 *
 * @param page - Playwright page object
 * @param layoutMode - The layout mode to select ('mosaic' or 'smart-mosaic')
 * @param maxParticipants - Maximum number of remote participants to show (1-6, only used for smart-mosaic)
 */
export const configureLayoutMode = async (
    page: Page,
    layoutMode: 'smart-mosaic' | 'mosaic',
    maxParticipants?: number
): Promise<void> => {
    const countSuffix = maxParticipants ? `, max participants: ${maxParticipants}` : '';
    console.log(`🎛️ Configuring layout mode: ${layoutMode}${countSuffix}`);

    // Open the more-options menu, then the layout settings entry.
    // The button id depends on context, hence the combined selector.
    await openMoreOptionsMenu(page);
    const layoutSettingsBtn = '#grid-layout-settings-btn, #toolbar-settings-btn';
    await waitForElementInIframe(page, layoutSettingsBtn, { state: 'visible' });
    await interactWithElementInIframe(page, layoutSettingsBtn, { action: 'click' });

    // The panel opens with a short animation
    await waitForElementInIframe(page, 'ov-settings-panel', { state: 'visible' });
    await page.waitForTimeout(500);

    // Choose the layout via its radio button
    const radioSelector = layoutMode === 'mosaic' ? '#layout-mosaic' : '#layout-smart-mosaic';
    await waitForElementInIframe(page, radioSelector, { state: 'visible' });
    await interactWithElementInIframe(page, radioSelector, { action: 'click' });
    await page.waitForTimeout(300);

    // The participant slider only exists while Smart Mosaic is selected
    if (layoutMode === 'smart-mosaic' && maxParticipants !== undefined) {
        await setSmartMosaicParticipantCount(page, maxParticipants);
    }

    await closeSettingsPanel(page);
    console.log(
        `✅ Layout configured: ${layoutMode}${maxParticipants ? ` with ${maxParticipants} max participants` : ''}`
    );
};
/**
 * Adjusts the Smart Mosaic "max participants" slider.
 * The settings panel must already be open with Smart Mosaic mode selected,
 * otherwise the slider is not rendered and this times out.
 *
 * @param page - Playwright page object
 * @param count - Desired number of visible remote participants (1-6)
 * @throws Error when count is outside the 1-6 range
 */
export const setSmartMosaicParticipantCount = async (page: Page, count: number): Promise<void> => {
    if (count < 1 || count > 6) {
        throw new Error(`Invalid participant count: ${count}. Must be between 1 and 6.`);
    }
    console.log(`🔢 Setting Smart Mosaic participant count to: ${count}`);

    // The slider only appears in Smart Mosaic mode
    const sliderSelector = '.participant-slider input[matSliderThumb]';
    await waitForElementInIframe(page, sliderSelector, { state: 'visible', timeout: 5000 });

    // Locate the slider input inside the embedded iframe
    const frame = await getIframeInShadowDom(page);
    const sliderInput = frame.locator(sliderSelector);

    // Focus first, then write the value directly into the slider input
    await sliderInput.focus();
    await sliderInput.fill(String(count));
    await page.waitForTimeout(300); // let the new value propagate

    console.log(`✅ Smart Mosaic participant count set to: ${count}`);
};
/**
 * Closes the settings panel, preferring the dedicated close button and
 * falling back to a click outside the panel when the button is absent.
 *
 * @param page - Playwright page object
 */
export const closeSettingsPanel = async (page: Page): Promise<void> => {
    const closeButtonSelector = '.panel-close-button';
    try {
        // Preferred path: explicit close button inside the panel
        await waitForElementInIframe(page, closeButtonSelector, { state: 'visible', timeout: 2000 });
        await interactWithElementInIframe(page, closeButtonSelector, { action: 'click' });
    } catch {
        // Fallback: clicking anywhere outside dismisses the panel
        await interactWithElementInIframe(page, 'body', { action: 'click' });
    }
    // Allow the closing animation to finish
    await page.waitForTimeout(500);
};
/**
 * Toggles the local microphone via the toolbar mic button.
 * NOTE(review): this clicks the toggle blindly — it mutes only when audio is
 * currently unmuted; a second call re-enables the mic. Confirm callers never
 * invoke it twice in a row expecting "still muted".
 *
 * @param page - Playwright page object
 */
export const muteAudio = async (page: Page): Promise<void> => {
    await interactWithElementInIframe(page, '#mic-btn', { action: 'click' });
    await page.waitForTimeout(500); // Wait for the toggle to take effect
};
/**
 * Counts the participant tiles currently rendered in the video grid.
 *
 * @param page - Playwright page object
 * @returns Number of visible participant tiles
 */
export const getVisibleParticipantsCount = async (page: Page): Promise<number> => {
    const tileCount = await countElementsInIframe(page, '.OV_publisher');
    console.log(`👥 Visible participants in grid: ${tileCount}`);
    return tileCount;
};
/**
 * Collects the display names of all participants visible in the grid.
 *
 * @param page - Playwright page object
 * @returns Array of trimmed, non-empty participant names visible in the grid
 */
export const getVisibleParticipantNames = async (page: Page): Promise<string[]> => {
    const frameLocator = await getIframeInShadowDom(page);
    // allTextContents() resolves every matching node in a single protocol
    // round-trip instead of one textContent() call per tile.
    const rawNames = await frameLocator.locator('.participant-name-container').allTextContents();
    // Trim and drop empty/whitespace-only entries (tiles without a rendered name)
    const names = rawNames.map((name) => name.trim()).filter((name) => name.length > 0);
    console.log(`👥 Visible participant names: ${names.join(', ')}`);
    return names;
};
/**
 * Polls the grid until it shows the expected number of participants.
 * Unlike waitForParticipantVisible, this resolves (rather than throws)
 * when the timeout elapses.
 *
 * @param page - Playwright page object
 * @param expectedCount - Expected number of visible participants
 * @param timeout - Maximum time to wait in milliseconds (default: 10000)
 * @returns true once the count matches, false when the timeout elapses
 */
export const waitForParticipantCount = async (
    page: Page,
    expectedCount: number,
    timeout: number = 10000
): Promise<boolean> => {
    const deadline = Date.now() + timeout;
    while (Date.now() < deadline) {
        const visible = await getVisibleParticipantsCount(page);
        if (visible === expectedCount) {
            console.log(`✅ Participant count reached: ${expectedCount}`);
            return true;
        }
        // Poll every half second
        await page.waitForTimeout(500);
    }
    const finalCount = await getVisibleParticipantsCount(page);
    console.log(`❌ Timeout waiting for participant count. Expected: ${expectedCount}, Got: ${finalCount}`);
    return false;
};
/**
 * Polls until a given participant's name shows up in the visible grid.
 *
 * @param page - Playwright page object
 * @param participantName - The name/identity of the participant to wait for
 * @param timeout - Maximum time to wait in milliseconds (default: 30000)
 * @returns true once the participant is visible
 * @throws Error when the timeout elapses first
 */
export const waitForParticipantVisible = async (
    page: Page,
    participantName: string,
    timeout: number = 30000
): Promise<boolean> => {
    console.log(`⏳ Waiting for participant "${participantName}" to become visible...`);
    const deadline = Date.now() + timeout;
    while (Date.now() < deadline) {
        const names = await getVisibleParticipantNames(page);
        if (names.includes(participantName)) {
            console.log(`✅ Participant "${participantName}" is now visible`);
            return true;
        }
        // Poll every half second
        await page.waitForTimeout(500);
    }
    const lastSeen = await getVisibleParticipantNames(page);
    throw new Error(
        `Timeout waiting for participant "${participantName}" to become visible. ` +
            `Current visible: [${lastSeen.join(', ')}]`
    );
};
/**
 * Polls until a given participant's name disappears from the visible grid.
 *
 * @param page - Playwright page object
 * @param participantName - The name/identity of the participant to wait for hiding
 * @param timeout - Maximum time to wait in milliseconds (default: 30000)
 * @returns true once the participant is no longer visible
 * @throws Error when the timeout elapses first
 */
export const waitForParticipantHidden = async (
    page: Page,
    participantName: string,
    timeout: number = 30000
): Promise<boolean> => {
    console.log(`⏳ Waiting for participant "${participantName}" to become hidden...`);
    const deadline = Date.now() + timeout;
    while (Date.now() < deadline) {
        const names = await getVisibleParticipantNames(page);
        if (!names.includes(participantName)) {
            console.log(`✅ Participant "${participantName}" is now hidden`);
            return true;
        }
        // Poll every half second
        await page.waitForTimeout(500);
    }
    const lastSeen = await getVisibleParticipantNames(page);
    throw new Error(
        `Timeout waiting for participant "${participantName}" to become hidden. ` +
            `Current visible: [${lastSeen.join(', ')}]`
    );
};
/**
 * Polls until one participant has replaced another in the grid, i.e.
 * participantToAppear is visible AND participantToDisappear is not.
 * Useful for testing Smart Mosaic speaker rotation.
 *
 * @param page - Playwright page object
 * @param participantToAppear - The participant expected to become visible
 * @param participantToDisappear - The participant expected to become hidden
 * @param timeout - Maximum time to wait in milliseconds (default: 30000)
 * @returns true once the swap is observed
 * @throws Error when the timeout elapses first
 */
export const waitForParticipantSwap = async (
    page: Page,
    participantToAppear: string,
    participantToDisappear: string,
    timeout: number = 30000
): Promise<boolean> => {
    console.log(`⏳ Waiting for swap: "${participantToAppear}" replaces "${participantToDisappear}"...`);
    const deadline = Date.now() + timeout;
    while (Date.now() < deadline) {
        const names = await getVisibleParticipantNames(page);
        if (names.includes(participantToAppear) && !names.includes(participantToDisappear)) {
            console.log(`✅ Swap complete: "${participantToAppear}" replaced "${participantToDisappear}"`);
            return true;
        }
        // Poll every half second
        await page.waitForTimeout(500);
    }
    const lastSeen = await getVisibleParticipantNames(page);
    throw new Error(
        `Timeout waiting for participant swap. Expected "${participantToAppear}" to replace "${participantToDisappear}". ` +
            `Current visible: [${lastSeen.join(', ')}]`
    );
};
/**
 * Promise-based delay helper.
 *
 * @param ms - Milliseconds to wait before the promise resolves
 */
export const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));

View File

@ -0,0 +1,283 @@
import { BrowserContext, chromium, Page } from '@playwright/test';
import { BrowserFakeParticipantOptions } from '../interfaces/fake-participant';
import { ChildProcess, spawn } from 'child_process';
import { fileURLToPath } from 'url';
import * as fs from 'fs';
import * as path from 'path';
import { joinRoomAs, leaveRoom, prepareForJoiningRoom, sleep, waitForElementInIframe } from './function-helpers';
import { MEET_TESTAPP_URL } from '../config';
// LiveKit credentials (fall back to local-dev defaults when env vars are unset)
const LIVEKIT_API_KEY = process.env['LIVEKIT_API_KEY'] || 'devkey';
const LIVEKIT_API_SECRET = process.env['LIVEKIT_API_SECRET'] || 'secret';

// Store fake participant processes for cleanup, keyed by `${roomId}-${identity}`
const fakeParticipantProcesses = new Map<string, ChildProcess>();

// ES Module equivalent of __dirname (import.meta.url → file path → directory)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ==========================================
// FAKE PARTICIPANT HELPER FUNCTIONS
// ==========================================

/**
 * Path to the test audio assets directory.
 * Resolved relative to this file's location so it works from any CWD.
 */
const AUDIO_ASSETS_DIR = path.resolve(__dirname, '../assets/audio');
/**
 * Joins a fake participant to a LiveKit room using the lk CLI.
 * The CLI publishes its demo media (`--publish-demo`), which the Smart Mosaic
 * tests treat as a non-speaking participant.
 *
 * The spawned process is tracked in fakeParticipantProcesses so it can be
 * cleaned up via disconnectFakeParticipant / disconnectAllFakeParticipants.
 *
 * @param roomId - The room ID to join
 * @param identity - The participant identity/name
 */
export const joinFakeParticipant = async (roomId: string, identity: string): Promise<void> => {
    console.log(`🤖 Joining fake participant: ${identity} to room: ${roomId}`);
    // Named `child` (not `process`) to avoid shadowing the Node.js global,
    // which this module reads for LIVEKIT_API_KEY / LIVEKIT_API_SECRET.
    const child = spawn('lk', [
        'room',
        'join',
        roomId,
        '--identity',
        identity,
        '--publish-demo',
        '--api-key',
        LIVEKIT_API_KEY,
        '--api-secret',
        LIVEKIT_API_SECRET
    ]);
    // Surface spawn failures (e.g. missing lk binary) instead of letting an
    // unhandled 'error' event crash the test run.
    child.on('error', (err) => {
        console.error(`❌ Failed to spawn lk CLI for ${identity}:`, err);
    });
    // Store process for cleanup
    fakeParticipantProcesses.set(`${roomId}-${identity}`, child);
    // Give the CLI a moment to connect to the room
    await sleep(1500);
    console.log(`✅ Fake participant joined: ${identity}`);
};
/**
 * Disconnects a specific fake participant by killing its lk CLI process.
 * No-op when no tracked process exists for the given room/identity pair.
 *
 * @param roomId - The room ID
 * @param identity - The participant identity to disconnect
 */
export const disconnectFakeParticipant = async (roomId: string, identity: string): Promise<void> => {
    const key = `${roomId}-${identity}`;
    // Named `child` (not `process`) to avoid shadowing the Node.js global
    const child = fakeParticipantProcesses.get(key);
    if (child) {
        child.kill();
        fakeParticipantProcesses.delete(key);
        console.log(`👋 Disconnected fake participant: ${identity}`);
        // Brief pause so the server registers the disconnect
        await sleep(500);
    }
};
/**
 * Disconnects all tracked fake participants from all rooms.
 * Should be called in afterEach or afterAll hooks.
 */
export const disconnectAllFakeParticipants = async (): Promise<void> => {
    // Iterate values() — the key is not needed, and destructuring an unused
    // `key` binding trips no-unused-vars lint rules.
    for (const child of fakeParticipantProcesses.values()) {
        child.kill();
    }
    fakeParticipantProcesses.clear();
    // Brief pause so the server registers the disconnects
    await sleep(500);
};
// ==========================================
// BROWSER-BASED FAKE PARTICIPANT HELPERS
// ==========================================
// These functions use Playwright browser tabs with fake audio devices
// to create participants that properly trigger LiveKit's VAD

/**
 * Store for browser-based fake participant contexts.
 * Each participant gets its own browser context with specific Chrome args.
 * NOTE(review): presumably keyed by `${roomId}-${identity}` like
 * fakeParticipantProcesses — confirm against the insertion site.
 */
const browserFakeParticipants = new Map<string, { context: BrowserContext; page: Page }>();
/**
 * Joins a fake participant to a room using a new browser instance with fake audio device.
 * This method properly triggers LiveKit's Voice Activity Detection (VAD) because
 * it uses Chrome's --use-file-for-fake-audio-capture flag.
 *
 * IMPORTANT: The audio file should be in WAV format for best compatibility with Chrome.
 * Chrome's fake audio capture works best with uncompressed audio.
 *
 * NOTE: `displayName`, `enableVideo` and `enableAudio` in the options are accepted
 * for interface compatibility but are not applied yet; the participant joins using
 * `identity` as its name and the default device state.
 *
 * @param roomId - The room ID to join
 * @param identity - The participant identity/name
 * @param options - Options for the fake participant
 * @returns The page object for the fake participant (for further interactions)
 *
 * @example
 * ```typescript
 * const participantPage = await joinBrowserFakeParticipant(
 *   roomId,
 *   'RemoteA-Speaker',
 *   { audioFile: 'continuous_speech.wav' }
 * );
 * ```
 */
export const joinBrowserFakeParticipant = async (
	roomId: string,
	identity: string,
	options: BrowserFakeParticipantOptions = {}
): Promise<Page> => {
	console.log(`🌐 Joining browser-based fake participant: ${identity} to room: ${roomId}`);
	// Only the media-file options are consumed for now; the remaining options
	// (displayName/enableVideo/enableAudio) are reserved for future use.
	// Destructuring just these two avoids unused-variable lint errors.
	const { audioFile, videoFile } = options;
	// Video assets directory (sibling to audio assets)
	const VIDEO_ASSETS_DIR = path.resolve(path.dirname(AUDIO_ASSETS_DIR), 'video');
	// Resolve audio file path (relative paths resolve against the audio assets dir)
	let audioFilePath: string | undefined;
	if (audioFile) {
		audioFilePath = path.isAbsolute(audioFile) ? audioFile : path.resolve(AUDIO_ASSETS_DIR, audioFile);
		if (!fs.existsSync(audioFilePath)) {
			throw new Error(`Audio file not found: ${audioFilePath}`);
		}
		console.log(` 🎵 Using audio file: ${audioFilePath}`);
	}
	// Resolve video file path (relative paths resolve against the video assets dir)
	let videoFilePath: string | undefined;
	if (videoFile) {
		videoFilePath = path.isAbsolute(videoFile) ? videoFile : path.resolve(VIDEO_ASSETS_DIR, videoFile);
		if (!fs.existsSync(videoFilePath)) {
			throw new Error(`Video file not found: ${videoFilePath}`);
		}
		console.log(` 🎬 Using video file: ${videoFilePath}`);
	}
	// Chrome flags for fake media devices
	const chromeArgs = [
		'--use-fake-ui-for-media-stream', // Auto-accept media permissions
		'--use-fake-device-for-media-stream', // Use fake devices
		'--allow-file-access-from-files',
		'--no-sandbox',
		'--disable-setuid-sandbox',
		'--disable-gpu',
		'--disable-dev-shm-usage'
	];
	// Add fake audio capture file if specified
	if (audioFilePath) {
		chromeArgs.push(`--use-file-for-fake-audio-capture=${audioFilePath}`);
	}
	// Add fake video capture file if specified
	// Chrome supports Y4M (YUV4MPEG2) and MJPEG formats for fake video capture.
	// Intentionally disabled for now — re-enable once the assets are in a supported format.
	if (videoFilePath) {
		// chromeArgs.push(`--use-file-for-fake-video-capture=${videoFilePath}`);
	}
	console.log(` 🔧 Chrome args: ${chromeArgs.join(' ')}`);
	// Launch a new browser context with the specific Chrome args.
	// launchPersistentContext is required because per-context args cannot be
	// passed to a shared browser instance.
	// Sanitize the identity so arbitrary characters cannot break the path.
	const safeIdentity = identity.replace(/[^a-zA-Z0-9_-]/g, '_');
	const userDataDir = `/tmp/playwright-fake-participant-${safeIdentity}-${Date.now()}`;
	const context = await chromium.launchPersistentContext(userDataDir, {
		headless: true, // Set to false for debugging
		args: chromeArgs,
		ignoreHTTPSErrors: true,
		bypassCSP: true
	});
	// Get the first page or create one
	const page = context.pages()[0] || (await context.newPage());
	// Store for cleanup by disconnectBrowserFakeParticipant / disconnectAllBrowserFakeParticipants
	const key = `${roomId}-${identity}`;
	browserFakeParticipants.set(key, { context, page });
	// Navigate, join as speaker, and wait for the session and publisher to appear.
	// Best-effort: failures are logged, not rethrown, so the caller gets the page
	// back and can decide how to react.
	try {
		await prepareForJoiningRoom(page, MEET_TESTAPP_URL, roomId);
		await joinRoomAs('speaker', identity, page);
		await waitForElementInIframe(page, 'ov-session', { state: 'visible' });
		await waitForElementInIframe(page, '.OV_publisher', { state: 'visible', timeout: 10000 });
	} catch (e) {
		console.log(` ⚠️ Join flow did not complete cleanly for ${identity}: ${e}`);
	}
	return page;
};
/**
 * Disconnects a browser-based fake participant from the room.
 * Attempts a graceful leave first, then closes the page and its browser context.
 *
 * @param roomId - The room ID
 * @param identity - The participant identity to disconnect
 */
export const disconnectBrowserFakeParticipant = async (roomId: string, identity: string): Promise<void> => {
	const entry = browserFakeParticipants.get(`${roomId}-${identity}`);
	if (!entry) {
		return;
	}
	const { context, page } = entry;
	try {
		// Leave the room gracefully before tearing the page down
		await leaveRoom(page);
		await page.close();
	} catch {
		/* ignore */
	}
	try {
		await context.close();
	} catch {
		/* ignore */
	}
	browserFakeParticipants.delete(`${roomId}-${identity}`);
	console.log(`👋 Disconnected browser fake participant: ${identity}`);
};
/**
 * Disconnects all browser-based fake participants, closing each page and
 * its browser context. Should be called in afterEach or afterAll hooks.
 */
export const disconnectAllBrowserFakeParticipants = async (): Promise<void> => {
	// Capture the count before clearing so it can be logged afterwards
	const count = browserFakeParticipants.size;
	for (const { context, page } of browserFakeParticipants.values()) {
		try {
			await page.close();
		} catch {
			/* ignore */
		}
		try {
			await context.close();
		} catch {
			/* ignore */
		}
	}
	browserFakeParticipants.clear();
	if (count > 0) {
		console.log(`👋 Disconnected all browser fake participants (${count})`);
	}
};
/**
 * Gets the page object for a browser-based fake participant.
 * Useful for interacting with the participant's UI (mute/unmute, etc.)
 *
 * @param roomId - The room ID
 * @param identity - The participant identity
 * @returns The Page object or undefined if not found
 */
export const getBrowserFakeParticipantPage = (roomId: string, identity: string): Page | undefined =>
	browserFakeParticipants.get(`${roomId}-${identity}`)?.page;

View File

@ -0,0 +1,15 @@
/**
 * Options for joining a browser-based fake participant
 * (consumed by joinBrowserFakeParticipant).
 */
export interface BrowserFakeParticipantOptions {
	/** Path to audio file (relative to assets/audio or absolute) - WAV format recommended */
	audioFile?: string;
	/** Path to video file (relative to assets/video or absolute) - Y4M or MJPEG format recommended for Chrome */
	videoFile?: string;
	/** Participant display name (defaults to the participant identity) */
	displayName?: string;
	/** Whether to enable video (default: true) */
	enableVideo?: boolean;
	/** Whether to enable audio (default: true) */
	enableAudio?: boolean;
}