feat(comments): add YouTube video comments extraction tools

Add two new MCP tools for extracting video comments:
- ytdlp_get_video_comments: Extract comments as structured JSON with
  author info, likes, timestamps, and reply threading
- ytdlp_get_video_comments_summary: Get human-readable summary of top comments

Features:
- Support for sorting by "top" (most liked) or "new" (newest first)
- Configurable comment limit (1-100 comments)
- Includes author verification status, pinned comments, and uploader replies
- Comprehensive error handling for disabled comments, private videos, etc.
- Comprehensive test suite
This commit is contained in:
Peter Keffer 2025-12-21 11:59:11 +00:00 committed by kevinwatt
parent d7f5ec0f62
commit 2e2888cccc
4 changed files with 592 additions and 0 deletions

View File

@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- **Video Comments Extraction**: New tools for extracting YouTube video comments
- `ytdlp_get_video_comments`: Extract comments in structured JSON format with author info, likes, timestamps, and reply threading
- `ytdlp_get_video_comments_summary`: Get human-readable summary of top comments
- Supports sorting by "top" (most liked) or "new" (newest first)
- Configurable comment limit (1-100 comments)
- Includes author verification status, pinned comments, and uploader replies
- Comprehensive test suite for comments functionality
### Changed
- Add Claude Code settings (.claude/, CLAUDE.md) to .gitignore
- Add development guideline to always update CHANGELOG.md

View File

@ -0,0 +1,179 @@
// @ts-nocheck
// @jest-environment node
import { describe, test, expect, beforeAll } from '@jest/globals';
import { getVideoComments, getVideoCommentsSummary } from '../modules/comments.js';
import type { CommentsResponse } from '../modules/comments.js';
import { CONFIG } from '../config.js';
// Blank out the Python-related environment variables for this test process
// before any test runs.
// NOTE(review): presumably this keeps a host-level PYTHONPATH/PYTHONHOME from
// leaking into the yt-dlp child process spawned by the module under test —
// confirm that the spawn helper inherits process.env.
process.env.PYTHONPATH = '';
process.env.PYTHONHOME = '';
describe('Video Comments Extraction', () => {
// Using a popular video that should have comments enabled
const testUrl = 'https://www.youtube.com/watch?v=jNQXAC9IVRw';
describe('getVideoComments', () => {
  // Runs the extractor against the shared test video and decodes its JSON payload.
  const fetchParsed = async (limit: number, order: 'top' | 'new'): Promise<CommentsResponse> => {
    const raw = await getVideoComments(testUrl, limit, order, CONFIG);
    return JSON.parse(raw);
  };

  test('should extract comments from YouTube video', async () => {
    const payload = await fetchParsed(5, 'top');

    // The response envelope must expose all three top-level keys.
    for (const key of ['count', 'has_more', 'comments']) {
      expect(payload).toHaveProperty(key);
    }
    expect(Array.isArray(payload.comments)).toBe(true);
    expect(payload.count).toBeGreaterThan(0);
    expect(payload.count).toBeLessThanOrEqual(5);
  }, 60000);

  test('should return comments with expected fields', async () => {
    const { comments } = await fetchParsed(3, 'top');
    if (comments.length === 0) {
      return;
    }

    const [first] = comments;

    // Both fields are expected on a typical comment...
    expect(first).toHaveProperty('text');
    expect(first).toHaveProperty('author');

    // ...and whenever they are set they must be strings.
    for (const field of ['text', 'author']) {
      if (first[field] !== undefined) {
        expect(typeof first[field]).toBe('string');
      }
    }
  }, 60000);

  test('should respect maxComments parameter', async () => {
    const { comments } = await fetchParsed(3, 'top');
    expect(comments.length).toBeLessThanOrEqual(3);
  }, 60000);

  test('should support different sort orders', async () => {
    // Smoke check only: each sort order must complete and yield a comments key.
    for (const order of ['top', 'new']) {
      const payload = await fetchParsed(2, order);
      expect(payload).toHaveProperty('comments');
    }
  }, 90000);

  test('should throw error for invalid URL', async () => {
    const attempt = getVideoComments('invalid-url', 5, 'top', CONFIG);
    await expect(attempt).rejects.toThrow();
  });

  test('should throw error for unsupported URL', async () => {
    const attempt = getVideoComments('https://example.com/video', 5, 'top', CONFIG);
    await expect(attempt).rejects.toThrow();
  }, 30000);
});
describe('getVideoCommentsSummary', () => {
  // Builds the summary text for the shared test video with the given comment cap.
  const summarize = (limit: number): Promise<string> =>
    getVideoCommentsSummary(testUrl, limit, CONFIG);

  test('should generate human-readable summary', async () => {
    const text = await summarize(5);

    expect(typeof text).toBe('string');
    expect(text.length).toBeGreaterThan(0);

    // Header line followed by at least one formatted comment entry.
    expect(text).toContain('Video Comments');
    expect(text).toContain('Author:');
  }, 60000);

  test('should respect maxComments parameter', async () => {
    const text = await summarize(3);

    // Each rendered comment contributes exactly one "Author:" marker.
    const markers = text.match(/Author:/g);
    if (markers !== null) {
      expect(markers.length).toBeLessThanOrEqual(3);
    }
  }, 60000);

  test('should throw error for invalid URL', async () => {
    const attempt = getVideoCommentsSummary('invalid-url', 5, CONFIG);
    await expect(attempt).rejects.toThrow();
  });

  test('should handle videos with different comment counts', async () => {
    const text = await summarize(10);

    // Whatever the comment count, the summary must be non-blank text.
    expect(typeof text).toBe('string');
    expect(text.trim().length).toBeGreaterThan(0);
  }, 60000);
});
describe('Error Handling', () => {
  // Both cases only require that extraction rejects; the message text is not inspected.
  test('should provide helpful error message for unavailable video', async () => {
    const attempt = getVideoComments(
      'https://www.youtube.com/watch?v=invalid_video_id_xyz123',
      5,
      'top',
      CONFIG
    );
    await expect(attempt).rejects.toThrow();
  }, 30000);

  test('should handle unsupported URLs gracefully', async () => {
    const attempt = getVideoComments('https://example.com/not-a-video', 5, 'top', CONFIG);
    await expect(attempt).rejects.toThrow();
  }, 30000);
});
describe('Comment Fields', () => {
  // Fetches "top" comments for the shared test video and decodes the JSON payload.
  const loadComments = async (limit: number): Promise<CommentsResponse> =>
    JSON.parse(await getVideoComments(testUrl, limit, 'top', CONFIG));

  test('should include author information when available', async () => {
    const data = await loadComments(5);
    if (data.comments.length > 0) {
      const comment = data.comments[0];
      // Author fields are optional, but must be strings when present.
      if (comment.author !== undefined) {
        expect(typeof comment.author).toBe('string');
      }
      if (comment.author_id !== undefined) {
        expect(typeof comment.author_id).toBe('string');
      }
    }
  }, 60000);

  test('should include engagement metrics when available', async () => {
    const data = await loadComments(5);
    // like_count is optional, but whenever it is reported it must be a
    // non-negative number. Checking each comment individually keeps the
    // assertion meaningful; OR-ing the result with `comments.length > 0`
    // would pass vacuously for any non-empty response.
    for (const comment of data.comments) {
      if (comment.like_count !== undefined && comment.like_count !== null) {
        expect(typeof comment.like_count).toBe('number');
        expect(comment.like_count).toBeGreaterThanOrEqual(0);
      }
    }
  }, 60000);

  test('should handle boolean flags correctly', async () => {
    const data = await loadComments(10);
    for (const comment of data.comments) {
      // Boolean flags should be boolean or undefined
      if (comment.is_pinned !== undefined) {
        expect(typeof comment.is_pinned).toBe('boolean');
      }
      if (comment.author_is_uploader !== undefined) {
        expect(typeof comment.author_is_uploader).toBe('boolean');
      }
      if (comment.author_is_verified !== undefined) {
        expect(typeof comment.author_is_verified).toBe('boolean');
      }
    }
  }, 60000);
});
});

View File

@ -19,6 +19,7 @@ import { downloadAudio } from "./modules/audio.js";
import { listSubtitles, downloadSubtitles, downloadTranscript } from "./modules/subtitle.js";
import { searchVideos } from "./modules/search.js";
import { getVideoMetadata, getVideoMetadataSummary } from "./modules/metadata.js";
import { getVideoComments, getVideoCommentsSummary } from "./modules/comments.js";
const VERSION = '0.7.0';
@ -114,6 +115,33 @@ const GetVideoMetadataSummarySchema = z.object({
.describe("URL of the video"),
}).strict();
/**
 * Input schema for the `ytdlp_get_video_comments` tool.
 *
 * - `url`: must parse as a URL.
 * - `maxComments`: coerced to a number; must be an integer in 1..100; defaults to 20.
 * - `sortOrder`: either "top" or "new"; defaults to "top".
 *
 * The object is declared `.strict()`.
 */
const GetVideoCommentsSchema = z.object({
url: z.string()
.url("Must be a valid URL")
.describe("URL of the video"),
maxComments: z.coerce.number()
.int("Must be a whole number")
.min(1, "Must return at least 1 comment")
.max(100, "Cannot exceed 100 comments")
.default(20)
.describe("Maximum number of comments to retrieve (1-100, default: 20)"),
sortOrder: z.enum(["top", "new"])
.default("top")
.describe("Sort order: 'top' for most liked, 'new' for newest (default: 'top')"),
}).strict();
/**
 * Input schema for the `ytdlp_get_video_comments_summary` tool.
 *
 * - `url`: must parse as a URL.
 * - `maxComments`: coerced to a number; must be an integer in 1..50; defaults to 10.
 *   Note the upper bound is lower than the 100 allowed by the JSON comments tool.
 *
 * There is no sort option here. The object is declared `.strict()`.
 */
const GetVideoCommentsSummarySchema = z.object({
url: z.string()
.url("Must be a valid URL")
.describe("URL of the video"),
maxComments: z.coerce.number()
.int("Must be a whole number")
.min(1, "Must return at least 1 comment")
.max(50, "Cannot exceed 50 comments for summary")
.default(10)
.describe("Maximum number of comments to include in summary (1-50, default: 10)"),
}).strict();
/**
* Validate system configuration
* @throws {Error} when configuration is invalid
@ -448,6 +476,85 @@ Error Handling:
openWorldHint: true
}
},
{
name: "ytdlp_get_video_comments",
description: `Extract comments from a video in JSON format.
This tool retrieves comments from videos (primarily YouTube) using yt-dlp's comment extraction feature. Returns structured comment data including author info, likes, and timestamps.
Args:
- url (string): Full video URL
- maxComments (number): Maximum comments to retrieve (1-100, default: 20)
- sortOrder (enum): 'top' for most liked comments, 'new' for newest (default: 'top')
Returns:
JSON object with:
- count: Number of comments returned
- has_more: Whether more comments are available
- comments: Array of comment objects containing:
- id: Comment identifier
- text: Comment content
- author: Author name
- author_id: Author channel ID
- author_is_uploader: Whether author is video creator
- author_is_verified: Whether author is verified
- like_count: Number of likes
- is_pinned: Whether comment is pinned
- parent: Parent comment ID (for replies)
- timestamp: Unix timestamp
- time_text: Human-readable time (e.g., "2 days ago")
Use when: You need structured comment data for analysis or display
Don't use when: You want a quick readable overview (use ytdlp_get_video_comments_summary)
Note: Comment extraction is primarily supported for YouTube. Other platforms may have limited support.
Error Handling:
- "Video is unavailable or private" for inaccessible content
- "Comments are disabled" for videos with comments turned off
- "Requires authentication" for age-restricted content (configure cookies)
- "Unsupported platform" for non-YouTube URLs`,
inputSchema: GetVideoCommentsSchema,
annotations: {
readOnlyHint: true,
destructiveHint: false,
idempotentHint: true,
openWorldHint: true
}
},
{
name: "ytdlp_get_video_comments_summary",
description: `Get a human-readable summary of video comments.
This tool extracts comments and formats them into an easy-to-read summary. Perfect for quick overview of audience reactions and popular comments.
Args:
- url (string): Full video URL
- maxComments (number): Maximum comments to include (1-50, default: 10)
Returns:
Formatted text summary with:
- Comment author with indicators ([UPLOADER], [VERIFIED], [PINNED])
- Time posted (e.g., "2 days ago")
- Like count
- Comment text (truncated to 300 chars if longer)
- Reply indicators
Use when: You want a quick, readable overview of video comments
Don't use when: You need complete structured data (use ytdlp_get_video_comments)
Note: Comments are sorted by "top" (most liked) by default.
Error Handling:
- Same as ytdlp_get_video_comments (unavailable videos, disabled comments, authentication required)`,
inputSchema: GetVideoCommentsSummarySchema,
annotations: {
readOnlyHint: true,
destructiveHint: false,
idempotentHint: true,
openWorldHint: true
}
},
],
};
});
@ -493,6 +600,8 @@ server.setRequestHandler(
endTime?: string;
query?: string;
maxResults?: number;
maxComments?: number;
sortOrder?: "top" | "new";
fields?: string[];
};
@ -552,6 +661,18 @@ server.setRequestHandler(
() => getVideoMetadataSummary(validated.url, CONFIG),
"Error generating video metadata summary"
);
} else if (toolName === "ytdlp_get_video_comments") {
const validated = GetVideoCommentsSchema.parse(args);
return handleToolExecution(
() => getVideoComments(validated.url, validated.maxComments, validated.sortOrder, CONFIG),
"Error extracting video comments"
);
} else if (toolName === "ytdlp_get_video_comments_summary") {
const validated = GetVideoCommentsSummarySchema.parse(args);
return handleToolExecution(
() => getVideoCommentsSummary(validated.url, validated.maxComments, CONFIG),
"Error generating video comments summary"
);
} else {
return {
content: [{ type: "text", text: `Unknown tool: ${toolName}` }],

283
src/modules/comments.ts Normal file
View File

@ -0,0 +1,283 @@
import type { Config } from "../config.js";
import { getCookieArgs } from "../config.js";
import {
_spawnPromise,
validateUrl
} from "./utils.js";
/**
 * Represents a single comment on a video.
 *
 * getVideoComments fills these fields from the entries of the `comments`
 * array in yt-dlp's JSON output. Every field is optional: a value missing
 * from the source entry is passed through as undefined.
 */
export interface Comment {
/** Unique comment identifier */
id?: string;
/** Comment text content */
text?: string;
/** Comment author display name */
author?: string;
/** Comment author channel ID */
author_id?: string;
/** Comment author channel URL */
author_url?: string;
/** Whether the author is the video uploader */
author_is_uploader?: boolean;
/** Whether the author is verified */
author_is_verified?: boolean;
/** Number of likes on the comment */
like_count?: number;
/** Whether the comment is pinned */
is_pinned?: boolean;
/** Whether the comment is marked as a favorite by the uploader */
is_favorited?: boolean;
/**
 * Parent comment ID (for replies). The summary formatter treats the value
 * 'root' as "not a reply".
 */
parent?: string;
/** Unix timestamp of the comment */
timestamp?: number;
/** Human-readable relative time string (e.g. "2 days ago") */
time_text?: string;
/**
 * Allows additional keys on the type. getVideoComments copies only the
 * named fields above into its output, so extra keys are not forwarded by it.
 */
[key: string]: unknown;
}
/**
 * Response envelope for video comments. getVideoComments serializes this
 * structure to a JSON string; getVideoCommentsSummary parses it back.
 */
export interface CommentsResponse {
/** Number of comments contained in `comments` (not the video's total comment count) */
count: number;
/**
 * Whether more comments may exist beyond those returned. Set by
 * getVideoComments; always true when the response was truncated for size.
 */
has_more: boolean;
/** Array of comment objects */
comments: Comment[];
/** Set to true only when comments were dropped to fit the configured character limit */
_truncated?: boolean;
/** Explanation accompanying `_truncated`, stating how many comments were kept */
_message?: string;
}
/**
 * Sort order for comments: "top" for most liked, "new" for newest first.
 * getVideoComments interpolates the value into yt-dlp's `comment_sort`
 * extractor argument.
 */
export type CommentSortOrder = "top" | "new";
/**
 * Phrases that indicate an age gate. A bare "age" substring must not be used
 * here: it also occurs in "webpage", "page" and "message", which would cause
 * unrelated failures to be reported as authentication problems.
 */
const AGE_RESTRICTION_PATTERN = /confirm your age|age[- ]restricted|inappropriate for some users/i;

/**
 * Convert a failure raised while running yt-dlp (or parsing its output) into
 * an Error carrying an actionable message. Checks run in a fixed order; the
 * first matching category wins.
 */
function toCommentExtractionError(error: unknown, url: string): Error {
  if (!(error instanceof Error)) {
    return new Error(`Failed to extract video comments from ${url}`);
  }

  const message = error.message;

  if (message.includes("Video unavailable") || message.includes("private")) {
    return new Error(`Video is unavailable or private: ${url}. Check the URL and video privacy settings.`);
  }
  if (message.includes("Unsupported URL") || message.includes("extractor")) {
    return new Error(`Unsupported platform or video URL: ${url}. Comments extraction is primarily supported for YouTube.`);
  }
  if (message.includes("network") || message.includes("Connection")) {
    return new Error("Network error while extracting comments. Check your internet connection and retry.");
  }
  if (message.includes("comments are disabled") || message.includes("Comments are turned off")) {
    return new Error(`Comments are disabled for this video: ${url}`);
  }
  if (message.includes("Sign in") || AGE_RESTRICTION_PATTERN.test(message)) {
    return new Error(`This video requires authentication to view comments. Configure cookies in your settings.`);
  }
  return new Error(`Failed to extract video comments: ${message}. Verify the URL is correct.`);
}

/**
 * Extract video comments using yt-dlp.
 * Runs yt-dlp with --write-comments, --dump-json and --skip-download, then
 * reshapes the `comments` array of the resulting JSON into a CommentsResponse.
 *
 * @param url - The URL of the video to extract comments from
 * @param maxComments - Maximum number of comments to retrieve (default: 20)
 * @param sortOrder - Sort order: "top" for most liked, "new" for newest (default: "top")
 * @param _config - Optional configuration; when given, it supplies cookie
 *   arguments for yt-dlp and the character limit applied to the JSON output
 * @returns Promise resolving to a pretty-printed JSON string of a CommentsResponse
 * @throws {Error} When URL is invalid or comment extraction fails
 *
 * @example
 * ```typescript
 * // Get top 20 comments
 * const comments = await getVideoComments('https://youtube.com/watch?v=...');
 * console.log(comments);
 *
 * // Get newest 50 comments
 * const newComments = await getVideoComments(
 *   'https://youtube.com/watch?v=...',
 *   50,
 *   'new'
 * );
 * ```
 */
export async function getVideoComments(
  url: string,
  maxComments: number = 20,
  sortOrder: CommentSortOrder = "top",
  _config?: Config
): Promise<string> {
  // Validate the URL before spawning anything.
  if (!validateUrl(url)) {
    throw new Error("Invalid or unsupported URL format");
  }

  const args = [
    "--dump-json",
    "--no-warnings",
    // NOTE(review): this turns off TLS certificate verification in yt-dlp —
    // confirm that is intentional for this tool.
    "--no-check-certificate",
    "--write-comments",
    // The first max_comments value caps the total number of comments gathered;
    // the trailing ",all,all" leave the next limits in yt-dlp's list unrestricted.
    "--extractor-args", `youtube:comment_sort=${sortOrder};max_comments=${maxComments},all,all`,
    "--skip-download",
    ...(_config ? getCookieArgs(_config) : []),
    url
  ];

  try {
    // Execute yt-dlp and parse the single JSON document it prints.
    const output = await _spawnPromise("yt-dlp", args);
    const metadata = JSON.parse(output);

    // A missing or non-array `comments` value is treated as "no comments".
    const rawComments: Comment[] = Array.isArray(metadata.comments) ? metadata.comments : [];
    const comments = rawComments.slice(0, maxComments);

    const response: CommentsResponse = {
      count: comments.length,
      // yt-dlp is already asked to stop at maxComments, so it never returns
      // more than that; a strict ">" comparison could therefore never be true.
      // Reaching the cap is the available signal that more comments may exist.
      has_more: rawComments.length >= maxComments,
      // Copy only the documented fields so the output shape stays stable.
      comments: comments.map(comment => ({
        id: comment.id,
        text: comment.text,
        author: comment.author,
        author_id: comment.author_id,
        author_url: comment.author_url,
        author_is_uploader: comment.author_is_uploader,
        author_is_verified: comment.author_is_verified,
        like_count: comment.like_count,
        is_pinned: comment.is_pinned,
        is_favorited: comment.is_favorited,
        parent: comment.parent,
        timestamp: comment.timestamp,
        time_text: comment.time_text
      }))
    };

    let result = JSON.stringify(response, null, 2);

    // Enforce the configured character limit by dropping trailing comments one
    // at a time. At least one comment is always kept, so a single oversized
    // comment can still exceed the limit.
    if (_config && result.length > _config.limits.characterLimit) {
      let kept = response.comments;
      while (result.length > _config.limits.characterLimit && kept.length > 1) {
        kept = kept.slice(0, -1);
        const truncatedResponse: CommentsResponse = {
          count: kept.length,
          has_more: true,
          comments: kept,
          _truncated: true,
          _message: `Response truncated to ${kept.length} comments due to size limits. Use smaller maxComments value.`
        };
        result = JSON.stringify(truncatedResponse, null, 2);
      }
    }

    return result;
  } catch (error) {
    // Map yt-dlp / parsing failures to actionable messages.
    throw toCommentExtractionError(error, url);
  }
}
/** Maximum number of characters of comment text shown per entry in the summary. */
const SUMMARY_TEXT_LIMIT = 300;

/**
 * Shorten `text` to at most `limit` Unicode code points, appending "..." when
 * something was cut. Counting code points (rather than UTF-16 units) avoids
 * splitting a surrogate pair, which would leave a broken half of an emoji at
 * the end of the output.
 */
function truncateByCodePoint(text: string, limit: number): string {
  // A string with at most `limit` UTF-16 units cannot exceed `limit` code points.
  if (text.length <= limit) {
    return text;
  }
  const codePoints = Array.from(text);
  if (codePoints.length <= limit) {
    return text;
  }
  return codePoints.slice(0, limit).join('') + '...';
}

/**
 * Get a human-readable summary of video comments.
 * Fetches comments through getVideoComments (always sorted by "top") and
 * renders one entry per comment as plain text.
 *
 * @param url - The URL of the video to extract comments from
 * @param maxComments - Maximum number of comments to include (default: 10)
 * @param _config - Optional configuration, forwarded to getVideoComments
 * @returns Promise resolving to a formatted summary string
 * @throws {Error} When URL is invalid or comment extraction fails
 *
 * @example
 * ```typescript
 * const summary = await getVideoCommentsSummary('https://youtube.com/watch?v=...');
 * console.log(summary);
 * // Output:
 * // Video Comments (2 shown)
 * // ──────────────────────────────
 * //
 * // Author: John Doe [VERIFIED] (2 days ago) - 1,234 likes
 * // This is an awesome video!
 * //
 * // Author: Jane Smith (1 week ago) - 567 likes
 * // Great content, keep it up!
 * ```
 */
export async function getVideoCommentsSummary(
  url: string,
  maxComments: number = 10,
  _config?: Config
): Promise<string> {
  try {
    const commentsJson = await getVideoComments(url, maxComments, "top", _config);
    const data: CommentsResponse = JSON.parse(commentsJson);

    // Header: title, divider, blank line.
    const lines: string[] = [
      `Video Comments (${data.count} shown)`,
      '─'.repeat(30),
      ''
    ];

    for (const comment of data.comments) {
      // Author line: name, role/status badges, relative time, like count.
      let authorLine = `Author: ${comment.author || 'Unknown'}`;
      if (comment.author_is_uploader) {
        authorLine += ' [UPLOADER]';
      }
      if (comment.author_is_verified) {
        authorLine += ' [VERIFIED]';
      }
      if (comment.is_pinned) {
        authorLine += ' [PINNED]';
      }
      if (comment.time_text) {
        authorLine += ` (${comment.time_text})`;
      }
      if (typeof comment.like_count === 'number' && comment.like_count > 0) {
        // Fixed locale so the output does not vary with the host's settings.
        authorLine += ` - ${comment.like_count.toLocaleString('en-US')} likes`;
      }
      lines.push(authorLine);

      // Comment text, shortened when it exceeds the per-entry limit.
      if (comment.text) {
        lines.push(truncateByCodePoint(comment.text, SUMMARY_TEXT_LIMIT));
      }

      // A parent of 'root' marks a top-level comment; anything else is a reply.
      if (comment.parent && comment.parent !== 'root') {
        lines.push(`(Reply to comment ${comment.parent})`);
      }
      lines.push('');
    }

    if (data._truncated) {
      // The response was cut to fit the size limit, so asking for more
      // comments would not help; relay the truncation notice instead.
      lines.push('---');
      lines.push(data._message ?? 'Response truncated due to size limits.');
    } else if (data.has_more) {
      lines.push('---');
      lines.push('More comments available. Increase maxComments to see more.');
    }

    return lines.join('\n');
  } catch (error) {
    // Errors from getVideoComments already carry actionable messages.
    if (error instanceof Error) {
      throw error;
    }
    throw new Error(`Failed to generate comments summary for ${url}`);
  }
}