From 52067d40ea582dc9548f7fe8a23a3e58d23a3c9c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Dec 2025 11:59:11 +0000 Subject: [PATCH] feat(comments): add YouTube video comments extraction tools Add two new MCP tools for extracting video comments: - ytdlp_get_video_comments: Extract comments as structured JSON with author info, likes, timestamps, and reply threading - ytdlp_get_video_comments_summary: Get human-readable summary of top comments Features: - Support for sorting by "top" (most liked) or "new" (newest first) - Configurable comment limit (1-100 comments) - Includes author verification status, pinned comments, and uploader replies - Comprehensive error handling for disabled comments, private videos, etc. - Comprehensive test suite --- CHANGELOG.md | 9 ++ src/__tests__/comments.test.ts | 179 +++++++++++++++++++++ src/index.mts | 121 ++++++++++++++ src/modules/comments.ts | 283 +++++++++++++++++++++++++++++++++ 4 files changed, 592 insertions(+) create mode 100644 src/__tests__/comments.test.ts create mode 100644 src/modules/comments.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dad127..d0b4418 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- **Video Comments Extraction**: New tools for extracting YouTube video comments + - `ytdlp_get_video_comments`: Extract comments in structured JSON format with author info, likes, timestamps, and reply threading + - `ytdlp_get_video_comments_summary`: Get human-readable summary of top comments + - Supports sorting by "top" (most liked) or "new" (newest first) + - Configurable comment limit (1-100 comments) + - Includes author verification status, pinned comments, and uploader replies + - Comprehensive test suite for comments functionality + ### Changed - Add Claude Code settings (.claude/, CLAUDE.md) to .gitignore - Add development guideline to always update CHANGELOG.md diff --git 
a/src/__tests__/comments.test.ts b/src/__tests__/comments.test.ts new file mode 100644 index 0000000..6d9249a --- /dev/null +++ b/src/__tests__/comments.test.ts @@ -0,0 +1,179 @@ +// @ts-nocheck +// @jest-environment node +import { describe, test, expect, beforeAll } from '@jest/globals'; +import { getVideoComments, getVideoCommentsSummary } from '../modules/comments.js'; +import type { CommentsResponse } from '../modules/comments.js'; +import { CONFIG } from '../config.js'; + +// Set Python environment +process.env.PYTHONPATH = ''; +process.env.PYTHONHOME = ''; + +describe('Video Comments Extraction', () => { + // Using a popular video that should have comments enabled + const testUrl = 'https://www.youtube.com/watch?v=jNQXAC9IVRw'; + + describe('getVideoComments', () => { + test('should extract comments from YouTube video', async () => { + const commentsJson = await getVideoComments(testUrl, 5, 'top', CONFIG); + const data: CommentsResponse = JSON.parse(commentsJson); + + // Verify response structure + expect(data).toHaveProperty('count'); + expect(data).toHaveProperty('has_more'); + expect(data).toHaveProperty('comments'); + expect(Array.isArray(data.comments)).toBe(true); + expect(data.count).toBeGreaterThan(0); + expect(data.count).toBeLessThanOrEqual(5); + }, 60000); + + test('should return comments with expected fields', async () => { + const commentsJson = await getVideoComments(testUrl, 3, 'top', CONFIG); + const data: CommentsResponse = JSON.parse(commentsJson); + + if (data.comments.length > 0) { + const comment = data.comments[0]; + + // These fields should typically be present + expect(comment).toHaveProperty('text'); + expect(comment).toHaveProperty('author'); + + // Verify text is a string + if (comment.text !== undefined) { + expect(typeof comment.text).toBe('string'); + } + if (comment.author !== undefined) { + expect(typeof comment.author).toBe('string'); + } + } + }, 60000); + + test('should respect maxComments parameter', async () => { + const 
commentsJson = await getVideoComments(testUrl, 3, 'top', CONFIG); + const data: CommentsResponse = JSON.parse(commentsJson); + + expect(data.comments.length).toBeLessThanOrEqual(3); + }, 60000); + + test('should support different sort orders', async () => { + // Just verify both sort orders work without error + const topComments = await getVideoComments(testUrl, 2, 'top', CONFIG); + const topData: CommentsResponse = JSON.parse(topComments); + expect(topData).toHaveProperty('comments'); + + const newComments = await getVideoComments(testUrl, 2, 'new', CONFIG); + const newData: CommentsResponse = JSON.parse(newComments); + expect(newData).toHaveProperty('comments'); + }, 90000); + + test('should throw error for invalid URL', async () => { + await expect(getVideoComments('invalid-url', 5, 'top', CONFIG)).rejects.toThrow(); + }); + + test('should throw error for unsupported URL', async () => { + await expect(getVideoComments('https://example.com/video', 5, 'top', CONFIG)).rejects.toThrow(); + }, 30000); + }); + + describe('getVideoCommentsSummary', () => { + test('should generate human-readable summary', async () => { + const summary = await getVideoCommentsSummary(testUrl, 5, CONFIG); + + expect(typeof summary).toBe('string'); + expect(summary.length).toBeGreaterThan(0); + + // Should contain header + expect(summary).toContain('Video Comments'); + + // Should have formatted content + expect(summary).toContain('Author:'); + }, 60000); + + test('should respect maxComments parameter', async () => { + const summary = await getVideoCommentsSummary(testUrl, 3, CONFIG); + + // Count occurrences of "Author:" to verify number of comments + const authorMatches = summary.match(/Author:/g); + if (authorMatches) { + expect(authorMatches.length).toBeLessThanOrEqual(3); + } + }, 60000); + + test('should throw error for invalid URL', async () => { + await expect(getVideoCommentsSummary('invalid-url', 5, CONFIG)).rejects.toThrow(); + }); + + test('should handle videos with different 
comment counts', async () => { + const summary = await getVideoCommentsSummary(testUrl, 10, CONFIG); + + // Summary should be a valid string + expect(typeof summary).toBe('string'); + expect(summary.trim().length).toBeGreaterThan(0); + }, 60000); + }); + + describe('Error Handling', () => { + test('should provide helpful error message for unavailable video', async () => { + const unavailableUrl = 'https://www.youtube.com/watch?v=invalid_video_id_xyz123'; + + await expect(getVideoComments(unavailableUrl, 5, 'top', CONFIG)).rejects.toThrow(); + }, 30000); + + test('should handle unsupported URLs gracefully', async () => { + const unsupportedUrl = 'https://example.com/not-a-video'; + + await expect(getVideoComments(unsupportedUrl, 5, 'top', CONFIG)).rejects.toThrow(); + }, 30000); + }); + + describe('Comment Fields', () => { + test('should include author information when available', async () => { + const commentsJson = await getVideoComments(testUrl, 5, 'top', CONFIG); + const data: CommentsResponse = JSON.parse(commentsJson); + + if (data.comments.length > 0) { + const comment = data.comments[0]; + + // Author fields + if (comment.author !== undefined) { + expect(typeof comment.author).toBe('string'); + } + if (comment.author_id !== undefined) { + expect(typeof comment.author_id).toBe('string'); + } + } + }, 60000); + + test('should include engagement metrics when available', async () => { + const commentsJson = await getVideoComments(testUrl, 5, 'top', CONFIG); + const data: CommentsResponse = JSON.parse(commentsJson); + + if (data.comments.length > 0) { + // At least one top comment should have like_count + const hasLikes = data.comments.some(c => + c.like_count !== undefined && typeof c.like_count === 'number' + ); + // This is optional - some comments may not have likes + expect(hasLikes || data.comments.length > 0).toBe(true); + } + }, 60000); + + test('should handle boolean flags correctly', async () => { + const commentsJson = await getVideoComments(testUrl, 
10, 'top', CONFIG); + const data: CommentsResponse = JSON.parse(commentsJson); + + for (const comment of data.comments) { + // Boolean flags should be boolean or undefined + if (comment.is_pinned !== undefined) { + expect(typeof comment.is_pinned).toBe('boolean'); + } + if (comment.author_is_uploader !== undefined) { + expect(typeof comment.author_is_uploader).toBe('boolean'); + } + if (comment.author_is_verified !== undefined) { + expect(typeof comment.author_is_verified).toBe('boolean'); + } + } + }, 60000); + }); +}); diff --git a/src/index.mts b/src/index.mts index 712fc2c..63f7e05 100644 --- a/src/index.mts +++ b/src/index.mts @@ -19,6 +19,7 @@ import { downloadAudio } from "./modules/audio.js"; import { listSubtitles, downloadSubtitles, downloadTranscript } from "./modules/subtitle.js"; import { searchVideos } from "./modules/search.js"; import { getVideoMetadata, getVideoMetadataSummary } from "./modules/metadata.js"; +import { getVideoComments, getVideoCommentsSummary } from "./modules/comments.js"; const VERSION = '0.7.0'; @@ -114,6 +115,33 @@ const GetVideoMetadataSummarySchema = z.object({ .describe("URL of the video"), }).strict(); +const GetVideoCommentsSchema = z.object({ + url: z.string() + .url("Must be a valid URL") + .describe("URL of the video"), + maxComments: z.coerce.number() + .int("Must be a whole number") + .min(1, "Must return at least 1 comment") + .max(100, "Cannot exceed 100 comments") + .default(20) + .describe("Maximum number of comments to retrieve (1-100, default: 20)"), + sortOrder: z.enum(["top", "new"]) + .default("top") + .describe("Sort order: 'top' for most liked, 'new' for newest (default: 'top')"), +}).strict(); + +const GetVideoCommentsSummarySchema = z.object({ + url: z.string() + .url("Must be a valid URL") + .describe("URL of the video"), + maxComments: z.coerce.number() + .int("Must be a whole number") + .min(1, "Must return at least 1 comment") + .max(50, "Cannot exceed 50 comments for summary") + .default(10) + 
.describe("Maximum number of comments to include in summary (1-50, default: 10)"), +}).strict(); + /** * Validate system configuration * @throws {Error} when configuration is invalid @@ -448,6 +476,85 @@ Error Handling: openWorldHint: true } }, + { + name: "ytdlp_get_video_comments", + description: `Extract comments from a video in JSON format. + +This tool retrieves comments from videos (primarily YouTube) using yt-dlp's comment extraction feature. Returns structured comment data including author info, likes, and timestamps. + +Args: + - url (string): Full video URL + - maxComments (number): Maximum comments to retrieve (1-100, default: 20) + - sortOrder (enum): 'top' for most liked comments, 'new' for newest (default: 'top') + +Returns: + JSON object with: + - count: Number of comments returned + - has_more: Whether more comments are available + - comments: Array of comment objects containing: + - id: Comment identifier + - text: Comment content + - author: Author name + - author_id: Author channel ID + - author_is_uploader: Whether author is video creator + - author_is_verified: Whether author is verified + - like_count: Number of likes + - is_pinned: Whether comment is pinned + - parent: Parent comment ID (for replies) + - timestamp: Unix timestamp + - time_text: Human-readable time (e.g., "2 days ago") + +Use when: You need structured comment data for analysis or display +Don't use when: You want a quick readable overview (use ytdlp_get_video_comments_summary) + +Note: Comment extraction is primarily supported for YouTube. Other platforms may have limited support. 
+ +Error Handling: + - "Video is unavailable or private" for inaccessible content + - "Comments are disabled" for videos with comments turned off + - "Requires authentication" for age-restricted content (configure cookies) + - "Unsupported platform" for non-YouTube URLs`, + inputSchema: GetVideoCommentsSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true + } + }, + { + name: "ytdlp_get_video_comments_summary", + description: `Get a human-readable summary of video comments. + +This tool extracts comments and formats them into an easy-to-read summary. Perfect for quick overview of audience reactions and popular comments. + +Args: + - url (string): Full video URL + - maxComments (number): Maximum comments to include (1-50, default: 10) + +Returns: + Formatted text summary with: + - Comment author with indicators ([UPLOADER], [VERIFIED], [PINNED]) + - Time posted (e.g., "2 days ago") + - Like count + - Comment text (truncated to 300 chars if longer) + - Reply indicators + +Use when: You want a quick, readable overview of video comments +Don't use when: You need complete structured data (use ytdlp_get_video_comments) + +Note: Comments are sorted by "top" (most liked) by default. 
+ +Error Handling: + - Same as ytdlp_get_video_comments (unavailable videos, disabled comments, authentication required)`, + inputSchema: GetVideoCommentsSummarySchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true + } + }, ], }; }); @@ -493,6 +600,8 @@ server.setRequestHandler( endTime?: string; query?: string; maxResults?: number; + maxComments?: number; + sortOrder?: "top" | "new"; fields?: string[]; }; @@ -552,6 +661,18 @@ server.setRequestHandler( () => getVideoMetadataSummary(validated.url, CONFIG), "Error generating video metadata summary" ); + } else if (toolName === "ytdlp_get_video_comments") { + const validated = GetVideoCommentsSchema.parse(args); + return handleToolExecution( + () => getVideoComments(validated.url, validated.maxComments, validated.sortOrder, CONFIG), + "Error extracting video comments" + ); + } else if (toolName === "ytdlp_get_video_comments_summary") { + const validated = GetVideoCommentsSummarySchema.parse(args); + return handleToolExecution( + () => getVideoCommentsSummary(validated.url, validated.maxComments, CONFIG), + "Error generating video comments summary" + ); } else { return { content: [{ type: "text", text: `Unknown tool: ${toolName}` }], diff --git a/src/modules/comments.ts b/src/modules/comments.ts new file mode 100644 index 0000000..bd9b19f --- /dev/null +++ b/src/modules/comments.ts @@ -0,0 +1,283 @@ +import type { Config } from "../config.js"; +import { getCookieArgs } from "../config.js"; +import { + _spawnPromise, + validateUrl +} from "./utils.js"; + +/** + * Represents a single comment on a video + */ +export interface Comment { + /** Unique comment identifier */ + id?: string; + /** Comment text content */ + text?: string; + /** Comment author name */ + author?: string; + /** Comment author channel ID */ + author_id?: string; + /** Comment author channel URL */ + author_url?: string; + /** Whether the author is the video uploader */ + 
author_is_uploader?: boolean;
+  /** Whether the comment author is flagged as verified */
+  author_is_verified?: boolean;
+  /** Number of likes on the comment; the summary formatter only prints it when greater than 0 */
+  like_count?: number;
+  /** Whether the comment is pinned (rendered as [PINNED] in the summary) */
+  is_pinned?: boolean;
+  /** Whether comment is marked as favorite by uploader */
+  is_favorited?: boolean;
+  /**
+   * Parent comment ID (for replies).
+   * The summary formatter treats the literal value 'root' as "not a reply".
+   */
+  parent?: string;
+  /** Unix timestamp of comment */
+  timestamp?: number;
+  /** Human-readable "time ago" string (e.g. "2 days ago") */
+  time_text?: string;
+  /**
+   * Additional fields that might be present on the raw yt-dlp comment object.
+   * getVideoComments copies only the named fields above into its response.
+   */
+  [key: string]: unknown;
+}
+
+/**
+ * Response structure for video comments.
+ * This is the object that getVideoComments serialises to its JSON string result.
+ */
+export interface CommentsResponse {
+  /** Number of entries in `comments` */
+  count: number;
+  /**
+   * Whether more comments exist than were returned. Also set to true when the
+   * response was shortened to fit the configured character limit.
+   */
+  has_more: boolean;
+  /** Array of comment objects */
+  comments: Comment[];
+  /** Present (true) only when comments were dropped to fit the character limit */
+  _truncated?: boolean;
+  /** Human-readable explanation accompanying `_truncated` */
+  _message?: string;
+}
+
+/**
+ * Sort order for comments: "top" for most liked, "new" for newest first.
+ * The value is passed to yt-dlp as the `comment_sort` YouTube extractor argument.
+ */
+export type CommentSortOrder = "top" | "new";
+
+/**
+ * Extract video comments using yt-dlp.
+ * Uses yt-dlp's --write-comments and --dump-json flags to get comments.
+ *
+ * The sort order and comment limit are forwarded to yt-dlp through its YouTube
+ * extractor arguments. One comment more than requested is asked for so that
+ * `has_more` can truthfully report whether further comments exist; the extra
+ * comment is never returned to the caller.
+ *
+ * If a config is supplied and the serialised response exceeds
+ * `config.limits.characterLimit`, trailing comments are dropped (down to a
+ * minimum of one) and the response is marked with `_truncated` / `_message`.
+ *
+ * @param url - The URL of the video to extract comments from
+ * @param maxComments - Maximum number of comments to retrieve (default: 20).
+ *   Fractions are rounded down, values below 1 become 1, non-finite values fall back to 20.
+ * @param sortOrder - Sort order: "top" for most liked, "new" for newest (default: "top")
+ * @param _config - Optional configuration; supplies cookie arguments and the character limit
+ * @returns Promise resolving to JSON string with comments data (a serialised CommentsResponse)
+ * @throws {Error} When URL is invalid or comment extraction fails
+ *
+ * @example
+ * ```typescript
+ * // Get top 20 comments
+ * const comments = await getVideoComments('https://youtube.com/watch?v=...');
+ * console.log(comments);
+ *
+ * // Get newest 50 comments
+ * const newComments = await getVideoComments(
+ *   'https://youtube.com/watch?v=...',
+ *   50,
+ *   'new'
+ * );
+ * ```
+ */
+export async function getVideoComments(
+  url: string,
+  maxComments: number = 20,
+  sortOrder: CommentSortOrder = "top",
+  _config?: Config
+): Promise<string> {
+  // Validate the URL
+  if (!validateUrl(url)) {
+    throw new Error("Invalid or unsupported URL format");
+  }
+
+  // The limit is interpolated into an extractor argument, so it must be a
+  // positive whole number even when the caller bypassed schema validation.
+  const limit = Number.isFinite(maxComments) ? Math.max(1, Math.floor(maxComments)) : 20;
+
+  const args = [
+    "--dump-json",
+    "--no-warnings",
+    // NOTE(review): this disables TLS certificate verification for the request.
+    // Kept for backward compatibility; confirm it is really required.
+    "--no-check-certificate",
+    "--write-comments",
+    // Ask for one extra comment: it is only used to detect that more exist.
+    "--extractor-args", `youtube:comment_sort=${sortOrder};max_comments=${limit + 1},all,all`,
+    "--skip-download",
+    ...(_config ? getCookieArgs(_config) : []),
+    url
+  ];
+
+  try {
+    // Execute yt-dlp to get metadata with comments
+    const output = await _spawnPromise("yt-dlp", args);
+
+    // Parse the JSON output; only the `comments` array is of interest here
+    const metadata = JSON.parse(output) as { comments?: Comment[] | null } | null;
+    const rawComments: Comment[] =
+      metadata && Array.isArray(metadata.comments) ? metadata.comments : [];
+
+    // Limit to the requested number of comments
+    const comments = rawComments.slice(0, limit);
+
+    // Build response, copying only the documented fields of each comment
+    const response: CommentsResponse = {
+      count: comments.length,
+      has_more: rawComments.length > limit,
+      comments: comments.map(comment => ({
+        id: comment.id,
+        text: comment.text,
+        author: comment.author,
+        author_id: comment.author_id,
+        author_url: comment.author_url,
+        author_is_uploader: comment.author_is_uploader,
+        author_is_verified: comment.author_is_verified,
+        like_count: comment.like_count,
+        is_pinned: comment.is_pinned,
+        is_favorited: comment.is_favorited,
+        parent: comment.parent,
+        timestamp: comment.timestamp,
+        time_text: comment.time_text
+      }))
+    };
+
+    let result = JSON.stringify(response, null, 2);
+
+    // Check character limit
+    if (_config && result.length > _config.limits.characterLimit) {
+      // Drop comments from the end until the payload fits. At least one comment
+      // is always kept, so a single oversized comment can still exceed the limit.
+      let truncatedComments = response.comments;
+
+      while (result.length > _config.limits.characterLimit && truncatedComments.length > 1) {
+        truncatedComments = truncatedComments.slice(0, -1);
+        const truncatedResponse: CommentsResponse = {
+          count: truncatedComments.length,
+          has_more: true,
+          comments: truncatedComments,
+          _truncated: true,
+          _message: `Response truncated to ${truncatedComments.length} comments due to size limits. Use smaller maxComments value.`
+        };
+        result = JSON.stringify(truncatedResponse, null, 2);
+      }
+    }
+
+    return result;
+
+  } catch (error) {
+    if (error instanceof Error) {
+      throw new Error(describeCommentsError(error.message, url));
+    }
+    throw new Error(`Failed to extract video comments from ${url}`);
+  }
+}
+
+/**
+ * Translate a raw yt-dlp (or JSON parsing) error message into an actionable one.
+ *
+ * Patterns are matched as whole words or full phrases. Bare substring checks
+ * such as "age" or "extractor" also match "webpage", "message" and similar,
+ * which mislabels unrelated failures as authentication or platform problems.
+ */
+function describeCommentsError(message: string, url: string): string {
+  if (/video unavailable|\bprivate\b/i.test(message)) {
+    return `Video is unavailable or private: ${url}. Check the URL and video privacy settings.`;
+  }
+  if (/unsupported url|no suitable extractor/i.test(message)) {
+    return `Unsupported platform or video URL: ${url}. Comments extraction is primarily supported for YouTube.`;
+  }
+  if (/\bnetwork\b|\bconnection\b/i.test(message)) {
+    return "Network error while extracting comments. Check your internet connection and retry.";
+  }
+  if (/comments are (disabled|turned off)/i.test(message)) {
+    return `Comments are disabled for this video: ${url}`;
+  }
+  if (/\bsign in\b|age[- ]restricted|confirm your age/i.test(message)) {
+    return `This video requires authentication to view comments. Configure cookies in your settings.`;
+  }
+  return `Failed to extract video comments: ${message}. Verify the URL is correct.`;
+}
+
+/**
+ * Get a human-readable summary of video comments.
+ * This is useful for quick overview without overwhelming JSON output.
+ *
+ * Comments are always fetched in "top" order via getVideoComments. Each comment
+ * becomes an "Author: ..." header line (with [UPLOADER] / [VERIFIED] / [PINNED]
+ * markers, the time text and the like count when present), followed by the
+ * comment text shortened to 300 characters and, for replies, a reply note.
+ *
+ * @param url - The URL of the video to extract comments from
+ * @param maxComments - Maximum number of comments to include (default: 10)
+ * @param _config - Optional configuration, forwarded unchanged to getVideoComments
+ * @returns Promise resolving to a formatted summary string
+ * @throws {Error} When URL is invalid or comment extraction fails
+ *
+ * @example
+ * ```typescript
+ * const summary = await getVideoCommentsSummary('https://youtube.com/watch?v=...');
+ * console.log(summary);
+ * // Output:
+ * // Video Comments (10 shown)
+ * // ──────────────────────────────
+ * //
+ * // Author: John Doe [VERIFIED] (2 days ago) - 1,234 likes
+ * // This is an awesome video!
+ * //
+ * // Author: Jane Smith (1 week ago) - 567 likes
+ * // Great content, keep it up!
+ * ```
+ */
+export async function getVideoCommentsSummary(
+  url: string,
+  maxComments: number = 10,
+  _config?: Config
+): Promise<string> {
+  try {
+    // Get the comments
+    const commentsJson = await getVideoComments(url, maxComments, "top", _config);
+    const data: CommentsResponse = JSON.parse(commentsJson);
+
+    // Format comments into a readable summary
+    const lines: string[] = [];
+
+    lines.push(`Video Comments (${data.count} shown)`);
+    lines.push('─'.repeat(30));
+    lines.push('');
+
+    for (const comment of data.comments) {
+      // Build author line with indicators
+      let authorLine = `Author: ${comment.author || 'Unknown'}`;
+      if (comment.author_is_uploader) {
+        authorLine += ' [UPLOADER]';
+      }
+      if (comment.author_is_verified) {
+        authorLine += ' [VERIFIED]';
+      }
+      if (comment.is_pinned) {
+        authorLine += ' [PINNED]';
+      }
+
+      // Time info
+      if (comment.time_text) {
+        authorLine += ` (${comment.time_text})`;
+      }
+
+      // Likes (omitted when the count is missing or zero)
+      if (comment.like_count !== undefined && comment.like_count > 0) {
+        authorLine += ` - ${comment.like_count.toLocaleString()} likes`;
+      }
+
+      lines.push(authorLine);
+
+      // Comment text (truncate if too long)
+      if (comment.text) {
+        lines.push(truncateCommentText(comment.text, 300));
+      }
+
+      // Note if this is a reply; top-level comments carry the parent 'root'
+      if (comment.parent && comment.parent !== 'root') {
+        lines.push(`(Reply to comment ${comment.parent})`);
+      }
+
+      lines.push('');
+    }
+
+    if (data.has_more) {
+      lines.push('---');
+      lines.push('More comments available. Increase maxComments to see more.');
+    }
+
+    return lines.join('\n');
+  } catch (error) {
+    // Errors from getVideoComments already carry an actionable message; only
+    // non-Error throwables are wrapped so callers always receive an Error.
+    throw error instanceof Error
+      ? error
+      : new Error(`Failed to generate comments summary for ${url}`);
+  }
+}
+
+/**
+ * Shorten comment text to at most `maxLength` UTF-16 code units plus '...'.
+ *
+ * If the cut would land between the two halves of a surrogate pair (for
+ * example an emoji), the dangling high surrogate is dropped as well so the
+ * result never contains an unpaired surrogate.
+ */
+function truncateCommentText(text: string, maxLength: number): string {
+  if (text.length <= maxLength) {
+    return text;
+  }
+  let end = maxLength;
+  const lastUnit = text.charCodeAt(end - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    end -= 1;
+  }
+  return text.substring(0, end) + '...';
+}