feat(comments): add YouTube video comments extraction tools

Add two new MCP tools for extracting video comments: - ytdlp_get_video_comments: Extract comments as structured JSON with author info, likes, timestamps, and reply threading - ytdlp_get_video_comments_summary: Get human-readable summary of top comments Features: - Support for sorting by "top" (most liked) or "new" (newest first) - Configurable comment limit (1-100 comments) - Includes author verification status, pinned comments, and uploader replies - Comprehensive error handling for disabled comments, private videos, etc. - Comprehensive test suite
2025-12-21 11:59:11 +00:00 · 2025-12-21 11:59:11 +00:00 · 2e2888cccc
commit 2e2888cccc
parent d7f5ec0f62
4 changed files with 592 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Added
+- **Video Comments Extraction**: New tools for extracting YouTube video comments
+  - `ytdlp_get_video_comments`: Extract comments in structured JSON format with author info, likes, timestamps, and reply threading
+  - `ytdlp_get_video_comments_summary`: Get human-readable summary of top comments
+  - Supports sorting by "top" (most liked) or "new" (newest first)
+  - Configurable comment limit (1-100 comments; 1-50 for the summary tool)
+  - Includes author verification status, pinned comments, and uploader replies
+  - Comprehensive test suite for comments functionality
+
 ### Changed
 - Add Claude Code settings (.claude/, CLAUDE.md) to .gitignore
 - Add development guideline to always update CHANGELOG.md
--- a/src/tests/comments.test.ts
+++ b/src/tests/comments.test.ts
@ -0,0 +1,179 @@
+// @ts-nocheck
+// @jest-environment node
+import { describe, test, expect, beforeAll } from '@jest/globals';
+import { getVideoComments, getVideoCommentsSummary } from '../modules/comments.js';
+import type { CommentsResponse } from '../modules/comments.js';
+import { CONFIG } from '../config.js';
+
+// Blank out PYTHONPATH / PYTHONHOME for this test process.
+// NOTE(review): presumably so a host Python configuration cannot leak into the
+// yt-dlp child process — confirm that _spawnPromise inherits process.env.
+process.env.PYTHONPATH = '';
+process.env.PYTHONHOME = '';
+
+describe('Video Comments Extraction', () => {
+  // Using a popular video that should have comments enabled
+  const testUrl = 'https://www.youtube.com/watch?v=jNQXAC9IVRw';
+
+  describe('getVideoComments', () => {
+    // These tests call getVideoComments for real (it spawns yt-dlp), hence the
+    // generous per-test timeouts.
+
+    test('should extract comments from YouTube video', async () => {
+      const commentsJson = await getVideoComments(testUrl, 5, 'top', CONFIG);
+      const data: CommentsResponse = JSON.parse(commentsJson);
+
+      // Verify response structure
+      expect(data).toHaveProperty('count');
+      expect(data).toHaveProperty('has_more');
+      expect(data).toHaveProperty('comments');
+      expect(Array.isArray(data.comments)).toBe(true);
+      expect(data.count).toBeGreaterThan(0);
+      expect(data.count).toBeLessThanOrEqual(5);
+
+      // count must describe the array that is actually returned
+      expect(data.count).toBe(data.comments.length);
+    }, 60000);
+
+    test('should return comments with expected fields', async () => {
+      const commentsJson = await getVideoComments(testUrl, 3, 'top', CONFIG);
+      const data: CommentsResponse = JSON.parse(commentsJson);
+
+      // Fail loudly instead of passing vacuously when nothing came back
+      expect(data.comments.length).toBeGreaterThan(0);
+      const comment = data.comments[0];
+
+      // These fields should be present on a regular comment...
+      expect(comment).toHaveProperty('text');
+      expect(comment).toHaveProperty('author');
+
+      // ...and, being present, must be strings
+      expect(typeof comment.text).toBe('string');
+      expect(typeof comment.author).toBe('string');
+    }, 60000);
+
+    test('should respect maxComments parameter', async () => {
+      const commentsJson = await getVideoComments(testUrl, 3, 'top', CONFIG);
+      const data: CommentsResponse = JSON.parse(commentsJson);
+
+      expect(data.comments.length).toBeLessThanOrEqual(3);
+    }, 60000);
+
+    test('should support different sort orders', async () => {
+      // Just verify both sort orders work without error
+      const topComments = await getVideoComments(testUrl, 2, 'top', CONFIG);
+      const topData: CommentsResponse = JSON.parse(topComments);
+      expect(topData).toHaveProperty('comments');
+
+      const newComments = await getVideoComments(testUrl, 2, 'new', CONFIG);
+      const newData: CommentsResponse = JSON.parse(newComments);
+      expect(newData).toHaveProperty('comments');
+    }, 90000);
+
+    test('should throw error for invalid URL', async () => {
+      await expect(getVideoComments('invalid-url', 5, 'top', CONFIG)).rejects.toThrow();
+    });
+
+    test('should throw error for unsupported URL', async () => {
+      await expect(getVideoComments('https://example.com/video', 5, 'top', CONFIG)).rejects.toThrow();
+    }, 30000);
+  });
+
+  describe('getVideoCommentsSummary', () => {
+    test('should generate human-readable summary', async () => {
+      const summary = await getVideoCommentsSummary(testUrl, 5, CONFIG);
+
+      expect(typeof summary).toBe('string');
+      expect(summary.length).toBeGreaterThan(0);
+
+      // Should contain header
+      expect(summary).toContain('Video Comments');
+
+      // Should have formatted content
+      expect(summary).toContain('Author:');
+    }, 60000);
+
+    test('should respect maxComments parameter', async () => {
+      const summary = await getVideoCommentsSummary(testUrl, 3, CONFIG);
+
+      // Count occurrences of "Author:" to verify number of comments.
+      // A missing match counts as zero so the test cannot pass without asserting.
+      const authorCount = (summary.match(/Author:/g) ?? []).length;
+      expect(authorCount).toBeGreaterThan(0);
+      expect(authorCount).toBeLessThanOrEqual(3);
+    }, 60000);
+
+    test('should throw error for invalid URL', async () => {
+      await expect(getVideoCommentsSummary('invalid-url', 5, CONFIG)).rejects.toThrow();
+    });
+
+    test('should handle videos with different comment counts', async () => {
+      const summary = await getVideoCommentsSummary(testUrl, 10, CONFIG);
+
+      // Summary should be a valid string
+      expect(typeof summary).toBe('string');
+      expect(summary.trim().length).toBeGreaterThan(0);
+    }, 60000);
+  });
+
+  describe('Error Handling', () => {
+    // Each case only requires that the returned promise rejects; the error
+    // message text itself is not inspected.
+    const rejectingCases = [
+      [
+        'should provide helpful error message for unavailable video',
+        'https://www.youtube.com/watch?v=invalid_video_id_xyz123',
+      ],
+      [
+        'should handle unsupported URLs gracefully',
+        'https://example.com/not-a-video',
+      ],
+    ];
+
+    for (const [title, badUrl] of rejectingCases) {
+      test(title, async () => {
+        await expect(getVideoComments(badUrl, 5, 'top', CONFIG)).rejects.toThrow();
+      }, 30000);
+    }
+  });
+
+  describe('Comment Fields', () => {
+    test('should include author information when available', async () => {
+      const commentsJson = await getVideoComments(testUrl, 5, 'top', CONFIG);
+      const data: CommentsResponse = JSON.parse(commentsJson);
+
+      // Guard against a vacuous pass when no comments come back
+      expect(data.comments.length).toBeGreaterThan(0);
+      const comment = data.comments[0];
+
+      // Author fields are optional, but must be strings when present
+      if (comment.author !== undefined) {
+        expect(typeof comment.author).toBe('string');
+      }
+      if (comment.author_id !== undefined) {
+        expect(typeof comment.author_id).toBe('string');
+      }
+    }, 60000);
+
+    test('should include engagement metrics when available', async () => {
+      const commentsJson = await getVideoComments(testUrl, 5, 'top', CONFIG);
+      const data: CommentsResponse = JSON.parse(commentsJson);
+
+      expect(data.comments.length).toBeGreaterThan(0);
+
+      // like_count is optional (it may be absent, or null after the JSON round
+      // trip), but whenever a value is present it must be a non-negative number.
+      for (const comment of data.comments) {
+        if (comment.like_count !== undefined && comment.like_count !== null) {
+          expect(typeof comment.like_count).toBe('number');
+          expect(comment.like_count).toBeGreaterThanOrEqual(0);
+        }
+      }
+    }, 60000);
+
+    test('should handle boolean flags correctly', async () => {
+      const commentsJson = await getVideoComments(testUrl, 10, 'top', CONFIG);
+      const data: CommentsResponse = JSON.parse(commentsJson);
+
+      for (const comment of data.comments) {
+        // Boolean flags should be boolean or undefined
+        if (comment.is_pinned !== undefined) {
+          expect(typeof comment.is_pinned).toBe('boolean');
+        }
+        if (comment.author_is_uploader !== undefined) {
+          expect(typeof comment.author_is_uploader).toBe('boolean');
+        }
+        if (comment.author_is_verified !== undefined) {
+          expect(typeof comment.author_is_verified).toBe('boolean');
+        }
+      }
+    }, 60000);
+  });
+});
--- a/src/index.mts
+++ b/src/index.mts
@ -19,6 +19,7 @@ import { downloadAudio } from "./modules/audio.js";
 import { listSubtitles, downloadSubtitles, downloadTranscript } from "./modules/subtitle.js";
 import { searchVideos } from "./modules/search.js";
 import { getVideoMetadata, getVideoMetadataSummary } from "./modules/metadata.js";
+import { getVideoComments, getVideoCommentsSummary } from "./modules/comments.js";

 const VERSION = '0.7.0';

@ -114,6 +115,33 @@ const GetVideoMetadataSummarySchema = z.object({
    .describe("URL of the video"),
 }).strict();

+/**
+ * Input schema for the `ytdlp_get_video_comments` tool.
+ *
+ * Advertised as the tool's `inputSchema` and used to parse the call arguments
+ * before they are handed to `getVideoComments`.
+ * - `url`: must be a syntactically valid URL string.
+ * - `maxComments`: coerced to a number, whole, 1-100, defaults to 20.
+ * - `sortOrder`: "top" or "new", defaults to "top".
+ * Unknown keys are rejected (`.strict()`).
+ */
+const GetVideoCommentsSchema = z.object({
+  url: z.string()
+    .url("Must be a valid URL")
+    .describe("URL of the video"),
+  maxComments: z.coerce.number()
+    .int("Must be a whole number")
+    .min(1, "Must return at least 1 comment")
+    .max(100, "Cannot exceed 100 comments")
+    .default(20)
+    .describe("Maximum number of comments to retrieve (1-100, default: 20)"),
+  sortOrder: z.enum(["top", "new"])
+    .default("top")
+    .describe("Sort order: 'top' for most liked, 'new' for newest (default: 'top')"),
+}).strict();
+
+/**
+ * Input schema for the `ytdlp_get_video_comments_summary` tool.
+ *
+ * Advertised as the tool's `inputSchema` and used to parse the call arguments
+ * before they are handed to `getVideoCommentsSummary`.
+ * - `url`: must be a syntactically valid URL string.
+ * - `maxComments`: coerced to a number, whole, 1-50, defaults to 10.
+ * There is no sort-order option here. Unknown keys are rejected (`.strict()`).
+ */
+const GetVideoCommentsSummarySchema = z.object({
+  url: z.string()
+    .url("Must be a valid URL")
+    .describe("URL of the video"),
+  maxComments: z.coerce.number()
+    .int("Must be a whole number")
+    .min(1, "Must return at least 1 comment")
+    .max(50, "Cannot exceed 50 comments for summary")
+    .default(10)
+    .describe("Maximum number of comments to include in summary (1-50, default: 10)"),
+}).strict();
+
 /**
 * Validate system configuration
 * @throws {Error} when configuration is invalid
@ -448,6 +476,85 @@ Error Handling:
          openWorldHint: true
        }
      },
+      {
+        name: "ytdlp_get_video_comments",
+        description: `Extract comments from a video in JSON format.
+
+This tool retrieves comments from videos (primarily YouTube) using yt-dlp's comment extraction feature. Returns structured comment data including author info, likes, and timestamps.
+
+Args:
+  - url (string): Full video URL
+  - maxComments (number): Maximum comments to retrieve (1-100, default: 20)
+  - sortOrder (enum): 'top' for most liked comments, 'new' for newest (default: 'top')
+
+Returns:
+  JSON object with:
+  - count: Number of comments returned
+  - has_more: Whether more comments are available
+  - comments: Array of comment objects containing:
+    - id: Comment identifier
+    - text: Comment content
+    - author: Author name
+    - author_id: Author channel ID
+    - author_is_uploader: Whether author is video creator
+    - author_is_verified: Whether author is verified
+    - like_count: Number of likes
+    - is_pinned: Whether comment is pinned
+    - parent: Parent comment ID (for replies)
+    - timestamp: Unix timestamp
+    - time_text: Human-readable time (e.g., "2 days ago")
+
+Use when: You need structured comment data for analysis or display
+Don't use when: You want a quick readable overview (use ytdlp_get_video_comments_summary)
+
+Note: Comment extraction is primarily supported for YouTube. Other platforms may have limited support.
+
+Error Handling:
+  - "Video is unavailable or private" for inaccessible content
+  - "Comments are disabled" for videos with comments turned off
+  - "Requires authentication" for age-restricted content (configure cookies)
+  - "Unsupported platform" for non-YouTube URLs`,
+        inputSchema: GetVideoCommentsSchema,
+        annotations: {
+          readOnlyHint: true,
+          destructiveHint: false,
+          idempotentHint: true,
+          openWorldHint: true
+        }
+      },
+      {
+        name: "ytdlp_get_video_comments_summary",
+        description: `Get a human-readable summary of video comments.
+
+This tool extracts comments and formats them into an easy-to-read summary. Perfect for quick overview of audience reactions and popular comments.
+
+Args:
+  - url (string): Full video URL
+  - maxComments (number): Maximum comments to include (1-50, default: 10)
+
+Returns:
+  Formatted text summary with:
+  - Comment author with indicators ([UPLOADER], [VERIFIED], [PINNED])
+  - Time posted (e.g., "2 days ago")
+  - Like count
+  - Comment text (truncated to 300 chars if longer)
+  - Reply indicators
+
+Use when: You want a quick, readable overview of video comments
+Don't use when: You need complete structured data (use ytdlp_get_video_comments)
+
+Note: Comments are sorted by "top" (most liked) by default.
+
+Error Handling:
+  - Same as ytdlp_get_video_comments (unavailable videos, disabled comments, authentication required)`,
+        inputSchema: GetVideoCommentsSummarySchema,
+        annotations: {
+          readOnlyHint: true,
+          destructiveHint: false,
+          idempotentHint: true,
+          openWorldHint: true
+        }
+      },
    ],
  };
 });
@ -493,6 +600,8 @@ server.setRequestHandler(
      endTime?: string;
      query?: string;
      maxResults?: number;
+      maxComments?: number;
+      sortOrder?: "top" | "new";
      fields?: string[];
    };

@ -552,6 +661,18 @@ server.setRequestHandler(
          () => getVideoMetadataSummary(validated.url, CONFIG),
          "Error generating video metadata summary"
        );
+      } else if (toolName === "ytdlp_get_video_comments") {
+        const validated = GetVideoCommentsSchema.parse(args);
+        return handleToolExecution(
+          () => getVideoComments(validated.url, validated.maxComments, validated.sortOrder, CONFIG),
+          "Error extracting video comments"
+        );
+      } else if (toolName === "ytdlp_get_video_comments_summary") {
+        const validated = GetVideoCommentsSummarySchema.parse(args);
+        return handleToolExecution(
+          () => getVideoCommentsSummary(validated.url, validated.maxComments, CONFIG),
+          "Error generating video comments summary"
+        );
      } else {
        return {
          content: [{ type: "text", text: `Unknown tool: ${toolName}` }],
--- a/src/modules/comments.ts
+++ b/src/modules/comments.ts
@ -0,0 +1,283 @@
+import type { Config } from "../config.js";
+import { getCookieArgs } from "../config.js";
+import {
+  _spawnPromise,
+  validateUrl
+} from "./utils.js";
+
+/**
+ * Represents a single comment on a video.
+ *
+ * Every field is optional: getVideoComments copies these properties straight
+ * from the entries of the `comments` array in yt-dlp's JSON output without
+ * checking that they are present.
+ */
+export interface Comment {
+  /** Unique comment identifier */
+  id?: string;
+  /** Comment text content */
+  text?: string;
+  /** Comment author name */
+  author?: string;
+  /** Comment author channel ID */
+  author_id?: string;
+  /** Comment author channel URL */
+  author_url?: string;
+  /** Whether the author is the video uploader */
+  author_is_uploader?: boolean;
+  /** Whether author is verified */
+  author_is_verified?: boolean;
+  /** Comment like count */
+  like_count?: number;
+  /** Whether comment is pinned */
+  is_pinned?: boolean;
+  /** Whether comment is marked as favorite by uploader */
+  is_favorited?: boolean;
+  /**
+   * Parent comment ID (for replies).
+   * getVideoCommentsSummary labels a comment as a reply only when this is set
+   * and is not the literal string 'root'.
+   */
+  parent?: string;
+  /** Unix timestamp of comment */
+  timestamp?: number;
+  /** Human-readable time ago string */
+  time_text?: string;
+  /**
+   * Additional fields that might be present on the raw entry.
+   * getVideoComments does not copy such extra fields into its response.
+   */
+  [key: string]: unknown;
+}
+
+/**
+ * Response structure for video comments.
+ * getVideoComments serializes an object of this shape to a JSON string.
+ */
+export interface CommentsResponse {
+  /** Number of entries in `comments` */
+  count: number;
+  /**
+   * Whether more comments exist than are included here: true when yt-dlp
+   * returned more than `maxComments` entries, and always true when the
+   * response was truncated to fit the character limit.
+   */
+  has_more: boolean;
+  /** Array of comment objects */
+  comments: Comment[];
+  /** Set to true only when comments were dropped to fit the configured character limit */
+  _truncated?: boolean;
+  /** Explanation accompanying `_truncated`, including how many comments were kept */
+  _message?: string;
+}
+
+/**
+ * Sort order for comments: "top" for most liked, "new" for newest first.
+ * The value is passed to yt-dlp as the `comment_sort` extractor argument.
+ */
+export type CommentSortOrder = "top" | "new";
+
+/**
+ * Translate a raw yt-dlp failure message into an actionable, user-facing one.
+ * Checks run in a fixed order; the first matching category wins.
+ */
+function describeCommentsFailure(message: string, url: string): string {
+  if (message.includes("Video unavailable") || message.includes("private")) {
+    return `Video is unavailable or private: ${url}. Check the URL and video privacy settings.`;
+  }
+  if (message.includes("Unsupported URL") || message.includes("extractor")) {
+    return `Unsupported platform or video URL: ${url}. Comments extraction is primarily supported for YouTube.`;
+  }
+  if (message.includes("network") || message.includes("Connection")) {
+    return "Network error while extracting comments. Check your internet connection and retry.";
+  }
+  if (message.includes("comments are disabled") || message.includes("Comments are turned off")) {
+    return `Comments are disabled for this video: ${url}`;
+  }
+  // Match "age" as a whole word only: a plain substring test also fires on
+  // "webpage", "message", "page", ... and mislabels those failures as
+  // authentication problems.
+  if (message.includes("Sign in") || /\bage\b/i.test(message)) {
+    return "This video requires authentication to view comments. Configure cookies in your settings.";
+  }
+  return `Failed to extract video comments: ${message}. Verify the URL is correct.`;
+}
+
+/**
+ * Extract video comments using yt-dlp.
+ *
+ * Runs yt-dlp with `--write-comments --dump-json --skip-download`, reads the
+ * `comments` array from the dumped metadata and returns it as a pretty-printed
+ * JSON string shaped like {@link CommentsResponse}.
+ *
+ * One comment more than `maxComments` is requested from yt-dlp so that
+ * `has_more` can report whether further comments exist; the extra comment is
+ * never included in the response.
+ *
+ * When `_config` is given and the serialized response is longer than
+ * `_config.limits.characterLimit`, trailing comments are dropped (never below
+ * one) and `_truncated` / `_message` are set on the response.
+ *
+ * @param url - The URL of the video to extract comments from
+ * @param maxComments - Maximum number of comments to return; must be a positive integer (default: 20)
+ * @param sortOrder - Sort order: "top" for most liked, "new" for newest (default: "top")
+ * @param _config - Optional configuration; supplies the cookie arguments and the character limit
+ * @returns Promise resolving to JSON string with comments data
+ * @throws {Error} When the URL fails validation, `maxComments` is not a positive
+ *   integer, or comment extraction fails
+ *
+ * @example
+ * ```typescript
+ * // Get top 20 comments
+ * const comments = await getVideoComments('https://youtube.com/watch?v=...');
+ * console.log(comments);
+ *
+ * // Get newest 50 comments
+ * const newComments = await getVideoComments(
+ *   'https://youtube.com/watch?v=...',
+ *   50,
+ *   'new'
+ * );
+ * ```
+ */
+export async function getVideoComments(
+  url: string,
+  maxComments: number = 20,
+  sortOrder: CommentSortOrder = "top",
+  _config?: Config
+): Promise<string> {
+  // Validate the URL
+  if (!validateUrl(url)) {
+    throw new Error("Invalid or unsupported URL format");
+  }
+
+  // slice() with a negative or fractional bound would silently return the
+  // wrong set of comments, so reject such values up front.
+  if (!Number.isInteger(maxComments) || maxComments < 1) {
+    throw new Error("maxComments must be a positive integer");
+  }
+
+  const args = [
+    "--dump-json",
+    "--no-warnings",
+    // NOTE(review): this turns off TLS certificate verification — confirm it is intentional.
+    "--no-check-certificate",
+    "--write-comments",
+    // Request one extra comment purely as a probe for has_more; if yt-dlp were
+    // capped at exactly maxComments, has_more could never become true.
+    "--extractor-args", `youtube:comment_sort=${sortOrder};max_comments=${maxComments + 1},all,all`,
+    "--skip-download",
+    ...(_config ? getCookieArgs(_config) : []),
+    url
+  ];
+
+  try {
+    // Execute yt-dlp to get metadata with comments
+    const output = await _spawnPromise("yt-dlp", args);
+
+    // Parse the JSON output
+    const metadata = JSON.parse(output);
+
+    // Extract comments from metadata; anything that is not an array counts as "no comments"
+    const rawComments: Comment[] = Array.isArray(metadata.comments) ? metadata.comments : [];
+
+    // Limit to maxComments (this also discards the has_more probe entry)
+    const comments = rawComments.slice(0, maxComments);
+
+    // Build response, copying only the documented fields of each comment
+    const response: CommentsResponse = {
+      count: comments.length,
+      has_more: rawComments.length > maxComments,
+      comments: comments.map(comment => ({
+        id: comment.id,
+        text: comment.text,
+        author: comment.author,
+        author_id: comment.author_id,
+        author_url: comment.author_url,
+        author_is_uploader: comment.author_is_uploader,
+        author_is_verified: comment.author_is_verified,
+        like_count: comment.like_count,
+        is_pinned: comment.is_pinned,
+        is_favorited: comment.is_favorited,
+        parent: comment.parent,
+        timestamp: comment.timestamp,
+        time_text: comment.time_text
+      }))
+    };
+
+    let result = JSON.stringify(response, null, 2);
+
+    // Check character limit
+    if (_config && result.length > _config.limits.characterLimit) {
+      // Drop comments from the end, one at a time, until the serialized
+      // response fits or only a single comment is left.
+      let truncatedComments = [...response.comments];
+
+      while (result.length > _config.limits.characterLimit && truncatedComments.length > 1) {
+        truncatedComments = truncatedComments.slice(0, -1);
+        const truncatedResponse: CommentsResponse = {
+          count: truncatedComments.length,
+          has_more: true,
+          comments: truncatedComments,
+          _truncated: true,
+          _message: `Response truncated to ${truncatedComments.length} comments due to size limits. Use smaller maxComments value.`
+        };
+        result = JSON.stringify(truncatedResponse, null, 2);
+      }
+    }
+
+    return result;
+
+  } catch (error) {
+    if (error instanceof Error) {
+      // Handle common yt-dlp errors with actionable messages
+      throw new Error(describeCommentsFailure(error.message, url));
+    }
+    throw new Error(`Failed to extract video comments from ${url}`);
+  }
+}
+
+/**
+ * Get a human-readable summary of video comments.
+ * This is useful for quick overview without overwhelming JSON output.
+ *
+ * Comments are always fetched with the "top" sort order. Each comment becomes
+ * an "Author: ..." line (with [UPLOADER] / [VERIFIED] / [PINNED] markers, the
+ * time text and the like count when available), followed by the comment text
+ * cut to 300 characters, and a reply note when the comment has a parent other
+ * than 'root'.
+ *
+ * @param url - The URL of the video to extract comments from
+ * @param maxComments - Maximum number of comments to include (default: 10)
+ * @param _config - Optional configuration, passed through to getVideoComments
+ * @returns Promise resolving to a formatted summary string
+ * @throws {Error} When URL is invalid or comment extraction fails
+ *
+ * @example
+ * ```typescript
+ * const summary = await getVideoCommentsSummary('https://youtube.com/watch?v=...');
+ * console.log(summary);
+ * // Output:
+ * // Video Comments (2 shown)
+ * // ──────────────────────────────
+ * //
+ * // Author: John Doe [VERIFIED] (2 days ago) - 1,234 likes
+ * // This is an awesome video!
+ * //
+ * // Author: Jane Smith (1 week ago) - 567 likes
+ * // Great content, keep it up!
+ * ```
+ */
+export async function getVideoCommentsSummary(
+  url: string,
+  maxComments: number = 10,
+  _config?: Config
+): Promise<string> {
+  // Longest comment text, in characters, shown before "..." is appended
+  const maxTextLength = 300;
+
+  try {
+    // Get the comments
+    const commentsJson = await getVideoComments(url, maxComments, "top", _config);
+    const data: CommentsResponse = JSON.parse(commentsJson);
+
+    // Format comments into a readable summary
+    const lines: string[] = [
+      `Video Comments (${data.count} shown)`,
+      '─'.repeat(30),
+      ''
+    ];
+
+    for (const comment of data.comments) {
+      // Build author line with indicators
+      let authorLine = `Author: ${comment.author || 'Unknown'}`;
+      if (comment.author_is_uploader) {
+        authorLine += ' [UPLOADER]';
+      }
+      if (comment.author_is_verified) {
+        authorLine += ' [VERIFIED]';
+      }
+      if (comment.is_pinned) {
+        authorLine += ' [PINNED]';
+      }
+
+      // Time info
+      if (comment.time_text) {
+        authorLine += ` (${comment.time_text})`;
+      }
+
+      // Likes; the locale is pinned so the output does not vary with the host's settings
+      if (typeof comment.like_count === 'number' && comment.like_count > 0) {
+        authorLine += ` - ${comment.like_count.toLocaleString('en-US')} likes`;
+      }
+
+      lines.push(authorLine);
+
+      // Comment text (truncate if too long)
+      if (comment.text) {
+        let text = comment.text;
+        // Fewer UTF-16 units than the limit implies fewer code points, so the
+        // code-point split is only needed for long texts.
+        if (text.length > maxTextLength) {
+          // Cut on code points so an emoji is never split into a lone surrogate
+          const codePoints = Array.from(text);
+          if (codePoints.length > maxTextLength) {
+            text = codePoints.slice(0, maxTextLength).join('') + '...';
+          }
+        }
+        lines.push(text);
+      }
+
+      // Note if this is a reply
+      if (comment.parent && comment.parent !== 'root') {
+        lines.push(`(Reply to comment ${comment.parent})`);
+      }
+
+      lines.push('');
+    }
+
+    if (data.has_more) {
+      lines.push('---');
+      // When the response was cut for size, raising maxComments cannot help;
+      // surface the truncation message instead of the generic hint.
+      lines.push(
+        data._truncated && data._message
+          ? data._message
+          : 'More comments available. Increase maxComments to see more.'
+      );
+    }
+
+    return lines.join('\n');
+  } catch (error) {
+    // Errors from getVideoComments already carry an actionable message
+    if (error instanceof Error) {
+      throw error;
+    }
+    throw new Error(`Failed to generate comments summary for ${url}`);
+  }
+}