diff --git a/README.md b/README.md
index a465b20..f7230cb 100644
--- a/README.md
+++ b/README.md
@@ -71,12 +71,6 @@ pip install yt-dlp
* Inputs:
* `url` (string, required): URL of the video
-* **download_transcript**
- * Download and clean video subtitles to produce a plain text transcript without timestamps or formatting
- * Inputs:
- * `url` (string, required): URL of the video
- * `language` (string, optional): Language code (e.g., 'en', 'zh-Hant', 'ja'). Defaults to 'en'
-
## Usage Examples
Ask your LLM to:
@@ -86,8 +80,6 @@ Ask your LLM to:
"Download Chinese subtitles from this video: https://youtube.com/watch?v=..."
"Download this video in 1080p: https://youtube.com/watch?v=..."
"Download audio from this YouTube video: https://youtube.com/watch?v=..."
-"Get a clean transcript of this video: https://youtube.com/watch?v=..."
-"Download Spanish transcript from this video: https://youtube.com/watch?v=..."
```
## Manual Start
diff --git a/src/__tests__/subtitle.test.ts b/src/__tests__/subtitle.test.ts
index 1e55ea2..19e360b 100644
--- a/src/__tests__/subtitle.test.ts
+++ b/src/__tests__/subtitle.test.ts
@@ -3,8 +3,7 @@
import { describe, test, expect } from '@jest/globals';
import * as os from 'os';
import * as path from 'path';
-import { listSubtitles, downloadSubtitles, downloadTranscript } from '../modules/subtitle.js';
-import { cleanSubtitleToTranscript } from '../modules/utils.js';
+import { listSubtitles, downloadSubtitles } from '../modules/subtitle.js';
import { CONFIG } from '../config.js';
import * as fs from 'fs';
@@ -52,60 +51,4 @@ describe('Subtitle Functions', () => {
.toThrow();
});
});
-
- describe('downloadTranscript', () => {
- test('downloads and cleans transcript successfully', async () => {
- const result = await downloadTranscript(testUrl, 'en', testConfig);
- expect(typeof result).toBe('string');
- expect(result.length).toBeGreaterThan(0);
- expect(result).not.toContain('WEBVTT');
- expect(result).not.toContain('-->');
- expect(result).not.toMatch(/^\d+$/m);
- }, 30000);
-
- test('handles invalid URL', async () => {
- await expect(downloadTranscript('invalid-url', 'en', testConfig))
- .rejects
- .toThrow();
- });
- });
-
- describe('cleanSubtitleToTranscript', () => {
- test('cleans SRT content correctly', () => {
- const srtContent = `1
-00:00:01,000 --> 00:00:03,000
-Hello world
-
-2
-00:00:04,000 --> 00:00:06,000
-This is a test
-
-3
-00:00:07,000 --> 00:00:09,000
-Bold text here`;
-
- const result = cleanSubtitleToTranscript(srtContent);
- expect(result).toBe('Hello world This is a test Bold text here');
- });
-
- test('handles empty content', () => {
- const result = cleanSubtitleToTranscript('');
- expect(result).toBe('');
- });
-
- test('removes timestamps and sequence numbers', () => {
- const srtContent = `1
-00:00:01,000 --> 00:00:03,000
-First line
-
-2
-00:00:04,000 --> 00:00:06,000
-Second line`;
-
- const result = cleanSubtitleToTranscript(srtContent);
- expect(result).not.toContain('00:00');
- expect(result).not.toMatch(/^\d+$/);
- expect(result).toBe('First line Second line');
- });
- });
});
\ No newline at end of file
diff --git a/src/index.mts b/src/index.mts
index 6dbe2c6..2310dc1 100644
--- a/src/index.mts
+++ b/src/index.mts
@@ -15,7 +15,7 @@ import { CONFIG } from "./config.js";
import { _spawnPromise, safeCleanup } from "./modules/utils.js";
import { downloadVideo } from "./modules/video.js";
import { downloadAudio } from "./modules/audio.js";
-import { listSubtitles, downloadSubtitles, downloadTranscript } from "./modules/subtitle.js";
+import { listSubtitles, downloadSubtitles } from "./modules/subtitle.js";
const VERSION = '0.6.26';
@@ -148,18 +148,6 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
required: ["url"],
},
},
- {
- name: "download_transcript",
- description: "Download and clean video subtitles to produce a plain text transcript without timestamps or formatting.",
- inputSchema: {
- type: "object",
- properties: {
- url: { type: "string", description: "URL of the video" },
- language: { type: "string", description: "Language code (e.g., 'en', 'zh-Hant', 'ja'). Defaults to 'en'" },
- },
- required: ["url"],
- },
- },
],
};
});
@@ -223,11 +211,6 @@ server.setRequestHandler(
() => downloadAudio(args.url, CONFIG),
"Error downloading audio"
);
- } else if (toolName === "download_transcript") {
- return handleToolExecution(
- () => downloadTranscript(args.url, args.language || CONFIG.download.defaultSubtitleLanguage, CONFIG),
- "Error downloading transcript"
- );
} else {
return {
content: [{ type: "text", text: `Unknown tool: ${toolName}` }],
diff --git a/src/modules/subtitle.ts b/src/modules/subtitle.ts
index 4a71cb8..206378b 100644
--- a/src/modules/subtitle.ts
+++ b/src/modules/subtitle.ts
@@ -2,7 +2,7 @@ import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import type { Config } from '../config.js';
-import { _spawnPromise, validateUrl, cleanSubtitleToTranscript } from "./utils.js";
+import { _spawnPromise, validateUrl } from "./utils.js";
/**
* Lists all available subtitles for a video.
@@ -105,65 +105,4 @@ export async function downloadSubtitles(
} finally {
fs.rmSync(tempDir, { recursive: true, force: true });
}
-}
-
-/**
- * Downloads and cleans subtitles to produce a plain text transcript.
- *
- * @param url - The URL of the video
- * @param language - Language code (e.g., 'en', 'zh-Hant', 'ja')
- * @param config - Configuration object
- * @returns Promise resolving to the cleaned transcript text
- * @throws {Error} When URL is invalid, language is not available, or download fails
- *
- * @example
- * ```typescript
- * try {
- * const transcript = await downloadTranscript('https://youtube.com/watch?v=...', 'en', config);
- * console.log('Transcript:', transcript);
- * } catch (error) {
- * console.error('Failed to download transcript:', error);
- * }
- * ```
- */
-export async function downloadTranscript(
- url: string,
- language: string,
- config: Config
-): Promise {
- if (!validateUrl(url)) {
- throw new Error('Invalid or unsupported URL format');
- }
-
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), config.file.tempDirPrefix));
-
- try {
- await _spawnPromise('yt-dlp', [
- '--skip-download',
- '--write-subs',
- '--write-auto-subs',
- '--sub-lang', language,
- '--sub-format', 'ttml',
- '--convert-subs', 'srt',
- '--output', path.join(tempDir, 'transcript.%(ext)s'),
- url
- ]);
-
- const srtFiles = fs.readdirSync(tempDir)
- .filter(file => file.endsWith('.srt'));
-
- if (srtFiles.length === 0) {
- throw new Error('No subtitle files found for transcript generation');
- }
-
- let transcriptContent = '';
- for (const file of srtFiles) {
- const srtContent = fs.readFileSync(path.join(tempDir, file), 'utf8');
- transcriptContent += cleanSubtitleToTranscript(srtContent) + ' ';
- }
-
- return transcriptContent.trim();
- } finally {
- fs.rmSync(tempDir, { recursive: true, force: true });
- }
}
\ No newline at end of file
diff --git a/src/modules/utils.ts b/src/modules/utils.ts
index a1efb49..70d897f 100644
--- a/src/modules/utils.ts
+++ b/src/modules/utils.ts
@@ -145,39 +145,4 @@ export function generateRandomFilename(extension: string = 'mp4'): string {
const timestamp = getFormattedTimestamp();
const randomId = randomBytes(4).toString('hex');
return `${timestamp}_${randomId}.${extension}`;
-}
-
-/**
- * Cleans SRT subtitle content to produce a plain text transcript.
- * Removes timestamps, sequence numbers, and HTML tags.
- *
- * @param srtContent - Raw SRT subtitle content
- * @returns Cleaned transcript text
- *
- * @example
- * ```typescript
- * const cleanedText = cleanSubtitleToTranscript(srtContent);
- * console.log(cleanedText); // 'Hello world this is a transcript...'
- * ```
- */
-export function cleanSubtitleToTranscript(srtContent: string): string {
- return srtContent
- .split('\n')
- .filter(line => {
- const trimmed = line.trim();
- // Remove empty lines
- if (!trimmed) return false;
- // Remove sequence numbers (lines that are just digits)
- if (/^\d+$/.test(trimmed)) return false;
- // Remove timestamp lines
- if (/^\d{2}:\d{2}:\d{2}[.,]\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}[.,]\d{3}$/.test(trimmed)) return false;
- return true;
- })
- .map(line => {
- // Remove HTML tags
- return line.replace(/<[^>]*>/g, '');
- })
- .join(' ')
- .replace(/\s+/g, ' ')
- .trim();
}
\ No newline at end of file