#!/usr/bin/env python3
"""Video processing: extract audio, transcribe/translate and burn subtitles.

Pipeline:
- Extract the audio track with ffmpeg (16 kHz mono WAV)
- Transcribe with faster-whisper or openai-whisper (task='translate')
- Write an SRT file and burn it into the video with ffmpeg

Note: requires ffmpeg on PATH and, for the models, faster-whisper or
openai-whisper installed.
"""

import argparse
import subprocess
import sys
import tempfile
from pathlib import Path

from transcribe import write_srt


def extract_audio(video_path: str, out_audio: str) -> None:
    """Extract the audio of *video_path* into *out_audio* as 16 kHz mono WAV.

    Raises subprocess.CalledProcessError if ffmpeg exits non-zero.
    """
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",                   # drop the video stream
        "-acodec", "pcm_s16le",  # uncompressed 16-bit PCM
        "-ar", "16000",          # 16 kHz sample rate (whisper's expected input)
        "-ac", "1",              # mono
        out_audio,
    ]
    subprocess.run(cmd, check=True)


def _escape_filter_path(path: str) -> str:
    """Escape *path* for use inside an ffmpeg filtergraph argument.

    Backslashes, single quotes and colons are special in filtergraph
    syntax — e.g. a Windows drive letter like ``C:`` would otherwise be
    parsed as an option separator by the ``subtitles`` filter.
    """
    return (
        path.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace(":", "\\:")
    )


def burn_subtitles(video_path: str, srt_path: str, out_video: str) -> None:
    """Hard-burn *srt_path* into *video_path*, writing *out_video*.

    Uses ffmpeg's ``subtitles`` video filter; the audio stream is copied
    unchanged, only the video is re-encoded.

    Raises subprocess.CalledProcessError if ffmpeg exits non-zero.
    """
    # Escape the SRT path: an unescaped ':' or quote breaks the filtergraph.
    vf = f"subtitles={_escape_filter_path(srt_path)}"
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", vf,
        "-c:a", "copy",
        out_video,
    ]
    subprocess.run(cmd, check=True)


def transcribe_and_translate_faster(audio_path: str, model: str, target: str):
    """Transcribe *audio_path* with faster-whisper and return its segments.

    NOTE(review): whisper's ``task='translate'`` always produces English
    output, and the ``language`` argument is the *source*-language hint —
    passing the desired target language here looks wrong; confirm intent
    before relying on non-English targets.

    Returns a list of segment objects (each with .start/.end/.text).
    """
    from faster_whisper import WhisperModel

    wm = WhisperModel(model, device="cpu", compute_type="int8")
    segments, _info = wm.transcribe(
        audio_path, beam_size=5, task="translate", language=target
    )
    # faster-whisper yields segments lazily; materialize them so the
    # caller's "no segments" emptiness check actually works (a generator
    # object is always truthy, even when it would yield nothing).
    return list(segments)


def transcribe_and_translate_openai(audio_path: str, model: str, target: str):
    """Transcribe *audio_path* with openai-whisper and return its segments.

    NOTE(review): same caveat as the faster-whisper backend — whisper's
    ``task='translate'`` targets English only and ``language`` is the
    source-language hint.

    Returns the list of segment dicts from the whisper result, or None
    if the result carries no ``segments`` key.
    """
    import whisper

    m = whisper.load_model(model, device="cpu")
    result = m.transcribe(
        audio_path, fp16=False, task="translate", language=target
    )
    return result.get("segments", None)


def main() -> None:
    """CLI entry point: extract, transcribe/translate and burn subtitles."""
    parser = argparse.ArgumentParser(
        description=(
            "Extraer, transcribir/traducir y quemar subtítulos en vídeo"
            " (offline)"
        )
    )
    parser.add_argument(
        "--video", "-v", required=True, help="Ruta del archivo de vídeo"
    )
    parser.add_argument(
        "--backend",
        "-b",
        choices=["faster-whisper", "openai-whisper"],
        default="faster-whisper",
    )
    parser.add_argument(
        "--model",
        "-m",
        default="base",
        help="Modelo de whisper a usar (tiny, base, etc.)",
    )
    parser.add_argument(
        "--to", "-t", default="es", help="Idioma de destino para traducción"
    )
    parser.add_argument(
        "--out",
        "-o",
        default=None,
        help=(
            "Ruta del vídeo de salida (si no se especifica,"
            " se usa input_burned.mp4)"
        ),
    )
    parser.add_argument(
        "--srt",
        default=None,
        help=(
            "Ruta SRT a escribir (si no se especifica,"
            " se usa input.srt)"
        ),
    )
    args = parser.parse_args()

    video = Path(args.video)
    if not video.exists():
        print("Vídeo no encontrado", file=sys.stderr)
        sys.exit(2)

    # Derive default output paths next to the input video.
    out_video = (
        args.out
        if args.out
        else str(video.with_name(video.stem + "_burned.mp4"))
    )
    srt_path = args.srt if args.srt else str(video.with_suffix('.srt'))

    # Only the temp file *name* is needed; the handle is closed when the
    # with-block exits, then ffmpeg writes to that path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name

    try:
        print("Extrayendo audio con ffmpeg...")
        extract_audio(str(video), audio_path)

        print(
            f"Transcribiendo y traduciendo a '{args.to}'"
            f" usando {args.backend}..."
        )
        if args.backend == "faster-whisper":
            segments = transcribe_and_translate_faster(
                audio_path, args.model, args.to
            )
        else:
            segments = transcribe_and_translate_openai(
                audio_path, args.model, args.to
            )

        if not segments:
            print(
                "No se obtuvieron segmentos de la transcripción",
                file=sys.stderr,
            )
            sys.exit(3)

        print(f"Escribiendo SRT en {srt_path}...")
        write_srt(segments, srt_path)

        print(
            f"Quemando subtítulos en el vídeo -> {out_video}"
            f" (esto puede tardar)..."
        )
        burn_subtitles(str(video), srt_path, out_video)
        print("Proceso completado.")
    finally:
        # Best-effort removal of the temp WAV; ignore filesystem errors only.
        try:
            Path(audio_path).unlink()
        except OSError:
            pass


if __name__ == "__main__":
    main()