#!/usr/bin/env python3
"""translate_srt_with_gemini.py

Read an .srt file, translate each subtitle block to Spanish with Gemini
(Google Generative Language API) and write a new .srt that keeps the original
indices and timestamps.

Usage:
    export GEMINI_API_KEY="..."
    .venv/bin/python whisper_project/translate_srt_with_gemini.py \
        --in whisper_project/dailyrutines.kokoro.dub.srt \
        --out whisper_project/dailyrutines.kokoro.dub.es.srt \
        --model gemini-2.5-flash

If --gemini-api-key is not given, the GEMINI_API_KEY environment variable is
used.
"""
import argparse
import json
import os
import time
from typing import List

import requests
import srt

# Prefer the official client library when it is installed (better
# compatibility); the script still works without it through plain HTTP.
try:
    import google.generativeai as genai  # type: ignore
except Exception:
    genai = None

# REST API versions tried, in order, by the HTTP fallback.
_REST_API_VERSIONS = ("v1", "v1beta")
_MAX_OUTPUT_TOKENS = 1024
_TEMPERATURE = 0.0
_HTTP_TIMEOUT_S = 30


def _build_prompt(text: str) -> str:
    """Return the (Spanish) instruction prompt wrapping *text*."""
    return f"Traduce al español el siguiente texto y devuelve solo el texto traducido:\n\n{text}"


def _text_from_genai_response(resp) -> str:
    """Extract the generated text from a ``google.generativeai`` response.

    Returns an empty string when no text can be found.
    """
    # NOTE(review): the library's ``.text`` quick accessor appears to raise
    # ValueError when the response carries no valid part (e.g. a blocked
    # prompt); swallow that here so the candidates fallback is still tried.
    try:
        quick = getattr(resp, "text", None)
    except ValueError:
        quick = None
    if quick:
        return quick.strip()

    candidates = getattr(resp, "candidates", None)
    if candidates:
        content = getattr(candidates[0], "content", None)
        parts = getattr(content, "parts", None) or ()
        texts = [p.text for p in parts if getattr(p, "text", None)]
        if texts:
            return "\n".join(texts).strip()
    return ""


def _text_from_rest_payload(payload) -> str:
    """Extract the generated text from a decoded REST JSON response.

    Handles the ``generateContent`` shape
    (``candidates[0].content.parts[].text``) first and then the older shapes
    the script previously accepted. Returns an empty string when nothing
    usable is present.
    """
    if not isinstance(payload, dict):
        return ""

    candidates = payload.get("candidates")
    if isinstance(candidates, list) and candidates and isinstance(candidates[0], dict):
        first = candidates[0]
        content = first.get("content")

        # generateContent nests the parts inside a dict; legacy responses
        # used a bare list of {"text": ...} dicts.
        parts = content.get("parts") if isinstance(content, dict) else content
        if isinstance(parts, list):
            texts = [
                p["text"]
                for p in parts
                if isinstance(p, dict) and isinstance(p.get("text"), str)
            ]
            joined = "\n".join(texts).strip()
            if joined:
                return joined

        for legacy_key in ("content", "output"):
            value = first.get(legacy_key)
            if isinstance(value, str) and value.strip():
                return value.strip()

    for key in ("output_text", "text", "response", "translated_text"):
        value = payload.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
    return ""


def translate_text_google_gl(text: str, api_key: str, model: str = "gemini-2.5-flash") -> str:
    """Translate *text* to Spanish with the Google Generative Language API.

    The official ``google.generativeai`` library is used when it is
    importable; otherwise (or if it fails) the REST ``generateContent``
    endpoint is called directly.

    Args:
        text: Text to translate.
        api_key: Gemini API key.
        model: Model name, with or without the ``models/`` prefix.

    Returns:
        The translated text, or *text* unchanged if every attempt fails.

    Raises:
        ValueError: If *api_key* is empty.
    """
    if not api_key:
        raise ValueError("gemini api key required")

    prompt = _build_prompt(text)

    if genai is not None:
        # Best effort: any library failure is reported and we fall through
        # to the plain HTTP path below.
        try:
            genai.configure(api_key=api_key)
            model_obj = genai.GenerativeModel(model)
            resp = model_obj.generate_content(
                prompt,
                generation_config={
                    "max_output_tokens": _MAX_OUTPUT_TOKENS,
                    "temperature": _TEMPERATURE,
                },
            )
            translated = _text_from_genai_response(resp)
            if translated:
                return translated
        except Exception as e:
            print(f"Warning: genai library translate failed: {e}")

    # HTTP fallback. The URL already contains "models/", so drop a
    # caller-supplied prefix to avoid "models/models/...".
    model_id = model[len("models/"):] if model.startswith("models/") else model
    body = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "maxOutputTokens": _MAX_OUTPUT_TOKENS,
            "temperature": _TEMPERATURE,
            "candidateCount": 1,
        },
    }
    # The key travels in a header rather than the query string so that it
    # never shows up in the URL embedded in HTTP error messages we print.
    headers = {"x-goog-api-key": api_key}

    for prefix in _REST_API_VERSIONS:
        endpoint = (
            f"https://generativelanguage.googleapis.com/{prefix}/models/{model_id}:generateContent"
        )
        try:
            r = requests.post(endpoint, json=body, headers=headers, timeout=_HTTP_TIMEOUT_S)
            r.raise_for_status()
            translated = _text_from_rest_payload(r.json())
        except (requests.RequestException, ValueError) as e:
            print(f"Warning: GL translate failed ({prefix}): {e}")
            continue
        if translated:
            return translated

    return text


def translate_srt_file(in_path: str, out_path: str, api_key: str, model: str) -> None:
    """Translate every non-empty subtitle of an SRT file and write the result.

    Indices and timestamps are preserved; only the subtitle text is replaced.
    A subtitle whose translation fails keeps its original (stripped) text.

    Args:
        in_path: Path of the source .srt file (UTF-8).
        out_path: Path of the .srt file to write (UTF-8).
        api_key: Gemini API key.
        model: Gemini model name.
    """
    with open(in_path, "r", encoding="utf-8") as fh:
        subs = list(srt.parse(fh.read()))

    total = len(subs)
    for i, sub in enumerate(subs, start=1):
        text = sub.content.strip()
        if not text:
            continue

        try:
            translated = translate_text_google_gl(text, api_key, model=model)
        except Exception as e:
            print(f"Warning: translate failed for index {sub.index}: {e}")
            translated = text

        sub.content = translated
        # Small delay so the API is not hit too quickly.
        time.sleep(0.15)
        print(f"Translated {i}/{total}")

    out_s = srt.compose(subs)
    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(out_s)
    print(f"Wrote translated SRT to: {out_path}")


def main():
    """Parse command-line arguments and run the translation.

    Exits with status 2 when no API key is supplied via ``--gemini-api-key``
    or the ``GEMINI_API_KEY`` environment variable.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--in", dest="in_srt", required=True)
    p.add_argument("--out", dest="out_srt", required=True)
    p.add_argument("--gemini-api-key", default=None)
    p.add_argument("--model", default="gemini-2.5-flash")
    args = p.parse_args()

    key = args.gemini_api_key or os.environ.get("GEMINI_API_KEY")
    if not key:
        print("Provide --gemini-api-key or set GEMINI_API_KEY env var", flush=True)
        raise SystemExit(2)

    translate_srt_file(args.in_srt, args.out_srt, key, args.model)


if __name__ == '__main__':
    main()