140 lines
5.6 KiB
Python
140 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
"""translate_srt_with_gemini.py
|
|
Lee un .srt, traduce cada bloque de texto con Gemini (Google Generative API) y
|
|
escribe un nuevo .srt manteniendo índices y timestamps.
|
|
|
|
Uso:
|
|
export GEMINI_API_KEY="..."
|
|
.venv/bin/python whisper_project/translate_srt_with_gemini.py \
|
|
--in whisper_project/dailyrutines.kokoro.dub.srt \
|
|
--out whisper_project/dailyrutines.kokoro.dub.es.srt \
|
|
--model gemini-2.5-flash
|
|
|
|
Si no pasas --gemini-api-key, se usará la variable de entorno GEMINI_API_KEY.
|
|
"""
|
|
import argparse
|
|
import json
|
|
import os
|
|
import time
|
|
from typing import List
|
|
|
|
import requests
|
|
import srt
|
|
# Intentar usar la librería oficial si está instalada (mejor compatibilidad)
|
|
try:
|
|
import google.generativeai as genai # type: ignore
|
|
except Exception:
|
|
genai = None
|
|
|
|
|
|
def translate_text_google_gl(text: str, api_key: str, model: str = "gemini-2.5-flash") -> str:
    """Translate ``text`` into Spanish with a Gemini model.

    The ``google.generativeai`` client is tried first when it could be
    imported. If it is missing, fails, or yields no text, the REST
    ``generateContent`` endpoint is called directly through ``requests``.

    Args:
        text: Text to translate. Blank input is returned unchanged without
            contacting the API.
        api_key: Gemini API key; must be non-empty.
        model: Model name, used by the client library and in the REST URL.

    Returns:
        The translated text with surrounding whitespace stripped, or ``text``
        unchanged when every attempt fails (failures are printed as warnings).

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    if not api_key:
        raise ValueError("gemini api key required")
    if not text or not text.strip():
        # Nothing to translate; avoid spending a request on blank input.
        return text

    prompt = f"Traduce al español el siguiente texto y devuelve solo el texto traducido:\n\n{text}"

    # Preferred path: the official client library, when it was importable.
    if genai is not None:
        try:
            genai.configure(api_key=api_key)
            model_obj = genai.GenerativeModel(model)
            resp = model_obj.generate_content(
                prompt,
                generation_config={"max_output_tokens": 1024, "temperature": 0.0},
            )
            translated = _text_from_genai_response(resp)
            if translated is not None:
                return translated
        except Exception as e:
            print(f"Warning: genai library translate failed: {e}")

    # Fallback path: plain HTTP against the generateContent REST method.
    body = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "maxOutputTokens": 1024,
            "temperature": 0.0,
            "candidateCount": 1,
        },
    }
    # The key travels in a header instead of the query string so that it does
    # not end up inside the URL echoed by HTTP error messages printed below.
    headers = {"x-goog-api-key": api_key}
    for prefix in ("v1", "v1beta"):
        endpoint = f"https://generativelanguage.googleapis.com/{prefix}/models/{model}:generateContent"
        try:
            r = requests.post(endpoint, json=body, headers=headers, timeout=30)
            r.raise_for_status()
            translated = _text_from_rest_payload(r.json())
            if translated is not None:
                return translated
        except Exception as e:
            print(f"Warning: GL translate failed ({prefix}): {e}")

    # Every attempt failed: keep the original text so the caller can go on.
    return text


def _text_from_genai_response(resp):
    """Return the stripped text of a client-library response, or None if it has none."""
    try:
        quick = resp.text
    except (AttributeError, ValueError):
        # NOTE(review): the SDK's ``text`` accessor is documented to raise when
        # the response holds no usable part; fall through to the candidates.
        quick = None
    if quick:
        return quick.strip()

    candidates = getattr(resp, "candidates", None)
    if candidates:
        content = getattr(candidates[0], "content", None)
        parts = [p.text for p in (getattr(content, "parts", None) or []) if getattr(p, "text", None)]
        if parts:
            return "\n".join(parts).strip()
    return None


def _text_from_rest_payload(payload):
    """Return the stripped text found in a decoded REST response, or None if absent."""
    if not isinstance(payload, dict):
        return None

    candidates = payload.get("candidates")
    if isinstance(candidates, list) and candidates and isinstance(candidates[0], dict):
        first = candidates[0]
        content = first.get("content")
        # generateContent shape: ``content`` is an object holding ``parts``.
        if isinstance(content, dict):
            content = content.get("parts")
        # Older/alternate shapes are still tolerated, in the original order.
        if isinstance(content, str):
            return content.strip()
        if isinstance(first.get("output"), str):
            return first["output"].strip()
        if isinstance(content, list):
            parts = [
                p["text"]
                for p in content
                if isinstance(p, dict) and isinstance(p.get("text"), str)
            ]
            if parts:
                return "\n".join(parts).strip()

    for key in ("output_text", "text", "response", "translated_text"):
        if isinstance(payload.get(key), str):
            return payload[key].strip()
    return None
|
|
|
|
|
|
def translate_srt_file(in_path: str, out_path: str, api_key: str, model: str):
    """Translate each non-empty subtitle of an SRT file and write the result.

    Reads ``in_path`` as UTF-8, replaces the content of every subtitle whose
    stripped text is non-empty with the output of ``translate_text_google_gl``
    (keeping the stripped source text when that call raises), and writes the
    recomposed SRT to ``out_path`` as UTF-8. Progress is printed per
    translated subtitle.
    """
    with open(in_path, "r", encoding="utf-8") as src:
        raw = src.read()
        cues = list(srt.parse(raw))

    total = len(cues)
    for position, cue in enumerate(cues, start=1):
        original = cue.content.strip()
        if original:
            try:
                result = translate_text_google_gl(original, api_key, model=model)
            except Exception as err:
                # Best effort: report the failure and keep the source text.
                print(f"Warning: translate failed for index {cue.index}: {err}")
                result = original
            cue.content = result
            # Short pause between calls so the API is not hit too quickly.
            time.sleep(0.15)
            print(f"Translated {position}/{total}")

    # Compose before opening the output so a compose error leaves it untouched.
    composed = srt.compose(cues)
    with open(out_path, "w", encoding="utf-8") as dst:
        dst.write(composed)
    print(f"Wrote translated SRT to: {out_path}")
|
|
|
|
|
|
def main():
    """Parse the command line and translate the requested SRT file.

    The API key comes from ``--gemini-api-key`` or, when that is not given,
    from the ``GEMINI_API_KEY`` environment variable. If neither provides a
    key, a message is printed and the process exits with status 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--in", dest="in_srt", required=True)
    parser.add_argument("--out", dest="out_srt", required=True)
    parser.add_argument("--gemini-api-key", default=None)
    parser.add_argument("--model", default="gemini-2.5-flash")
    opts = parser.parse_args()

    api_key = opts.gemini_api_key
    if not api_key:
        # Fall back to the environment when the flag is absent or empty.
        api_key = os.environ.get("GEMINI_API_KEY")

    if api_key:
        translate_srt_file(opts.in_srt, opts.out_srt, api_key, opts.model)
        return

    print("Provide --gemini-api-key or set GEMINI_API_KEY env var", flush=True)
    raise SystemExit(2)
|
|
|
|
|
|
# Script entry point: run the CLI only on direct execution, not on import.
if __name__ == "__main__":
    main()
|