# submaster/whisper_project/infra/gemini_adapter.py

import argparse
import json
import os
import time
from typing import Optional

import requests
import logging

try:
    import srt  # type: ignore
except Exception:
    srt = None

try:
    import google.generativeai as genai  # type: ignore
except Exception:
    genai = None


def _extract_gl_text(payload: object) -> Optional[str]:
    """Return the generated text found in a decoded Generative Language response.

    Handles the ``generateContent`` shape
    (``candidates[0].content.parts[].text``) as well as the older flat shapes
    this module has always accepted (``content``/``output`` strings, a list of
    ``{"text": ...}`` dicts, and a few top-level string keys).

    Returns ``None`` when no text can be located.
    """
    if not isinstance(payload, dict):
        return None

    candidates = payload.get("candidates")
    if isinstance(candidates, list) and candidates and isinstance(candidates[0], dict):
        first = candidates[0]
        content = first.get("content")
        if isinstance(content, str):
            return content.strip()
        output = first.get("output")
        if isinstance(output, str):
            return output.strip()
        # generateContent nests the parts under a "content" object; the legacy
        # shape put the list of parts directly under "content".
        parts = content.get("parts") if isinstance(content, dict) else content
        if isinstance(parts, list):
            texts = [
                part["text"]
                for part in parts
                if isinstance(part, dict) and isinstance(part.get("text"), str)
            ]
            if texts:
                return "\n".join(texts).strip()

    for key in ("output_text", "text", "response", "translated_text"):
        value = payload.get(key)
        if isinstance(value, str):
            return value.strip()
    return None


def translate_text_google_gl(text: str, api_key: str, model: str = "gemini-2.5-flash") -> str:
    """Translate *text* to Spanish with Gemini, falling back to the input on failure.

    The ``google.generativeai`` client is tried first when it is importable;
    otherwise (or if it fails / yields nothing) the REST ``generateContent``
    endpoint is called directly. This is best-effort: when every attempt
    fails, the original *text* is returned unchanged.

    Args:
        text: Text to translate.
        api_key: Gemini API key.
        model: Model name used by both the client library and the REST call.

    Returns:
        The stripped translation, or *text* unchanged if it is blank or no
        attempt produced a result.

    Raises:
        ValueError: If *api_key* is empty.
    """
    if not api_key:
        raise ValueError("gemini api key required")
    # Nothing to translate: avoid a paid round-trip (and a possibly invented reply).
    if not text or not text.strip():
        return text

    log = logging.getLogger(__name__)
    prompt = f"Traduce al español el siguiente texto y devuelve solo el texto traducido:\n\n{text}"

    if genai is not None:
        try:
            genai.configure(api_key=api_key)
            model_obj = genai.GenerativeModel(model)
            resp = model_obj.generate_content(
                prompt,
                generation_config={"max_output_tokens": 1024, "temperature": 0.0},
            )
            # NOTE(review): the SDK's ``text`` accessor reportedly raises
            # ValueError when the response holds no plain-text part; treat
            # that as "no quick answer" so the candidates can still be read.
            try:
                quick_text = resp.text
            except (AttributeError, ValueError):
                quick_text = None
            if quick_text:
                return quick_text.strip()

            candidates = getattr(resp, "candidates", None)
            if candidates:
                content = getattr(candidates[0], "content", None)
                parts = [
                    p.text
                    for p in (getattr(content, "parts", None) or [])
                    if getattr(p, "text", None)
                ]
                if parts:
                    return "\n".join(parts).strip()
        except Exception:
            # Deliberately broad: any client-library failure should fall
            # through to the REST path rather than abort the translation.
            log.warning("genai library translate failed", exc_info=True)

    # The key travels in a header rather than the query string so it does not
    # end up in URLs echoed by HTTP error messages or intermediaries.
    headers = {"x-goog-api-key": api_key}
    body = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "maxOutputTokens": 1024,
            "temperature": 0.0,
            "candidateCount": 1,
        },
    }
    for prefix in ("v1", "v1beta"):
        endpoint = f"https://generativelanguage.googleapis.com/{prefix}/models/{model}:generateContent"
        try:
            r = requests.post(endpoint, json=body, headers=headers, timeout=30)
            r.raise_for_status()
            translated = _extract_gl_text(r.json())
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON.
            log.warning("GL translate failed for prefix %s", prefix, exc_info=True)
            continue
        if translated is not None:
            return translated

    return text


def translate_srt_file(in_path: str, out_path: str, api_key: str, model: str):
    """Translate every non-empty cue of an SRT file and write the result.

    Reads *in_path* as UTF-8, replaces each subtitle's content with the value
    returned by ``translate_text_google_gl`` (or with the stripped original if
    that call raises), pauses briefly after each translated cue, and writes
    the recomposed SRT to *out_path* as UTF-8.

    Raises:
        RuntimeError: If the ``srt`` package could not be imported.
    """
    if srt is None:
        raise RuntimeError("Dependencia 'srt' no encontrada. Instálela para trabajar con SRT.")

    with open(in_path, "r", encoding="utf-8") as source:
        raw_srt = source.read()
    entries = list(srt.parse(raw_srt))

    for entry in entries:
        original = entry.content.strip()
        if original:
            try:
                result = translate_text_google_gl(original, api_key, model=model)
            except Exception:
                logging.getLogger(__name__).warning("translate failed for index %s", entry.index)
                # Keep the cue readable even when translation fails.
                result = original
            entry.content = result
            # Small pause between requests.
            time.sleep(0.15)

    composed = srt.compose(entries)
    with open(out_path, "w", encoding="utf-8") as target:
        target.write(composed)


class GeminiTranslator:
    """Object wrapper around ``translate_srt_file`` with a stored key and model."""

    def __init__(self, api_key: Optional[str] = None, model: str = "gemini-2.5-flash"):
        """Remember the API key (may be ``None``) and the model name."""
        self.model = model
        self.api_key = api_key

    def translate_srt(self, in_srt: str, out_srt: str) -> None:
        """Translate the SRT at *in_srt* and write it to *out_srt*.

        Uses the key given at construction, falling back to the
        ``GEMINI_API_KEY`` environment variable when that key is falsy.

        Raises:
            RuntimeError: If neither source provides a key.
        """
        resolved_key = self.api_key if self.api_key else os.environ.get("GEMINI_API_KEY")
        if resolved_key:
            translate_srt_file(in_srt, out_srt, api_key=resolved_key, model=self.model)
            return
        raise RuntimeError("GEMINI API key required for GeminiTranslator")