# submaster/whisper_project/infra/argos_adapter.py
#
# 96 lines
# 3.1 KiB
# Python

import tempfile
import os
from typing import Optional
def _ensure_argos_package():
try:
from argostranslate import package
installed = package.get_installed_packages()
for p in installed:
if p.from_code == "en" and p.to_code == "es":
return True
avail = package.get_available_packages()
for p in avail:
if p.from_code == "en" and p.to_code == "es":
return p
except Exception:
return None
def _is_en_es_package(pkg) -> bool:
    """Return True when *pkg* translates from English ("en") to Spanish ("es")."""
    return pkg.from_code == "en" and pkg.to_code == "es"


def _download_and_install_argos_package(package_module, download_url: str) -> None:
    """Download the archive at *download_url* and install it via *package_module*.

    The archive is streamed into a temporary ``.zip`` file which is removed
    afterwards, whether or not the download/installation succeeded.
    """
    # Imported lazily (and before creating the temp file) because it is only
    # needed when a package actually has to be fetched.
    import requests

    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # tempfile.mktemp which only returns a name.
    fd, download_path = tempfile.mkstemp(suffix=".zip")
    try:
        with os.fdopen(fd, "wb") as fh:
            with requests.get(download_url, stream=True, timeout=60) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        fh.write(chunk)
        package_module.install_from_path(download_path)
    finally:
        # Best-effort cleanup: a failure to delete must not mask the outcome.
        try:
            os.remove(download_path)
        except OSError:
            pass


def translate_srt_argos_impl(in_path: str, out_path: str) -> None:
    """Translate an SRT subtitle file from English to Spanish with argostranslate.

    If no en->es Argos package is installed, the first matching entry from
    the available packages is downloaded and installed before translating.
    Subtitles whose content is empty after stripping whitespace are left
    untouched; every other subtitle's content is replaced by the translation
    of its stripped text.

    Args:
        in_path: Path of the UTF-8 encoded source SRT file.
        out_path: Path where the translated SRT file is written (UTF-8).

    Raises:
        RuntimeError: If ``srt`` or ``argostranslate`` cannot be imported, or
            if no en->es package is installed or available.
        ImportError: If a package download is needed and ``requests`` is not
            installed.

    Errors raised while downloading or installing a package, or while
    reading/writing the files, propagate unchanged.
    """
    try:
        import srt  # type: ignore
    except Exception as e:
        raise RuntimeError("Dependencia 'srt' no encontrada. Instálela para trabajar con SRT.") from e
    try:
        from argostranslate import package, translate
    except Exception as e:
        raise RuntimeError("argostranslate no disponible: instale 'argostranslate' para usar este adaptador") from e

    # Make sure an en->es package is present, installing one if necessary.
    if not any(_is_en_es_package(p) for p in package.get_installed_packages()):
        candidate = next(
            (p for p in package.get_available_packages() if _is_en_es_package(p)),
            None,
        )
        if candidate is None:
            raise RuntimeError("No se pudo encontrar/instalar paquete Argos en->es")
        _download_and_install_argos_package(package, candidate.download_url)

    with open(in_path, "r", encoding="utf-8") as fh:
        subs = list(srt.parse(fh.read()))

    for sub in subs:
        text = sub.content.strip()
        if text:
            sub.content = translate.translate(text, "en", "es")

    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(srt.compose(subs))
class ArgosTranslator:
    """Adapter exposing the ``translate_srt(in, out)`` API on top of Argos."""

    def __init__(self) -> None:
        """Create the adapter; it holds no state."""

    def translate_srt(self, in_srt: str, out_srt: str) -> None:
        """Translate the SRT file *in_srt* and write the result to *out_srt*.

        Delegates to ``translate_srt_argos_impl``; any exception it raises
        propagates to the caller.
        """
        translate_srt_argos_impl(in_srt, out_srt)