submaster/whisper_project/translate_srt_argos.py

85 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""translate_srt_argos.py
Translate an .srt file locally using Argos Translate (lighter than transformers/torch).
Automatically downloads and installs the Argos en->es language package if it is missing.
Usage:
    source .venv/bin/activate
    python3 whisper_project/translate_srt_argos.py --in in.srt --out out.srt
Requirements: argostranslate and srt must already be installed (a failed import is re-raised; the script does not install Python packages itself).
"""
import argparse
import srt
import tempfile
import os
try:
from argostranslate import package, translate
except Exception:
raise
def ensure_en_es_package() -> bool:
    """Make sure an Argos Translate English->Spanish model is installed.

    First checks the locally installed Argos packages. If no en->es package
    is found, every matching entry of the available-package index is tried
    in order: it is downloaded and installed, and the first success wins.

    Returns:
        True if an en->es package was already installed or was installed
        successfully by this call; False if no matching package is available
        or every download/installation attempt failed.
    """
    def _is_en_es(candidate) -> bool:
        # A package matches when it translates from English to Spanish.
        return candidate.from_code == 'en' and candidate.to_code == 'es'

    if any(_is_en_es(installed) for installed in package.get_installed_packages()):
        return True

    # Not installed yet: look for a downloadable en->es model.
    for candidate in filter(_is_en_es, package.get_available_packages()):
        print('Descargando paquete Argos en->es...')
        try:
            # Use the library's own downloader (the flow shown in the Argos
            # Translate README) instead of reading a `download_url` attribute
            # and writing to a tempfile.mktemp() path, which is race-prone.
            model_path = candidate.download()
            package.install_from_path(model_path)
        except Exception as e:
            # Best-effort: report the failure and fall through to the next
            # matching candidate (if any) rather than aborting.
            print(f"Error descargando/instalando paquete Argos: {e}")
            continue
        return True
    return False
def translate_srt(in_path: str, out_path: str):
    """Translate the subtitles of an SRT file from English to Spanish.

    Reads ``in_path`` as UTF-8, translates the stripped text of every
    non-empty subtitle with Argos Translate, and writes the recomposed SRT
    to ``out_path`` as UTF-8. Progress is printed after each translated
    subtitle.

    Args:
        in_path: Path of the source .srt file.
        out_path: Path where the translated .srt file is written.

    Raises:
        SystemExit: If the Argos en->es package is neither installed nor
            could be downloaded.
    """
    with open(in_path, 'r', encoding='utf-8') as source:
        raw_srt = source.read()
    entries = list(srt.parse(raw_srt))

    # The en->es model must be present before any translation is attempted.
    if not ensure_en_es_package():
        raise SystemExit('No se encontró paquete Argos en->es y no se pudo descargar')

    total = len(entries)
    for position, entry in enumerate(entries, start=1):
        stripped = entry.content.strip()
        if stripped:
            # Subtitles that are empty after stripping are left untouched.
            entry.content = translate.translate(stripped, 'en', 'es')
            print(f'Translated {position}/{total}')

    with open(out_path, 'w', encoding='utf-8') as target:
        target.write(srt.compose(entries))
    print(f'Wrote translated SRT to: {out_path}')
if __name__ == '__main__':
    # Command-line entry point: both the input and output SRT paths are mandatory.
    parser = argparse.ArgumentParser()
    for flag, dest in (('--in', 'in_srt'), ('--out', 'out_srt')):
        parser.add_argument(flag, dest=dest, required=True)
    cli = parser.parse_args()
    translate_srt(cli.in_srt, cli.out_srt)