submaster/tests/test_marian_adapter.py

52 lines
1.4 KiB
Python

import tempfile
import os
from whisper_project.infra import marian_adapter
# Minimal two-cue subtitle fixture written to disk by the tests in this module.
# NOTE(review): the cues are not separated by a blank line, unlike
# conventional SRT files -- confirm this is intentional.
SRT_SAMPLE = "\n".join(
    (
        "1",
        "00:00:00,000 --> 00:00:01,000",
        "Hello world",
        "2",
        "00:00:01,500 --> 00:00:02,500",
        "Second line",
        "",  # empty last item keeps the trailing newline
    )
)
def test_translate_srt_with_fake_translator():
    """Check that ``marian_adapter.translate_srt`` applies an injected translator.

    Writes ``SRT_SAMPLE`` to a temporary input file, runs the module-level
    ``translate_srt`` with an upper-casing stand-in translator, and asserts
    that the output file exists and contains the upper-cased subtitle text.
    """

    # Simulated translator: upper-casing validates the pipeline without
    # dependencies.
    def fake_translator(texts):
        return [t.upper() for t in texts]

    # TemporaryDirectory removes the directory and its contents when the
    # block exits (even if an assertion fails), so test runs leave no
    # leftover files behind.
    with tempfile.TemporaryDirectory(prefix="test_marian_") as td:
        in_path = os.path.join(td, "in.srt")
        out_path = os.path.join(td, "out.srt")
        with open(in_path, "w", encoding="utf-8") as f:
            f.write(SRT_SAMPLE)

        marian_adapter.translate_srt(in_path, out_path, translator=fake_translator)

        assert os.path.exists(out_path)
        with open(out_path, "r", encoding="utf-8") as f:
            data = f.read()

    assert "HELLO WORLD" in data
    assert "SECOND LINE" in data
def test_marian_translator_class_api():
    """Check that ``MarianTranslator.translate_srt`` applies an injected translator.

    Writes ``SRT_SAMPLE`` to a temporary input file, runs the method on a
    default-constructed ``MarianTranslator`` with a translator that replaces
    "Hello" with "Hola", and asserts that the replacement appears in the
    output file.
    """
    # TemporaryDirectory removes the directory and its contents when the
    # block exits (even on failure), so test runs leave no leftover files.
    with tempfile.TemporaryDirectory(prefix="test_marian2_") as td:
        in_path = os.path.join(td, "in2.srt")
        out_path = os.path.join(td, "out2.srt")
        with open(in_path, "w", encoding="utf-8") as f:
            f.write(SRT_SAMPLE)

        t = marian_adapter.MarianTranslator()
        t.translate_srt(
            in_path,
            out_path,
            translator=lambda texts: [s.replace("Hello", "Hola") for s in texts],
        )

        with open(out_path, "r", encoding="utf-8") as f:
            data = f.read()

    # Only the substituted word is checked; a separate check for
    # "Hola world" would be implied by this one and add nothing.
    assert "Hola" in data