# submaster/tests/test_marian_adapter.py

import tempfile
import os
from whisper_project.infra import marian_adapter

# Minimal two-cue SubRip fixture. Each cue is an index line, a timing line and
# a text line; a blank line separates the cues and the text ends with a newline.
SRT_SAMPLE = (
    "1\n"
    "00:00:00,000 --> 00:00:01,000\n"
    "Hello world\n"
    "\n"
    "2\n"
    "00:00:01,500 --> 00:00:02,500\n"
    "Second line\n"
)


def test_translate_srt_with_fake_translator():
    """Check module-level ``translate_srt`` with an injected translator.

    Writes ``SRT_SAMPLE`` to a temporary input file, runs
    ``marian_adapter.translate_srt`` with a stand-in translator that
    upper-cases every text it receives, and asserts that the output file
    exists and contains both upper-cased cue texts.
    """
    # TemporaryDirectory deletes the scratch directory when the block exits,
    # even if an assertion fails, so test runs do not leave files behind.
    with tempfile.TemporaryDirectory(prefix="test_marian_") as td:
        in_path = os.path.join(td, "in.srt")
        out_path = os.path.join(td, "out.srt")

        with open(in_path, "w", encoding="utf-8") as f:
            f.write(SRT_SAMPLE)

        # Fake translator: upper-casing validates the pipeline without
        # external dependencies.
        def fake_translator(texts):
            return [t.upper() for t in texts]

        marian_adapter.translate_srt(in_path, out_path, translator=fake_translator)

        assert os.path.exists(out_path)
        # Read the result before the directory is removed on block exit.
        with open(out_path, "r", encoding="utf-8") as f:
            data = f.read()

    assert "HELLO WORLD" in data
    assert "SECOND LINE" in data


def test_marian_translator_class_api():
    """Check ``MarianTranslator.translate_srt`` with an injected translator.

    Writes ``SRT_SAMPLE`` to a temporary input file, builds a
    ``MarianTranslator`` with no arguments, and calls its ``translate_srt``
    method with a stand-in translator that replaces "Hello" with "Hola".
    The test passes when the replacement shows up in the output file.
    """
    # TemporaryDirectory deletes the scratch directory when the block exits,
    # even if the call under test raises, so runs do not leave files behind.
    with tempfile.TemporaryDirectory(prefix="test_marian2_") as td:
        in_path = os.path.join(td, "in2.srt")
        out_path = os.path.join(td, "out2.srt")
        with open(in_path, "w", encoding="utf-8") as f:
            f.write(SRT_SAMPLE)

        t = marian_adapter.MarianTranslator()
        t.translate_srt(
            in_path,
            out_path,
            translator=lambda texts: [s.replace("Hello", "Hola") for s in texts],
        )

        # Read the result before the directory is removed on block exit.
        with open(out_path, "r", encoding="utf-8") as f:
            data = f.read()

    # The original check was '"Hola world" in data or "Hola" in data'; the
    # first operand implies the second, so this single test is equivalent.
    assert "Hola" in data