- Updated `srt_to_kokoro.py` to provide a CLI entrypoint with argument parsing. - Enhanced error handling and logging for better user feedback. - Introduced a compatibility layer for legacy scripts. - Added configuration handling via `config.toml` for endpoint and API key. - Improved documentation and comments for clarity. Enhance PipelineOrchestrator with in-process transcriber fallback - Implemented `InProcessTranscriber` to handle transcription using multiple strategies. - Added support for `srt_only` flag to return translated SRT without TTS synthesis. - Improved error handling and logging for transcriber initialization. Add installation and usage documentation - Created `INSTALLATION.md` for detailed setup instructions for CPU and GPU environments. - Added `USAGE.md` with practical examples for common use cases and command-line options. - Included a script for automated installation and environment setup. Implement SRT burning utility - Added `burn_srt.py` to facilitate embedding SRT subtitles into video files using ffmpeg. - Provided command-line options for style and codec customization. Update project configuration management - Introduced `config.py` to centralize configuration loading from `config.toml`. - Ensured that environment variables are not read to avoid implicit overrides. Enhance package management with `pyproject.toml` - Added `pyproject.toml` for modern packaging and dependency management. - Defined optional dependencies for CPU and TTS support. Add smoke test fixture for SRT - Created `smoke_test.srt` as a sample subtitle file for testing purposes. Update requirements and setup configurations - Revised `requirements.txt` and `setup.cfg` for better dependency management and clarity. - Included installation instructions for editable mode and local TTS support.
138 lines
4.4 KiB
Python
138 lines
4.4 KiB
Python
"""Small CLI shim for SRT -> Kokoro synthesis.
|
|
|
|
This file provides: parse_srt_file, synth_chunk (thin wrappers) and a
|
|
CLI entrypoint that uses `whisper_project.config` (config.toml) and CLI
|
|
flags. It intentionally does NOT read environment variables.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
import argparse
|
|
import logging
|
|
import sys
|
|
|
|
from whisper_project.infra.kokoro_utils import (
|
|
parse_srt_file as _parse_srt_file,
|
|
synth_chunk as _synth_chunk,
|
|
)
|
|
from whisper_project.infra.kokoro_adapter import KokoroHttpClient
|
|
from whisper_project import config
|
|
|
|
|
|
def parse_srt_file(path: str):
    """Read the .srt file at *path* and hand back its parsed subtitles.

    This is a thin wrapper: all parsing is performed by
    `whisper_project.infra.kokoro_utils.parse_srt_file`, and its result is
    returned unchanged.
    """
    subtitles = _parse_srt_file(path)
    return subtitles
|
|
|
|
|
|
def synth_chunk(
    endpoint: str,
    text: str,
    headers: dict,
    payload_template: Any,
    timeout: int = 60,
) -> bytes:
    """Synthesize *text* through *endpoint* and return the resulting audio.

    This is a thin wrapper: the request is performed by
    `whisper_project.infra.kokoro_utils.synth_chunk`, which receives the
    arguments as given (``timeout`` is forwarded by keyword) and whose
    result is returned unchanged.
    """
    audio = _synth_chunk(
        endpoint,
        text,
        headers,
        payload_template,
        timeout=timeout,
    )
    return audio
|
|
|
|
|
|
def synthesize_from_srt(srt_path: str, out_wav: str, endpoint: str = "", api_key: str = ""):
    """Historical entry point kept only so legacy scripts get a clear error.

    The arguments are accepted for signature compatibility but are not
    used. The real implementation is `KokoroHttpClient.synthesize_from_srt`;
    programmatic callers should use that method.

    Raises:
        NotImplementedError: always.
    """
    message = "Use KokoroHttpClient.synthesize_from_srt or the infra adapter"
    raise NotImplementedError(message)
|
|
|
|
|
|
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the SRT -> Kokoro command line.

    ``--srt`` and ``--out`` are mandatory. Every other option is optional
    and falls back to the default declared below.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Input file and connection settings.
    add("--srt", required=True, help="Path to input .srt file")
    add("--endpoint", required=False, help="Direct synthesis endpoint (optional)")
    api_key_help = (
        "API key for Authorization header; if omitted the value from config.toml is used"
    )
    add("--api-key", required=False, help=api_key_help)

    # Synthesis parameters and output location.
    add("--voice", default="em_alex")
    add("--model", default="model")
    add("--out", required=True, help="Output WAV path")

    # Optional alignment / mixing behaviour.
    add("--video", required=False, help="Optional original video path to mix or align with")
    add("--align", action="store_true", help="Align segments using SRT timestamps")
    add("--keep-chunks", action="store_true")
    add("--mix-with-original", action="store_true")
    add("--mix-background-volume", type=float, default=0.2)
    add("--replace-original", action="store_true")

    # Precedence between CLI flags and config.toml values.
    config_mode_help = (
        "Configuration precedence: 'defaults' = CLI > TOML; "
        "'override-env' = CLI > TOML; 'force' = TOML > CLI"
    )
    add(
        "--config-mode",
        choices=["defaults", "override-env", "force"],
        default="override-env",
        help=config_mode_help,
    )

    return parser
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> None:
    """Run the SRT -> Kokoro synthesis command line.

    The endpoint and API key are resolved from CLI flags and config.toml
    only, in the order selected by ``--config-mode``. A
    `KokoroHttpClient` is then built and asked to synthesize the SRT file
    into the requested WAV path.

    Args:
        argv: Argument list to parse instead of the process arguments.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``,
            which is the original behaviour.

    Raises:
        SystemExit: with status 2 when no endpoint can be resolved (or
            argparse rejects the arguments), and with status 1 when
            synthesis raises an exception.
    """
    args = _build_arg_parser().parse_args(argv)

    # Without any logging configuration the INFO-level success message below
    # is dropped (the root logger defaults to WARNING). basicConfig does
    # nothing if the embedding application already configured logging.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Resolve configuration: only CLI flags and config.toml are used.
    cli_endpoint = getattr(args, "endpoint", None)
    cli_api_key = getattr(args, "api_key", None)
    mode = getattr(args, "config_mode", "defaults")

    if mode in ("defaults", "override-env"):
        # CLI > TOML
        endpoint = cli_endpoint or config.KOKORO_ENDPOINT
        api_key = cli_api_key or config.KOKORO_API_KEY
    else:
        # force: TOML > CLI
        endpoint = config.KOKORO_ENDPOINT or cli_endpoint
        api_key = config.KOKORO_API_KEY or cli_api_key

    if not endpoint:
        logger.error(
            "Please provide --endpoint or set kokoro.endpoint in config.toml"
        )
        sys.exit(2)

    # The flag is parsed but nothing below consumes it; tell the user rather
    # than silently ignoring the request.
    if getattr(args, "replace_original", False):
        logger.warning(
            "--replace-original is accepted but not used by this command; ignoring it"
        )

    client = KokoroHttpClient(
        endpoint,
        api_key=api_key,
        voice=args.voice,
        model=args.model,
    )

    # Top-level boundary: log the full traceback and convert any failure
    # into a non-zero exit status.
    try:
        client.synthesize_from_srt(
            srt_path=args.srt,
            out_wav=args.out,
            video=args.video,
            align=args.align,
            keep_chunks=args.keep_chunks,
            mix_with_original=args.mix_with_original,
            mix_background_volume=args.mix_background_volume,
        )
    except Exception:
        logger.exception("Error synthesizing from SRT")
        sys.exit(1)
    else:
        logger.info("Output written to: %s", args.out)
|
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# never as a side effect of importing it.
if __name__ == "__main__":
    main()
|