- Updated `srt_to_kokoro.py` to provide a CLI entrypoint with argument parsing.
- Enhanced error handling and logging for better user feedback.
- Introduced a compatibility layer for legacy scripts.
- Added configuration handling via `config.toml` for endpoint and API key.
- Improved documentation and comments for clarity.

Enhance PipelineOrchestrator with in-process transcriber fallback
- Implemented `InProcessTranscriber` to handle transcription using multiple strategies.
- Added support for `srt_only` flag to return translated SRT without TTS synthesis.
- Improved error handling and logging for transcriber initialization.

Add installation and usage documentation
- Created `INSTALLATION.md` for detailed setup instructions for CPU and GPU environments.
- Added `USAGE.md` with practical examples for common use cases and command-line options.
- Included a script for automated installation and environment setup.

Implement SRT burning utility
- Added `burn_srt.py` to facilitate embedding SRT subtitles into video files using ffmpeg.
- Provided command-line options for style and codec customization.

Update project configuration management
- Introduced `config.py` to centralize configuration loading from `config.toml`.
- Ensured that environment variables are not read to avoid implicit overrides.

Enhance package management with `pyproject.toml`
- Added `pyproject.toml` for modern packaging and dependency management.
- Defined optional dependencies for CPU and TTS support.

Add smoke test fixture for SRT
- Created `smoke_test.srt` as a sample subtitle file for testing purposes.

Update requirements and setup configurations
- Revised `requirements.txt` and `setup.cfg` for better dependency management and clarity.
- Included installation instructions for editable mode and local TTS support.
43 lines
1.1 KiB
TOML
43 lines
1.1 KiB
TOML
# Build configuration (PEP 517/518): the package is built with the
# setuptools backend. The `setuptools>=61.0` floor matches the first
# setuptools series that reads `[project]` metadata from pyproject.toml.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core package metadata (PEP 621).
[project]
name = "whisper_project"
version = "0.1.0"
description = "Canalización multimedia: extracción, transcripción, traducción y TTS (Kokoro/Coqui)."
readme = "README.md"
requires-python = ">=3.11"
authors = [{ name = "Nextream" }]
license = { text = "MIT" }

# Runtime dependencies, pinned to exact versions and kept in alphabetical
# order, one requirement per line.
#
# NOTE(review): some of these exact pins look mutually unsatisfiable and
# should be checked with a clean `pip install .` before release:
#   - faster-whisper 1.2.0 appears to require ctranslate2>=4 and av>=11,
#     while ctranslate2 and av are pinned to 3.18.0 and 10.0.0 here.
#   - transformers 4.34.0 appears to require tokenizers>=0.14, while
#     tokenizers is pinned to 0.13.3 here.
#   - confirm that ffmpeg-python 0.4.0 is actually published on PyPI.
dependencies = [
    "av==10.0.0",
    "coloredlogs==15.0.1",
    "ctranslate2==3.18.0",
    "faster-whisper==1.2.0",
    "ffmpeg-python==0.4.0",
    "flatbuffers==23.5.26",
    "huggingface-hub==0.16.4",
    "humanfriendly==10.0",
    "numpy==1.26.4",
    "onnxruntime==1.15.1",
    "requests==2.31.0",
    "sacremoses==0.0.53",
    "sentencepiece==0.1.99",
    "tokenizers==0.13.3",
    "tqdm==4.66.1",
    "transformers==4.34.0",
]

# Optional extras, selected at install time, e.g.
# `pip install "whisper_project[tts]"`.
[project.optional-dependencies]
# `onnxruntime` repeats the pin already listed in the core dependencies;
# it is kept here so the extra's contents stay unchanged.
cpu = [
    "onnxruntime==1.15.1",
    "torch==2.2.2",
]
# NOTE(review): confirm that TTS 0.13.0 supports the interpreter range
# declared by `requires-python` (>=3.11) before relying on this extra.
tts = [
    "librosa==0.10.0.post2",
    "pyttsx3==2.90",
    "soundfile==0.12.1",
    "TTS==0.13.0",
]
# Developer tooling; intentionally left unpinned.
dev = [
    "black",
    "pre-commit",
    "pytest",
    "ruff",
]

# Console entry point: installing the package creates a `whisper_project`
# command that invokes `main` from the `whisper_project.main` module.
[project.scripts]
whisper_project = "whisper_project.main:main"