TubeScript-API/yt_wrap.py

233 lines
8.5 KiB
Python

"""Utility wrapper for yt-dlp calls with robust cookie handling via cookiejar.
Provides:
- CookieManager: reads cookie string from RedisArchivist and writes a Netscape cookie file
that can be passed to yt-dlp via the `cookiefile` option (the CLI equivalent is --cookies).
- YtDlpClient: base class to perform download/extract calls to yt-dlp with consistent
error handling and optional automatic cookie injection.
Usage example:
from yt_wrap import YtDlpClient
client = YtDlpClient(base_opts=config_dict)
info, err = client.extract_info('https://www.youtube.com/watch?v=K08TM4OVLyo')
"""
from __future__ import annotations
import logging
import os
import tempfile
import typing as t
from http import cookiejar
import yt_dlp
# Prefer the project's real RedisArchivist. Any failure while importing it
# (not only a missing module) falls back to the inert stand-in below, so this
# module stays importable outside the full project.
try:
    from common.src.ta_redis import RedisArchivist
except Exception:

    class RedisArchivist:
        """No-op replacement used when the project's RedisArchivist is unavailable.

        Exposes the methods named below with the same signatures the rest of
        this module relies on. Nothing is stored: readers always return
        ``None`` and writers always return ``False``.
        """

        def __init__(self, *args, **kwargs):
            """Accept and ignore any constructor arguments."""

        def get_message_str(self, key: str) -> None:
            """Return ``None``; the stub never holds a value for *key*."""
            return None

        def get_message_dict(self, key: str) -> None:
            """Return ``None``; the stub never holds a value for *key*."""
            return None

        def set_message(
            self,
            key: str,
            value,
            expire: t.Optional[int] = None,
            save: bool = False,
        ) -> bool:
            """Discard *value* and return ``False`` (nothing was stored)."""
            return False

        def del_message(self, key: str) -> bool:
            """Return ``False``; there is never anything to delete."""
            return False
# Module-level logger shared by CookieManager and YtDlpClient. The NullHandler
# means this module attaches no output handler of its own; the host
# application decides where (and whether) these records are emitted.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
class CookieManager:
    """Manage cookie string storage and provide a cookiefile path for yt-dlp.

    The cookie text is read from a RedisArchivist-like object and written
    unchanged (apart from NUL stripping and a trailing newline) to a temporary
    file. The stored text is expected to already be in Netscape cookie format,
    which is what yt-dlp's ``cookiefile`` option and
    ``http.cookiejar.MozillaCookieJar`` read.

    Every file created is remembered and removed again by ``cleanup()``.
    """

    DEFAULT_MESSAGE_KEY = "cookie"

    def __init__(self, redis_archivist: t.Optional[object] = None):
        """Set up the cookie source.

        Args:
            redis_archivist: Object exposing ``get_message_str(key)``. When
                omitted, a ``RedisArchivist()`` is created; if that fails the
                manager runs without a cookie source.
        """
        self._temp_files: list[str] = []
        if redis_archivist is not None:
            # Injected dependency (tests, alternative back ends).
            self.redis = redis_archivist
            return
        try:
            self.redis = RedisArchivist()
        except Exception:
            # Best effort: no archivist simply means "no cookies available".
            self.redis = None

    def get_cookiefile_path(self, message_key: t.Optional[str] = None) -> t.Optional[str]:
        """Write the stored cookie string to a temp file and return its path.

        Args:
            message_key: Key to look up; defaults to ``DEFAULT_MESSAGE_KEY``.

        Returns:
            Path of the newly written cookie file, or ``None`` when no usable
            cookie text is stored under the key.

        Raises:
            OSError: If the temporary file cannot be created or written. A
                partially written file is still tracked, so ``cleanup()``
                removes it.
        """
        cookie_text = self._fetch_cookie_text(message_key or self.DEFAULT_MESSAGE_KEY)
        if not cookie_text:
            return None

        # Drop NUL padding around the payload; a payload that was nothing but
        # padding is treated the same as "no cookie stored".
        cookie_text = cookie_text.strip("\x00")
        if not cookie_text:
            return None
        if not cookie_text.endswith("\n"):
            cookie_text += "\n"

        path = self._write_cookiefile(cookie_text)

        # Advisory check only: a parse failure is logged, the file is kept and
        # its path is still returned so yt-dlp can make its own attempt.
        try:
            cookiejar.MozillaCookieJar().load(path, ignore_discard=True, ignore_expires=True)
        except Exception:
            logger.debug("CookieManager: written cookie file but couldn't load with MozillaCookieJar")
        return path

    def _fetch_cookie_text(self, key: str) -> t.Optional[str]:
        """Return the cookie text stored under *key*, or None if unavailable."""
        if self.redis is None or not hasattr(self.redis, "get_message_str"):
            return None
        try:
            payload = self.redis.get_message_str(key)
        except Exception as exc:
            logger.debug("CookieManager: error reading from redis: %s", exc)
            return None
        if isinstance(payload, (bytes, bytearray)):
            # Tolerate clients that hand back undecoded bytes.
            payload = bytes(payload).decode("utf-8", errors="replace")
        if not isinstance(payload, str):
            return None
        return payload or None

    def _write_cookiefile(self, cookie_text: str) -> str:
        """Write *cookie_text* to a new temp file and return the file's path."""
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            delete=False,
            prefix="yt_cookies_",
            suffix=".txt",
        ) as handle:
            # Register the path before writing so a failed write cannot leave
            # an untracked file behind.
            self._temp_files.append(handle.name)
            handle.write(cookie_text)
        return handle.name

    def cleanup(self) -> None:
        """Remove temporary cookie files created by get_cookiefile_path.

        Safe to call repeatedly. Files that are already gone are ignored and
        other removal failures are logged at debug level, never raised.
        """
        pending = getattr(self, "_temp_files", [])
        self._temp_files = []
        for path in pending:
            try:
                os.unlink(path)
            except FileNotFoundError:
                continue
            except OSError as exc:
                logger.debug("CookieManager: failed to unlink temp cookie file %s: %s", path, exc)
class YtDlpClient:
    """Base client to interact with yt-dlp.

    - ``DEFAULT_OPTS`` are overlaid with ``base_opts`` and then with per-call
      options.
    - If ``use_redis_cookies`` is True, the client asks its ``CookieManager``
      for a cookie file on every call and injects it as ``cookiefile``; the
      temporary file is removed again once the call finishes.

    Methods return tuples like (result, error) where result is data or
    True/False and error is a string or None. The one exception: a
    DownloadError that reports a DNS resolution failure is re-raised as
    ``ConnectionError`` so callers can abort instead of retrying.
    """

    DEFAULT_OPTS: dict = {
        "quiet": True,
        "socket_timeout": 10,
        "extractor_retries": 2,
        "retries": 3,
    }

    def __init__(
        self,
        base_opts: t.Optional[dict] = None,
        use_redis_cookies: bool = True,
        redis_archivist: t.Optional[object] = None,
    ):
        """Create a client.

        Args:
            base_opts: yt-dlp options applied to every call, overriding
                ``DEFAULT_OPTS``.
            use_redis_cookies: Whether to inject a cookie file per call.
            redis_archivist: Passed through to ``CookieManager``.
        """
        # Copy so the class-level defaults are never mutated.
        self.base_opts = dict(self.DEFAULT_OPTS)
        if base_opts:
            self.base_opts.update(base_opts)
        self.use_redis_cookies = use_redis_cookies
        self.cookie_mgr = CookieManager(redis_archivist)

    def _build_opts(self, extra: t.Optional[dict] = None) -> dict:
        """Return a fresh options dict for a single yt-dlp call.

        ``extra`` overrides the base options. When cookie handling is enabled
        and a cookie file is available, its path is stored under
        ``cookiefile`` (replacing any value supplied by the caller).
        """
        opts = dict(self.base_opts)
        if extra:
            opts.update(extra)
        if self.use_redis_cookies:
            cookiefile = self.cookie_mgr.get_cookiefile_path()
            if cookiefile:
                opts["cookiefile"] = cookiefile
        return opts

    @staticmethod
    def _summarize_download_error(exc: Exception) -> str:
        """Turn a yt-dlp DownloadError into the error string given to callers.

        Raises:
            ConnectionError: If the message reports a DNS resolution failure.
        """
        msg = str(exc)
        if "Temporary failure in name resolution" in msg:
            raise ConnectionError("lost the internet, abort!") from exc
        # Normalise the different rate-limit wordings to one stable string.
        if "HTTP Error 429" in msg or "too many requests" in msg.lower():
            return "HTTP 429: rate limited"
        return msg

    def _release_cookies(self) -> None:
        """Remove per-call cookie files; never lets a cleanup error escape."""
        try:
            self.cookie_mgr.cleanup()
        except Exception as exc:
            # Cleanup must not mask the real outcome of the call, but the
            # failure should at least be visible in debug logs.
            logger.debug("YtDlpClient: cookie cleanup failed: %s", exc)

    def extract_info(
        self, url: str, extra_opts: t.Optional[dict] = None
    ) -> tuple[t.Optional[dict], t.Optional[str]]:
        """Extract info for a url using yt-dlp.extract_info (no download).

        Returns:
            ``(info_dict, None)`` on success, ``(None, error_str)`` on failure.

        Raises:
            ConnectionError: On a DNS resolution failure reported by yt-dlp.
        """
        opts = self._build_opts(extra_opts)
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                info = ydl.extract_info(url, download=False)
        except cookiejar.LoadError as exc:
            logger.error("Cookie load error: %s", exc)
            return None, f"cookie_load_error: {exc}"
        except yt_dlp.utils.ExtractorError as exc:
            logger.warning("ExtractorError for %s: %s", url, exc)
            return None, str(exc)
        except yt_dlp.utils.DownloadError as exc:
            logger.warning("DownloadError for %s: %s", url, exc)
            return None, self._summarize_download_error(exc)
        except Exception as exc:  # pragma: no cover - defensive
            logger.exception("Unexpected error in extract_info: %s", exc)
            return None, str(exc)
        finally:
            # Runs on every path, including the ConnectionError re-raise.
            self._release_cookies()
        return info, None

    def download(
        self, url: str, extra_opts: t.Optional[dict] = None
    ) -> tuple[bool, t.Optional[str]]:
        """Invoke ydl.download for the provided url.

        Returns:
            ``(True, None)`` on success, ``(False, error_message)`` on failure.

        Raises:
            ConnectionError: On a DNS resolution failure reported by yt-dlp.
        """
        opts = self._build_opts(extra_opts)
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                ydl.download([url])
        except cookiejar.LoadError as exc:
            # Same reporting as extract_info so callers see one error shape.
            logger.error("Cookie load error: %s", exc)
            return False, f"cookie_load_error: {exc}"
        except yt_dlp.utils.DownloadError as exc:
            logger.warning("DownloadError while downloading %s: %s", url, exc)
            return False, self._summarize_download_error(exc)
        except Exception as exc:
            logger.exception("Unexpected error during download: %s", exc)
            return False, str(exc)
        finally:
            self._release_cookies()
        return True, None
# If running as a script, show a tiny demo: print the options a call would use.
# No yt-dlp request is made here.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    client = YtDlpClient()
    try:
        print(client._build_opts())
    finally:
        # _build_opts() may have written a temporary cookie file. The request
        # methods normally remove it afterwards, so the demo must do it itself.
        client.cookie_mgr.cleanup()