"""Utility wrapper for yt-dlp calls with robust cookie handling via cookiejar. Provides: - CookieManager: reads cookie string from RedisArchivist and writes a Netscape cookie file that can be passed to yt-dlp via the --cookiefile option (path). - YtDlpClient: base class to perform download/extract calls to yt-dlp with consistent error handling and optional automatic cookie injection. Usage example: from yt_wrap import YtDlpClient client = YtDlpClient(config=config_dict) info, err = client.extract_info('https://www.youtube.com/watch?v=K08TM4OVLyo') """ from __future__ import annotations import logging import os import tempfile import typing as t from http import cookiejar import yt_dlp # Import project-specific RedisArchivist if available; otherwise provide a local stub try: from common.src.ta_redis import RedisArchivist except Exception: class RedisArchivist: """Fallback stub for environments without the project's RedisArchivist. Methods mimic the interface used by CookieManager: get_message_str, set_message, del_message, get_message_dict. These stubs are no-ops and return None/False. """ def __init__(self, *args, **kwargs): pass def get_message_str(self, key: str): return None def set_message(self, key: str, value, expire: int = None, save: bool = False): return False def del_message(self, key: str): return False def get_message_dict(self, key: str): return None logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) class CookieManager: """Manage cookie string storage and provide a cookiefile path for yt-dlp. This writes the Netscape cookie string to a temporary file (Netscape format) which is compatible with `yt-dlp`'s `--cookiefile` option and with `http.cookiejar.MozillaCookieJar`. """ DEFAULT_MESSAGE_KEY = "cookie" def __init__(self, redis_archivist: t.Optional[object] = None): # Accept a RedisArchivist-like object for testability; otherwise try to create one if redis_archivist is not None: self.redis = redis_archivist else: if RedisArchivist is None: self.redis = None else: try: self.redis = RedisArchivist() except Exception: self.redis = None self._temp_files: list[str] = [] def get_cookiefile_path(self, message_key: str | None = None) -> t.Optional[str]: """Return a filesystem path to a Netscape cookie file written from the stored cookie string. If no cookie is available, returns None. """ key = message_key or self.DEFAULT_MESSAGE_KEY cookie_str = None if self.redis is not None and hasattr(self.redis, "get_message_str"): try: cookie_str = self.redis.get_message_str(key) except Exception as exc: logger.debug("CookieManager: error reading from redis: %s", exc) cookie_str = None if not cookie_str: # No cookie stored return None # Ensure cookie string ends with newline cookie_str = cookie_str.strip("\x00") if not cookie_str.endswith("\n"): cookie_str = cookie_str + "\n" # Write to a temp file in the system temp dir tf = tempfile.NamedTemporaryFile(mode="w", delete=False, prefix="yt_cookies_", suffix=".txt") tf.write(cookie_str) tf.flush() tf.close() self._temp_files.append(tf.name) # Validate it's a Netscape cookie file by attempting to load with MozillaCookieJar try: jar = cookiejar.MozillaCookieJar() jar.load(tf.name, ignore_discard=True, ignore_expires=True) except Exception: # It's okay if load fails; yt-dlp expects the netscape format; keep the file anyway logger.debug("CookieManager: written cookie file but couldn't load with MozillaCookieJar") return tf.name def cleanup(self) -> None: """Remove temporary cookie files created by get_cookiefile_path.""" for p in getattr(self, "_temp_files", []): try: os.unlink(p) except Exception: logger.debug("CookieManager: failed to unlink temp cookie file %s", p) self._temp_files = [] class YtDlpClient: """Base client to interact with yt-dlp. - `base_opts` are merged with per-call options. - If `use_redis_cookies` is True, the client will try to fetch a cookiefile path from Redis via `CookieManager` and inject `cookiefile` into options. Methods return tuples like (result, error) where result is data or True/False and error is a string or None. """ DEFAULT_OPTS: dict = { "quiet": True, "socket_timeout": 10, "extractor_retries": 2, "retries": 3, } def __init__(self, base_opts: dict | None = None, use_redis_cookies: bool = True, redis_archivist: t.Optional[object] = None): self.base_opts = dict(self.DEFAULT_OPTS) if base_opts: self.base_opts.update(base_opts) self.use_redis_cookies = use_redis_cookies self.cookie_mgr = CookieManager(redis_archivist) def _build_opts(self, extra: dict | None = None) -> dict: opts = dict(self.base_opts) if extra: opts.update(extra) # If cookie management is enabled, attempt to attach a cookiefile path if self.use_redis_cookies: cookiefile = self.cookie_mgr.get_cookiefile_path() if cookiefile: opts["cookiefile"] = cookiefile return opts def extract_info(self, url: str, extra_opts: dict | None = None) -> tuple[dict | None, str | None]: """Extract info for a url using yt-dlp.extract_info. Returns (info_dict, error_str). If successful, error_str is None. """ opts = self._build_opts(extra_opts) try: with yt_dlp.YoutubeDL(opts) as ydl: info = ydl.extract_info(url, download=False) except cookiejar.LoadError as exc: logger.error("Cookie load error: %s", exc) return None, f"cookie_load_error: {exc}" except yt_dlp.utils.ExtractorError as exc: logger.warning("ExtractorError for %s: %s", url, exc) return None, str(exc) except yt_dlp.utils.DownloadError as exc: msg = str(exc) logger.warning("DownloadError for %s: %s", url, msg) if "Temporary failure in name resolution" in msg: raise ConnectionError("lost the internet, abort!") from exc # Detect rate limiting if "HTTP Error 429" in msg or "too many requests" in msg.lower(): return None, "HTTP 429: rate limited" return None, msg except Exception as exc: # pragma: no cover - defensive logger.exception("Unexpected error in extract_info: %s", exc) return None, str(exc) finally: # Clean up temp cookie files after the call try: self.cookie_mgr.cleanup() except Exception: pass return info, None def download(self, url: str, extra_opts: dict | None = None) -> tuple[bool, str | None]: """Invoke ydl.download for the provided url. Returns (success, error_message). """ opts = self._build_opts(extra_opts) try: with yt_dlp.YoutubeDL(opts) as ydl: ydl.download([url]) except yt_dlp.utils.DownloadError as exc: msg = str(exc) logger.warning("DownloadError while downloading %s: %s", url, msg) if "Temporary failure in name resolution" in msg: raise ConnectionError("lost the internet, abort!") from exc if "HTTP Error 429" in msg or "too many requests" in msg.lower(): return False, "HTTP 429: rate limited" return False, msg except Exception as exc: logger.exception("Unexpected error during download: %s", exc) return False, str(exc) finally: try: self.cookie_mgr.cleanup() except Exception: pass return True, None # If running as a script, show a tiny demo (no network calls are performed here) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) client = YtDlpClient() print(client._build_opts())