233 lines
8.5 KiB
Python
233 lines
8.5 KiB
Python
"""Utility wrapper for yt-dlp calls with robust cookie handling via cookiejar.
|
|
|
|
Provides:
|
|
- CookieManager: reads cookie string from RedisArchivist and writes a Netscape cookie file
|
|
that can be passed to yt-dlp via the `cookiefile` option (the API counterpart of the CLI's `--cookies FILE`).
|
|
- YtDlpClient: base class to perform download/extract calls to yt-dlp with consistent
|
|
error handling and optional automatic cookie injection.
|
|
|
|
Usage example:
|
|
from yt_wrap import YtDlpClient
|
|
client = YtDlpClient(base_opts={"quiet": True})
|
|
info, err = client.extract_info('https://www.youtube.com/watch?v=K08TM4OVLyo')
|
|
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
import typing as t
|
|
from http import cookiejar
|
|
|
|
import yt_dlp
|
|
|
|
# Import project-specific RedisArchivist if available; otherwise provide a local stub
|
|
try:
    from common.src.ta_redis import RedisArchivist
except ImportError:
    # Only a missing project package should trigger the fallback. Any other
    # failure raised while importing the real module is a genuine bug and must
    # surface instead of silently disabling cookie support.

    class RedisArchivist:
        """Fallback stub for environments without the project's RedisArchivist.

        It offers the methods this module relies on (``get_message_str``,
        ``set_message``, ``del_message``, ``get_message_dict``). Every method
        is a no-op: readers return ``None`` and writers return ``False``.
        """

        def __init__(self, *args, **kwargs) -> None:
            """Accept and ignore any constructor arguments."""

        def get_message_str(self, key: str) -> str | None:
            """Return ``None``: the stub never holds a value for ``key``."""
            return None

        def set_message(
            self,
            key: str,
            value,
            expire: int | None = None,
            save: bool = False,
        ) -> bool:
            """Discard ``value`` and return ``False`` (nothing was stored)."""
            return False

        def del_message(self, key: str) -> bool:
            """Return ``False``: there is never anything to delete."""
            return False

        def get_message_dict(self, key: str) -> dict | None:
            """Return ``None``: the stub never holds a value for ``key``."""
            return None
|
|
|
|
# Library-style logging: attach a do-nothing handler to this module's logger so
# that the embedding application decides where (and whether) records are shown.
logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class CookieManager:
    """Materialise a stored cookie string as a Netscape cookie file for yt-dlp.

    The cookie text is read from a RedisArchivist-like store and written to a
    temporary file. The file path can be handed to yt-dlp through its
    ``cookiefile`` option and is also what ``http.cookiejar.MozillaCookieJar``
    reads. Every file created is remembered so :meth:`cleanup` can remove it.
    """

    DEFAULT_MESSAGE_KEY = "cookie"

    def __init__(self, redis_archivist: t.Optional[object] = None):
        """Bind the manager to a cookie store.

        Args:
            redis_archivist: Any object exposing ``get_message_str(key)``.
                When omitted, a ``RedisArchivist`` is constructed; if that
                fails the manager runs without a store and never yields a
                cookie file.
        """
        if redis_archivist is not None:
            self.redis = redis_archivist
        else:
            try:
                self.redis = RedisArchivist()
            except Exception as exc:
                # Best effort: cookie injection is optional, so an unusable
                # store degrades to "no cookies" rather than failing the client.
                logger.debug("CookieManager: could not create RedisArchivist: %s", exc)
                self.redis = None
        self._temp_files: list[str] = []

    def _read_cookie(self, key: str) -> t.Optional[str]:
        """Fetch the raw cookie text for ``key``, or ``None`` if unavailable."""
        getter = getattr(self.redis, "get_message_str", None)
        if getter is None:
            return None
        try:
            raw = getter(key)
        except Exception as exc:
            logger.debug("CookieManager: error reading from redis: %s", exc)
            return None
        # A store that does not decode responses hands back bytes; normalise so
        # the string handling in get_cookiefile_path cannot fail on it.
        if isinstance(raw, (bytes, bytearray)):
            raw = raw.decode("utf-8", errors="replace")
        return raw

    def get_cookiefile_path(self, message_key: str | None = None) -> t.Optional[str]:
        """Write the stored cookie string to a temp file and return its path.

        Args:
            message_key: Store key to read; defaults to ``DEFAULT_MESSAGE_KEY``.

        Returns:
            Path of a newly created cookie file, or ``None`` when no cookie
            text is available. The caller should invoke :meth:`cleanup` once
            the file is no longer needed.
        """
        cookie_str = self._read_cookie(message_key or self.DEFAULT_MESSAGE_KEY)
        if not cookie_str:
            return None

        # Drop NUL padding; if nothing is left there is no cookie to write.
        cookie_str = cookie_str.strip("\x00")
        if not cookie_str:
            return None
        if not cookie_str.endswith("\n"):
            cookie_str += "\n"

        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            delete=False,
            prefix="yt_cookies_",
            suffix=".txt",
        ) as handle:
            # Register before writing so a failed write is still removed by
            # cleanup() instead of leaving cookie data behind on disk.
            self._temp_files.append(handle.name)
            handle.write(cookie_str)
        path = handle.name

        # Advisory validation only: a parse failure is logged and the file is
        # still returned, leaving the final verdict to yt-dlp.
        try:
            cookiejar.MozillaCookieJar().load(path, ignore_discard=True, ignore_expires=True)
        except Exception:
            logger.debug("CookieManager: written cookie file but couldn't load with MozillaCookieJar")

        return path

    def cleanup(self) -> None:
        """Remove temporary cookie files created by get_cookiefile_path."""
        pending = getattr(self, "_temp_files", [])
        # Reset first so the list is never left pointing at deleted files, even
        # if removal is interrupted part-way through.
        self._temp_files = []
        for path in pending:
            try:
                os.unlink(path)
            except FileNotFoundError:
                # Already gone; nothing left to clean.
                continue
            except OSError:
                logger.debug("CookieManager: failed to unlink temp cookie file %s", path)
|
|
|
|
|
|
class YtDlpClient:
    """Base client to interact with yt-dlp.

    - ``base_opts`` are layered over ``DEFAULT_OPTS`` and then merged with
      per-call options.
    - If ``use_redis_cookies`` is True, each call asks the ``CookieManager``
      for a cookie file and, when one exists, injects it as ``cookiefile``.

    Public methods return ``(result, error)`` tuples, where ``result`` is the
    data or a success flag and ``error`` is a message string or ``None``.
    """

    DEFAULT_OPTS: dict = {
        "quiet": True,
        "socket_timeout": 10,
        "extractor_retries": 2,
        "retries": 3,
    }

    def __init__(self, base_opts: dict | None = None, use_redis_cookies: bool = True, redis_archivist: t.Optional[object] = None):
        """Create a client.

        Args:
            base_opts: yt-dlp options applied to every call, on top of
                ``DEFAULT_OPTS``.
            use_redis_cookies: Whether to inject a cookie file per call.
            redis_archivist: Optional cookie store passed to ``CookieManager``.
        """
        self.base_opts = dict(self.DEFAULT_OPTS)
        if base_opts:
            self.base_opts.update(base_opts)
        self.use_redis_cookies = use_redis_cookies
        self.cookie_mgr = CookieManager(redis_archivist)

    def _build_opts(self, extra: dict | None = None) -> dict:
        """Return a new options dict: base options, then ``extra``, then cookies.

        ``self.base_opts`` is never mutated. When cookie injection is enabled
        and a cookie file is available, its path is stored under
        ``"cookiefile"``, replacing any value supplied by the caller.
        """
        opts = dict(self.base_opts)
        if extra:
            opts.update(extra)

        if self.use_redis_cookies:
            cookiefile = self.cookie_mgr.get_cookiefile_path()
            if cookiefile:
                opts["cookiefile"] = cookiefile
        return opts

    @staticmethod
    def _classify_download_error(exc: Exception) -> str:
        """Map a yt-dlp download failure to the message returned to callers.

        Raises:
            ConnectionError: if the failure text indicates DNS resolution is
                down, i.e. connectivity is lost and the caller should abort.
        """
        msg = str(exc)
        if "Temporary failure in name resolution" in msg:
            raise ConnectionError("lost the internet, abort!") from exc
        if "HTTP Error 429" in msg or "too many requests" in msg.lower():
            return "HTTP 429: rate limited"
        return msg

    def _cleanup_cookies(self) -> None:
        """Remove per-call cookie files; a cleanup failure never masks the result."""
        try:
            self.cookie_mgr.cleanup()
        except Exception as exc:
            logger.debug("YtDlpClient: cookie cleanup failed: %s", exc)

    def extract_info(self, url: str, extra_opts: dict | None = None) -> tuple[dict | None, str | None]:
        """Extract metadata for ``url`` without downloading.

        Returns:
            ``(info_dict, None)`` on success, ``(None, error_str)`` on failure.

        Raises:
            ConnectionError: if yt-dlp reports a name-resolution failure.
        """
        try:
            # Built inside the try so a cookie file written here is always
            # removed by the finally clause, even if construction fails later.
            opts = self._build_opts(extra_opts)
            try:
                with yt_dlp.YoutubeDL(opts) as ydl:
                    info = ydl.extract_info(url, download=False)
            except cookiejar.LoadError as exc:
                logger.error("Cookie load error: %s", exc)
                return None, f"cookie_load_error: {exc}"
            except yt_dlp.utils.ExtractorError as exc:
                logger.warning("ExtractorError for %s: %s", url, exc)
                return None, str(exc)
            except yt_dlp.utils.DownloadError as exc:
                logger.warning("DownloadError for %s: %s", url, exc)
                return None, self._classify_download_error(exc)
            except Exception as exc:  # pragma: no cover - defensive
                logger.exception("Unexpected error in extract_info: %s", exc)
                return None, str(exc)
            return info, None
        finally:
            self._cleanup_cookies()

    def download(self, url: str, extra_opts: dict | None = None) -> tuple[bool, str | None]:
        """Download ``url`` with yt-dlp.

        Returns:
            ``(True, None)`` on success, ``(False, error_message)`` on failure.
            This includes the case where yt-dlp suppressed the error (e.g.
            ``ignoreerrors``) and only signalled it via a non-zero return code.

        Raises:
            ConnectionError: if yt-dlp reports a name-resolution failure.
        """
        try:
            opts = self._build_opts(extra_opts)
            try:
                with yt_dlp.YoutubeDL(opts) as ydl:
                    retcode = ydl.download([url])
            except cookiejar.LoadError as exc:
                # Same handling as extract_info, so callers see one error shape.
                logger.error("Cookie load error: %s", exc)
                return False, f"cookie_load_error: {exc}"
            except yt_dlp.utils.DownloadError as exc:
                logger.warning("DownloadError while downloading %s: %s", url, exc)
                return False, self._classify_download_error(exc)
            except Exception as exc:
                logger.exception("Unexpected error during download: %s", exc)
                return False, str(exc)
            if retcode:
                logger.warning("yt-dlp returned code %s while downloading %s", retcode, url)
                return False, f"yt-dlp reported failure (return code {retcode})"
            return True, None
        finally:
            self._cleanup_cookies()
|
|
|
|
|
|
# If running as a script, print the merged option dict as a tiny demo. No yt-dlp
# request is made, but with the default `use_redis_cookies=True` the cookie store
# is still queried and a temporary cookie file may be written.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    client = YtDlpClient()
    try:
        print(client._build_opts())
    finally:
        # _build_opts may have written a cookie file; do not leave it behind.
        client.cookie_mgr.cleanup()
|