#!/usr/bin/env python3
"""playwright_extract_m3u8.py

Open a YouTube page with Playwright and capture the m3u8/HLS URL(s) seen in
the page's network responses. It can also export the browser cookies in
Netscape format so they can be used with yt-dlp or your own API.

Usage:
    python3 tools/playwright_extract_m3u8.py \
        --video https://www.youtube.com/watch?v=ID \
        [--profile /path/to/profile] [--headless]

Requirements (host):
    pip install playwright
    python -m playwright install

Notes:
- Running it on the host (not inside the container) is recommended, so that
  the Chrome profile can be used and Playwright can drive the graphical
  interface if a manual login is needed.
- If --profile is given, a persistent session is launched using that
  directory (useful for reusing an already logged-in Chrome session). If it
  is left empty, a clean context is used.
"""

import argparse
import os
import json
import time
import traceback
from pathlib import Path

try:
    from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
except Exception:
    # Fail fast at import time with an actionable hint, then re-raise the
    # original error so callers still see the real cause.
    print("playwright no está instalado. Instala con: pip install playwright && python -m playwright install")
    raise


# Substrings (lower-case) of Content-Type values that identify an HLS playlist.
_HLS_CONTENT_TYPES = ("vnd.apple.mpegurl", "application/x-mpegurl")


def _netscape_expiry(raw):
    """Return the expiry column for a Netscape cookie line.

    Playwright reports session cookies with ``expires == -1``. The Netscape
    format marks a session cookie with ``0``, and stricter parsers reject a
    non-digit value such as ``-1``, so anything missing, non-numeric or
    non-positive is written as ``"0"``.
    """
    try:
        timestamp = int(raw)
    except (TypeError, ValueError, OverflowError):
        return "0"
    return str(timestamp) if timestamp > 0 else "0"


def write_netscape_cookie_file(cookies, target_path):
    """Write cookies to ``target_path`` in Netscape cookie-file format.

    Args:
        cookies: Iterable of dicts shaped like the ones Playwright's
            ``BrowserContext.cookies()`` returns (keys read here: ``domain``,
            ``path``, ``secure``, ``expires``, ``name``, ``value``).
        target_path: Destination file; missing parent directories are created.
    """
    lines = ["# Netscape HTTP Cookie File"]
    for cookie in cookies:
        domain = cookie.get("domain", "")
        # Second column: TRUE when the cookie applies to subdomains, which is
        # signalled by a leading dot on the domain.
        include_subdomains = "TRUE" if domain.startswith(".") else "FALSE"
        secure = "TRUE" if cookie.get("secure") else "FALSE"
        lines.append(
            "\t".join(
                [
                    domain,
                    include_subdomains,
                    cookie.get("path", "/"),
                    secure,
                    _netscape_expiry(cookie.get("expires")),
                    cookie.get("name", ""),
                    cookie.get("value", ""),
                ]
            )
        )
    Path(target_path).parent.mkdir(parents=True, exist_ok=True)
    with open(target_path, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines) + "\n")


def _is_hls_response(resp):
    """Return True when a Playwright response looks like an HLS manifest."""
    if ".m3u8" in resp.url.lower():
        return True
    # Header values keep the server's casing (e.g. "application/x-mpegURL"),
    # so compare case-insensitively.
    content_type = resp.headers.get("content-type", "").lower()
    return any(marker in content_type for marker in _HLS_CONTENT_TYPES)


def _wait_for_manifest(page, collected, poll_seconds=6, extra_seconds=3):
    """Give the page time to request its manifest, nudging playback if needed.

    Uses ``page.wait_for_timeout`` rather than ``time.sleep``: with the sync
    API, network events are only dispatched while Playwright itself is
    waiting, so a plain sleep would keep the response handler from running.
    """
    for _ in range(poll_seconds):
        page.wait_for_timeout(1000)
        if collected:
            return
    # Nothing seen yet: try to start playback so the player fetches a manifest.
    try:
        page.evaluate("() => { const v = document.querySelector('video'); if (v) v.play(); }")
    except Exception:
        # Best effort only; autoplay may be blocked or the page may have
        # navigated away.
        pass
    page.wait_for_timeout(extra_seconds * 1000)


def extract_m3u8(video_url: str, profile: str | None, headless: bool, timeout: int = 45, save_cookies: bool = True):
    """Load ``video_url`` in Chromium and collect HLS manifest URLs.

    Args:
        video_url: Page to open.
        profile: Browser user-data directory for a persistent (logged-in)
            session, or ``None``/empty for a clean context.
        headless: Whether to run the browser headless.
        timeout: Page-load timeout in seconds.
        save_cookies: When true, export the context cookies to
            ``./data/cookies.txt`` after a successful navigation.

    Returns:
        A dict with ``m3u8_urls`` (sorted, de-duplicated list), ``cookies_file``
        (path or ``None``) and ``errors`` (list of strings). Navigation errors
        are reported in ``errors``; browser launch errors propagate.
    """
    result = {"m3u8_urls": [], "cookies_file": None, "errors": []}
    data_dir = Path.cwd() / "data"
    data_dir.mkdir(exist_ok=True)
    target_cookies = str(data_dir / "cookies.txt")

    # Request headers that mimic a regular browser. The user agent is
    # deliberately left at the browser default (some Playwright builds lack
    # the user_agent parameter).
    extra_headers = {"Accept-Language": "en-US,en;q=0.9", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}
    # Flags that help Chromium start in container environments.
    launch_args = ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
    collected = set()

    def on_response(resp):
        try:
            if _is_hls_response(resp):
                collected.add(resp.url)
        except Exception:
            # A handler failure must never abort the capture.
            pass

    with sync_playwright() as p:
        # Chromium gives the best compatibility with a Chrome profile.
        browser_type = p.chromium
        browser = None
        context = None
        try:
            if profile:
                # A persistent context works directly on the user-data dir.
                context = browser_type.launch_persistent_context(user_data_dir=profile, headless=headless, extra_http_headers=extra_headers, args=launch_args)
            else:
                browser = browser_type.launch(headless=headless, args=launch_args)
                context = browser.new_context(extra_http_headers=extra_headers)

            # Debug info (best effort).
            try:
                print(f"[playwright] started browser headless={headless} profile={'yes' if profile else 'no'}")
            except Exception:
                pass

            page = context.new_page()
            page.on("response", on_response)

            try:
                page.goto(video_url, timeout=timeout * 1000)
                _wait_for_manifest(page, collected)

                if save_cookies:
                    try:
                        write_netscape_cookie_file(context.cookies(), target_cookies)
                        result['cookies_file'] = target_cookies
                    except Exception as e:
                        result['errors'].append(f"cookie_export_error:{e}")
            except PWTimeout as e:
                result['errors'].append(f"page_timeout: {e}")
            except Exception:
                result['errors'].append(traceback.format_exc())

            # Report whatever was captured, even when navigation timed out:
            # the manifest is often requested before the load event fires.
            result['m3u8_urls'] = sorted(collected)
        finally:
            # Close the context first, then the browser; both are best effort.
            for closable in (context, browser):
                if closable is None:
                    continue
                try:
                    closable.close()
                except Exception:
                    pass
    return result


def main():
    """Parse command-line arguments, run the extraction and print JSON."""
    parser = argparse.ArgumentParser(description='Playwright m3u8 extractor for YouTube')
    parser.add_argument('--video', required=True, help='Video URL or ID (e.g. https://www.youtube.com/watch?v=ID)')
    parser.add_argument('--profile', default='', help='Path to browser profile (user data dir) to reuse logged session')
    parser.add_argument('--headless', action='store_true', help='Run headless')
    parser.add_argument('--timeout', type=int, default=45, help='Timeout for page load (seconds)')
    parser.add_argument('--no-cookies', dest='save_cookies', action='store_false', help="Don't save cookies to ./data/cookies.txt")
    args = parser.parse_args()

    video = args.video
    # A bare 11-character value is treated as a YouTube video ID.
    if len(video) == 11 and not video.startswith('http'):
        video = f'https://www.youtube.com/watch?v={video}'
    res = extract_m3u8(video, profile=args.profile or None, headless=args.headless, timeout=args.timeout, save_cookies=args.save_cookies)
    print(json.dumps(res, indent=2, ensure_ascii=False))


if __name__ == '__main__':
    main()