Add Playwright tools for extracting M3U8 URLs and proxy management

- Introduced `playwright_extract_m3u8.py` to extract M3U8 URLs from YouTube videos using Playwright.
- Added `README_PLAYWRIGHT.md` for usage instructions and requirements.
- Created `expand_and_test_proxies.py` to expand user-provided proxies and test their validity.
- Implemented `generate_proxy_whitelist.py` to generate a whitelist of working proxies based on testing results.
- Added sample proxy files: `user_proxies.txt` for user-defined proxies and `proxies_sample.txt` as a template.
- Generated `expanded_proxies.txt`, `whitelist.json`, and `whitelist.txt` for storing expanded and valid proxies.
- Included error handling and logging for proxy testing results.
Commit 2923510c51 (parent c9f8c9290b) · Cesar Mendivil · 2026-03-17 00:29:51 -07:00 · 14 changed files, 1728 additions and 176 deletions

Dockerfile.api

@ -3,8 +3,8 @@ FROM python:3.11-slim
ENV PYTHONUNBUFFERED=1
# Instalar ffmpeg, Node.js (LTS via NodeSource) y herramientas necesarias
# Node.js + yt-dlp-utils son requeridos para resolver el n-challenge y signature de YouTube
# Instalar ffmpeg, Node.js 20 LTS y herramientas necesarias
# Node.js es requerido por yt-dlp --js-runtimes para resolver n-challenge/signature de YouTube
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
ffmpeg \
@ -13,8 +13,7 @@ RUN apt-get update \
gnupg \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y --no-install-recommends nodejs \
&& rm -rf /var/lib/apt/lists/* \
&& npm install -g yt-dlp-utils 2>/dev/null || true
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
@ -22,7 +21,7 @@ WORKDIR /app
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
# Instalar yt-dlp desde la última versión del binario oficial (no pip) para tener siempre la más reciente
# Instalar yt-dlp desde el binario oficial más reciente (no pip) para siempre tener la última versión
RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp \
&& chmod a+rx /usr/local/bin/yt-dlp
@ -42,5 +41,5 @@ USER appuser
EXPOSE 8000
# Comando por defecto para ejecutar la API
# Comando para ejecutar la API
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

docker-compose.yml

@ -1,23 +1,31 @@
services:
# Servicio FastAPI - Backend API
tubescript-api:
build:
context: .
dockerfile: Dockerfile.api
args:
# Invalida solo la capa COPY . /app para que siempre tome el código más reciente
# sin necesidad de --no-cache (que descarga todo desde cero)
# Invalida la capa COPY . /app sin necesidad de --no-cache completo
CACHEBUST: "${CACHEBUST:-1}"
image: tubescript-api:latest
container_name: tubescript_api
ports:
- "8282:8000"
volumes:
# Datos persistentes: cookies.txt, config, etc.
- ./data:/app/data:rw
# ── Perfiles de navegador del HOST (read-only) ──────────────────────────
# yt-dlp puede leer cookies directamente del navegador con
# POST /extract_chrome_cookies?browser=chrome
# Descomenta el navegador que tengas instalado en el host:
- ${HOME}/.config/google-chrome:/host-chrome:ro
# - ${HOME}/.config/chromium:/host-chromium:ro
# - ${HOME}/.config/BraveSoftware/Brave-Browser:/host-brave:ro
# - ${HOME}/.mozilla/firefox:/host-firefox:ro
# - ${HOME}/.config/microsoft-edge:/host-edge:ro
environment:
- PYTHONUNBUFFERED=1
- API_COOKIES_PATH=/app/data/cookies.txt
# Optional: set API_PROXY when you want the container to use a SOCKS/HTTP proxy
# Proxy opcional: socks5h://127.0.0.1:9050
- API_PROXY=${API_PROXY:-}
restart: unless-stopped
networks:


@ -1,125 +1,83 @@
#!/bin/bash
# Script para reconstruir las imágenes Docker de TubeScript
# Script para reconstruir TubeScript-API desde cero
set -e
GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'
ok() { echo -e "${GREEN}$1${NC}"; }
warn() { echo -e "${YELLOW}⚠️ $1${NC}"; }
err() { echo -e "${RED}$1${NC}"; }
echo "════════════════════════════════════════════════════════════"
echo " 🔨 TubeScript-API - Rebuild de Docker"
echo " 🔨 TubeScript-API — Rebuild completo"
echo "════════════════════════════════════════════════════════════"
echo ""
# Colores
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'
print_success() {
echo -e "${GREEN}$1${NC}"
}
print_warning() {
echo -e "${YELLOW}⚠️ $1${NC}"
}
print_error() {
echo -e "${RED}$1${NC}"
}
# Verificar Docker
echo "🔍 Verificando Docker..."
if ! command -v docker &> /dev/null; then
print_error "Docker no está instalado"
# ── Verificar Docker (plugin compose, no docker-compose legacy) ──────────────
if ! docker compose version &>/dev/null; then
err "docker compose no está disponible. Instala Docker Desktop o el plugin compose."
exit 1
fi
if ! command -v docker-compose &> /dev/null; then
print_error "Docker Compose no está instalado"
exit 1
fi
print_success "Docker encontrado"
ok "Docker compose disponible: $(docker compose version --short 2>/dev/null || echo 'ok')"
echo ""
# Asegurar carpeta data para montajes de configuración
echo "📁 Asegurando carpeta './data' para montaje de configuración..."
if [ ! -d "./data" ]; then
mkdir -p ./data
chmod 755 ./data || true
print_success "Carpeta ./data creada"
# ── Carpeta data ──────────────────────────────────────────────────────────────
mkdir -p ./data
chmod 777 ./data 2>/dev/null || true
ok "Carpeta ./data lista (permisos 777)"
echo " Coloca cookies.txt en ./data/cookies.txt para autenticación"
echo ""
# ── Detener contenedores existentes ──────────────────────────────────────────
echo "🛑 Deteniendo contenedores..."
docker compose down --remove-orphans 2>/dev/null || true
ok "Contenedores detenidos"
echo ""
# ── Eliminar imagen anterior para forzar build limpio ─────────────────────────
echo "🧹 Eliminando imagen anterior (tubescript-api:latest)..."
docker rmi tubescript-api:latest 2>/dev/null && ok "Imagen anterior eliminada" || warn "No había imagen previa"
echo ""
# ── Build sin caché ───────────────────────────────────────────────────────────
echo "🔨 Construyendo imagen desde cero (--no-cache)..."
echo " Esto puede tardar 3-5 minutos la primera vez..."
echo ""
CACHEBUST=$(date +%s) docker compose build --no-cache
ok "Imagen construida exitosamente"
echo ""
# ── Iniciar servicios ─────────────────────────────────────────────────────────
echo "🚀 Iniciando servicios..."
docker compose up -d
ok "Servicios iniciados"
echo ""
# ── Esperar y mostrar estado ──────────────────────────────────────────────────
echo "⏳ Esperando que la API arranque (15s)..."
sleep 15
echo ""
echo "📊 Estado de contenedores:"
docker compose ps
echo ""
# ── Health check ──────────────────────────────────────────────────────────────
echo "🩺 Verificando API..."
if curl -sf http://localhost:8282/docs -o /dev/null; then
ok "API respondiendo en http://localhost:8282"
else
print_success "Carpeta ./data ya existe"
fi
echo "Nota: coloca aquí archivos persistentes como stream_config.json, streams_state.json y cookies.txt (ej: ./data/cookies.txt)"
echo ""
# Detener contenedores
echo "🛑 Deteniendo contenedores existentes..."
docker compose down 2>/dev/null || true
print_success "Contenedores detenidos"
echo ""
# Limpiar imágenes antiguas (opcional)
echo "🧹 ¿Deseas eliminar las imágenes antiguas? (s/N)"
read -p "> " clean_images
if [ "$clean_images" = "s" ] || [ "$clean_images" = "S" ]; then
echo "Eliminando imágenes antiguas..."
docker compose down --rmi all 2>/dev/null || true
print_success "Imágenes antiguas eliminadas"
fi
echo ""
# Reconstruir con CACHEBUST para invalidar solo la capa COPY . /app
# CACHEBUST=$(date +%s) se exporta para que docker-compose.yml lo tome via ${CACHEBUST:-1}
echo "🔨 Reconstruyendo imagen con código actualizado..."
echo "Usando CACHEBUST=$(date +%s) para forzar copia fresca del código..."
echo ""
export CACHEBUST="$(date +%s)"
docker compose build
if [ $? -eq 0 ]; then
print_success "Imagen reconstruida exitosamente"
else
print_error "Error al reconstruir imagen"
exit 1
fi
echo ""
# Preguntar si desea iniciar
echo "🚀 ¿Deseas iniciar los servicios ahora? (S/n)"
read -p "> " start_services
if [ "$start_services" != "n" ] && [ "$start_services" != "N" ]; then
echo ""
echo "🚀 Iniciando servicios..."
docker compose up -d
if [ $? -eq 0 ]; then
print_success "Servicios iniciados"
echo ""
echo "📊 Estado de los servicios:"
sleep 3
docker compose ps
echo ""
echo "════════════════════════════════════════════════════════════"
print_success "¡Rebuild completado!"
echo "════════════════════════════════════════════════════════════"
echo ""
echo "🌐 Servicios disponibles:"
echo " API: http://localhost:8282"
echo " Docs API: http://localhost:8282/docs"
echo ""
else
print_error "Error al iniciar servicios"
exit 1
fi
else
echo ""
print_success "Rebuild completado (servicios no iniciados)"
echo ""
echo "Para iniciar los servicios:"
echo " CACHEBUST=\$(date +%s) docker compose up -d --build"
warn "API aún no responde (puede necesitar más tiempo). Revisa: docker compose logs -f"
fi
echo ""
echo "════════════════════════════════════════════════════════════"
ok "¡Rebuild completado!"
echo "════════════════════════════════════════════════════════════"
echo ""
echo " API: http://localhost:8282"
echo " Docs: http://localhost:8282/docs"
echo " Logs: docker compose logs -f"
echo " Cookies: curl -X POST http://localhost:8282/upload_cookies -F 'file=@cookies.txt'"
echo ""

export-chrome-cookies.sh (new executable file, 141 lines)

@ -0,0 +1,141 @@
#!/bin/bash
# ─────────────────────────────────────────────────────────────────────────────
# export-chrome-cookies.sh
# Exports YouTube cookies from the HOST's browser profile using yt-dlp,
# and copies them to ./data/cookies.txt for the API to use.
#
# Usage:
# ./export-chrome-cookies.sh          # Chrome (default)
# ./export-chrome-cookies.sh chromium # Chromium
# ./export-chrome-cookies.sh brave    # Brave
# ./export-chrome-cookies.sh firefox  # Firefox
# ./export-chrome-cookies.sh edge     # Edge
#
# IMPORTANT:
# - Close the browser before running (Chrome locks the cookies file)
# - On Linux no password or special keychain is required
# ─────────────────────────────────────────────────────────────────────────────
set -e
BROWSER="${1:-chrome}"
OUTPUT="./data/cookies.txt"
GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'
ok() { echo -e "${GREEN}$1${NC}"; }
warn() { echo -e "${YELLOW}⚠️ $1${NC}"; }
err() { echo -e "${RED}$1${NC}"; exit 1; }
echo ""
echo "🍪 Exportando cookies de YouTube desde: $BROWSER"
echo ""
# Verificar yt-dlp
if ! command -v yt-dlp &>/dev/null; then
err "yt-dlp no está instalado. Instala con: pip install yt-dlp"
fi
# Verificar que el navegador no esté corriendo (puede causar errores de bloqueo)
BROWSER_PROC=""
case "$BROWSER" in
chrome) BROWSER_PROC="google-chrome|chrome" ;;
chromium) BROWSER_PROC="chromium" ;;
brave) BROWSER_PROC="brave" ;;
firefox) BROWSER_PROC="firefox" ;;
edge) BROWSER_PROC="msedge|microsoft-edge" ;;
esac
if [ -n "$BROWSER_PROC" ] && pgrep -f "$BROWSER_PROC" &>/dev/null; then
warn "El navegador '$BROWSER' parece estar corriendo."
warn "Ciérralo antes de exportar para evitar errores de bloqueo del DB."
echo ""
read -p "¿Continuar de todos modos? (s/N): " confirm
[[ "$confirm" =~ ^[sS]$ ]] || { echo "Cancelado."; exit 0; }
echo ""
fi
# Crear directorio de destino
mkdir -p "$(dirname "$OUTPUT")"
# Detectar ruta del perfil
PROFILE_PATH=""
case "$BROWSER" in
chrome)
for p in "$HOME/.config/google-chrome/Default" "$HOME/.config/google-chrome/Profile 1"; do
[ -d "$p" ] && PROFILE_PATH="$p" && break
done
;;
chromium)
PROFILE_PATH="$HOME/.config/chromium/Default"
;;
brave)
PROFILE_PATH="$HOME/.config/BraveSoftware/Brave-Browser/Default"
;;
firefox)
# Firefox: yt-dlp detecta el perfil automáticamente
PROFILE_PATH=""
;;
edge)
PROFILE_PATH="$HOME/.config/microsoft-edge/Default"
;;
*)
err "Navegador '$BROWSER' no soportado. Usa: chrome, chromium, brave, firefox, edge"
;;
esac
if [ -n "$PROFILE_PATH" ] && [ ! -d "$PROFILE_PATH" ]; then
err "No se encontró el perfil de $BROWSER en: $PROFILE_PATH"
fi
# Construir argumento --cookies-from-browser
if [ -n "$PROFILE_PATH" ]; then
BROWSER_ARG="${BROWSER}:${PROFILE_PATH}"
echo " Perfil: $PROFILE_PATH"
else
BROWSER_ARG="$BROWSER"
echo " Perfil: detectado automáticamente"
fi
echo " Destino: $OUTPUT"
echo ""
# Exportar cookies con yt-dlp
echo "⏳ Extrayendo cookies..."
yt-dlp \
--cookies-from-browser "$BROWSER_ARG" \
--cookies "$OUTPUT" \
--skip-download \
--no-warnings \
--extractor-args "youtube:player_client=tv_embedded" \
"https://www.youtube.com/watch?v=dQw4w9WgXcQ" 2>&1 || {
err "Error al extraer cookies. Asegúrate de que el navegador está cerrado y tienes sesión en YouTube."
}
# Verificar resultado
if [ ! -f "$OUTPUT" ] || [ ! -s "$OUTPUT" ]; then
err "No se generó el archivo de cookies o está vacío."
fi
YT_LINES=$(grep -c "youtube.com" "$OUTPUT" 2>/dev/null) || YT_LINES=0
FILE_SIZE=$(du -h "$OUTPUT" | cut -f1)
echo ""
ok "Cookies exportadas exitosamente"
echo " Archivo: $OUTPUT"
echo " Tamaño: $FILE_SIZE"
echo " Líneas youtube.com: $YT_LINES"
echo ""
if [ "$YT_LINES" -lt 3 ]; then
warn "Pocas cookies de YouTube encontradas ($YT_LINES)."
warn "Verifica que estás logueado en YouTube en $BROWSER."
else
ok "Cookies de YouTube encontradas: $YT_LINES líneas"
fi
echo ""
echo "📋 Próximos pasos:"
echo " 1. Si el contenedor está corriendo, las cookies ya están disponibles en /app/data/"
echo " 2. Si no está corriendo: docker compose up -d"
echo " 3. Prueba: curl http://localhost:8282/cookies/status"
echo ""

main.py (699 changed lines)

@ -6,8 +6,10 @@ import time
import re
import tempfile
import glob
import random
from fastapi import FastAPI, HTTPException, UploadFile, File
from typing import List, Dict, Any, cast
from fastapi.responses import JSONResponse
# Intentar importar youtube_transcript_api como fallback
try:
@ -34,6 +36,62 @@ DEFAULT_COOKIES_PATH = './data/cookies.txt'
# Proxy opcional para requests/yt-dlp (ej. socks5h://127.0.0.1:9050)
DEFAULT_PROXY = os.getenv('API_PROXY', '')
# New: simple proxy selector/rotator
# - If API_PROXY is set, it is used directly.
# - If API_PROXIES is set (comma-separated list), one is picked at random.
#   E.g. API_PROXIES="socks5h://127.0.0.1:9050,http://10.0.0.1:3128"
# New: default path for the whitelist file
PROXY_WHITELIST_FILE = os.getenv('PROXY_WHITELIST_FILE', 'tools/whitelist.txt')
_proxy_whitelist_cache = { 'ts': 0, 'proxies': [] }
def _load_whitelist_file(path: str, ttl: int = 30):
"""Carga proxies desde archivo path con TTL en segundos para cache.
Retorna lista de proxies (puede ser vacía).
"""
now = time.time()
if _proxy_whitelist_cache['proxies'] and (now - _proxy_whitelist_cache['ts'] < ttl):
return _proxy_whitelist_cache['proxies']
proxies = []
try:
if os.path.exists(path):
with open(path, 'r', encoding='utf-8') as fh:
for line in fh:
p = line.strip()
if p and not p.startswith('#'):
proxies.append(p)
except Exception:
proxies = []
_proxy_whitelist_cache['proxies'] = proxies
_proxy_whitelist_cache['ts'] = now
return proxies
def _get_proxy_choice() -> str | None:
"""Devuelve una URL de proxy elegida:
- Prioridad: API_PROXY (single) -> API_PROXIES (comma list) -> PROXY_WHITELIST_FILE -> None
"""
# 1) Legacy single proxy has priority
single = os.getenv('API_PROXY', '') or DEFAULT_PROXY or ''
if single:
return single
# 2) comma-separated list from env
lst = os.getenv('API_PROXIES', '') or ''
if lst:
proxies = [p.strip() for p in lst.split(',') if p.strip()]
if proxies:
return random.choice(proxies)
# 3) whitelist file
wl_file = os.getenv('PROXY_WHITELIST_FILE', PROXY_WHITELIST_FILE)
proxies = _load_whitelist_file(wl_file)
if proxies:
return random.choice(proxies)
return None
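For illustration, the selection priority can be exercised directly (a sketch; the proxy values are hypothetical, and it assumes API_PROXY was unset when the module was imported):

import os

os.environ.pop("API_PROXY", None)  # no single proxy set
os.environ["API_PROXIES"] = "socks5h://127.0.0.1:9050,http://10.0.0.1:3128"
print(_get_proxy_choice())  # one of the two API_PROXIES entries, chosen at random

os.environ["API_PROXY"] = "http://proxy.example:8080"  # hypothetical
print(_get_proxy_choice())  # always the API_PROXY value (highest priority)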
def clean_youtube_json(raw_json: Dict) -> List[Dict]:
"""
Transforma el formato complejo 'json3' de YouTube a un formato
@ -186,18 +244,21 @@ NODE_PATH = "/usr/bin/node"
def _yt_client_args(has_cookies: bool, for_stream: bool = False) -> list:
"""Devuelve --extractor-args y --js-runtimes para metadata/streams.
Estrategia (basada en pruebas reales 2026-03-05):
- Sin cookies android (sin n-challenge, sin Node.js)
- Con cookies web + Node.js (web acepta cookies; Node resuelve n-challenge/signature)
- for_stream android (mejor compatibilidad HLS en lives)
Estrategia actualizada 2026-03-07:
- android REQUIERE GVS PO Token desde 2026 formatos HTTPS omitidos HTTP 403.
YA NO SE USA para metadata ni streams.
- Sin cookies tv_embedded (sin PO Token, sin n-challenge, funciona para metadata)
- Con cookies web + Node.js (Node resuelve n-challenge/signature)
- for_stream tv_embedded (más fiable para HLS/lives sin cookies)
Diagnóstico:
- mweb con cookies requiere GVS PO Token (no disponible)
- android con cookies yt-dlp lo salta (no soporta cookies)
- web con cookies + --js-runtimes node funciona
- android requiere GVS PO Token (2026) NO usar
- mweb requiere Visitor Data PO Token NO usar sin cookies
- tv_embedded sin PO Token requerido funciona para metadata/stream
- web + Node.js funciona con cookies
"""
if for_stream or not has_cookies:
return ["--extractor-args", "youtube:player_client=android"]
return ["--extractor-args", "youtube:player_client=tv_embedded"]
else:
return [
"--extractor-args", "youtube:player_client=web",
@ -208,13 +269,18 @@ def _yt_client_args(has_cookies: bool, for_stream: bool = False) -> list:
def _yt_subs_args(has_cookies: bool) -> list:
"""Devuelve --extractor-args para descarga de subtítulos.
Para subtítulos siempre usamos android:
- android sin cookies funciona, obtiene auto-subs sin n-challenge
- android con cookies yt-dlp lo salta pero descarga igual sin cookies
- web con cookies falla en sub-langs no exactos (ej: en vs en-US)
Resultado: android es siempre el cliente más fiable para subtítulos.
Estrategia actualizada 2026-03-07:
- android requiere GVS PO Token desde 2026 subtítulos HTTP 403 NO usar.
- tv_embedded sin PO Token, obtiene auto-subs sin bot-check preferido.
- mweb fallback útil si tv_embedded no trae subs en ciertos idiomas.
- web + Node sólo con cookies (resuelve n-challenge).
"""
return ["--extractor-args", "youtube:player_client=android"]
if has_cookies:
return [
"--extractor-args", "youtube:player_client=web",
"--js-runtimes", f"node:{NODE_PATH}",
]
return ["--extractor-args", "youtube:player_client=tv_embedded,mweb"]
@ -245,7 +311,7 @@ def get_transcript_data(video_id: str, lang: str = "es"):
# cookies_path: prefer the temporary cookiefile if present, otherwise fall back to env path
cookies_path = cookiefile_path or os.getenv('API_COOKIES_PATH', DEFAULT_COOKIES_PATH)
# proxy support
proxy = os.getenv('API_PROXY', DEFAULT_PROXY) or None
proxy = _get_proxy_choice()
proxies = {'http': proxy, 'https': proxy} if proxy else None
def load_cookies_from_file(path: str) -> dict:
@ -276,6 +342,7 @@ def get_transcript_data(video_id: str, lang: str = "es"):
return cookies
cookies_for_requests = load_cookies_from_file(cookies_path) if cookies_path else {}
_has_ck_subs = bool(cookies_path and os.path.exists(cookies_path))
# Intento rápido y fiable: usar yt-dlp para descargar subtítulos (auto o manual) al tmpdir
try:
@ -290,8 +357,8 @@ def get_transcript_data(video_id: str, lang: str = "es"):
elif len(lang) == 2:
sub_langs = [lang, f"{lang}-{lang.upper()}", f"{lang}-419", f"{lang}-en"]
# always android for subtitles — do NOT pass --cookies because android does not support them
# (yt-dlp skips the android client if it receives cookies → downloads nothing)
# tv_embedded/mweb for subtitles without cookies (no PO Token required)
# web + Node.js when cookies are present (solves the n-challenge)
ytdlp_cmd = [
"yt-dlp",
url,
@ -302,8 +369,10 @@ def get_transcript_data(video_id: str, lang: str = "es"):
"-o", os.path.join(tmpdl, "%(id)s.%(ext)s"),
"--no-warnings",
"--sub-lang", ",".join(sub_langs),
] + _yt_subs_args(False)
# cookies are NOT passed with android (android does not support them in yt-dlp)
] + _yt_subs_args(_has_ck_subs)
# Pass cookies only when the web client is used (with cookies)
if _has_ck_subs:
ytdlp_cmd.extend(["--cookies", cookies_path])
# attach proxy if configured
if proxy:
@ -610,8 +679,10 @@ def get_transcript_data(video_id: str, lang: str = "es"):
"--sub-format", "json3/vtt/srv3/best",
"-o", os.path.join(tmpdir, "%(id)s.%(ext)s"),
"--no-warnings",
] + _yt_subs_args(False)
# NO cookies with android (android does not support them; yt-dlp would skip it)
] + _yt_subs_args(_has_ck_subs)
# Pass cookies only with the web client
if _has_ck_subs:
cmd.extend(["--cookies", cookies_path])
# añadir proxy a la llamada de yt-dlp si está configurado
if proxy:
@ -654,7 +725,7 @@ def get_transcript_data(video_id: str, lang: str = "es"):
"Prueba: ?lang=en | /debug/fetch_subs/{video_id} | sube cookies con /upload_cookies"
)
# ── Clientes exactos de NewPipeExtractor (ClientsConstants.java dev 2026-03-05) ──
# ── Clientes Innertube (sincronizados con NewPipeExtractor + yt-dlp 2026-03) ──
_NP_IOS = {
"clientName": "IOS", "clientVersion": "21.03.2",
"clientScreen": "WATCH", "platform": "MOBILE",
@ -668,7 +739,16 @@ _NP_ANDROID = {
"osName": "Android", "osVersion": "16", "androidSdkVersion": 36,
"userAgent": "com.google.android.youtube/21.03.36 (Linux; U; Android 16) gzip",
}
# GAPIS: youtubei.googleapis.com — NewPipe lo usa para iOS y Android (YoutubeStreamHelper.java)
# tv_embedded: NO requiere PO Token, siempre devuelve videoDetails + hlsManifestUrl en lives
# Es el cliente más fiable para obtener title/description sin autenticación.
_NP_TV_EMBEDDED = {
"clientName": "TVHTML5_SIMPLY_EMBEDDED_PLAYER",
"clientVersion": "2.0",
"clientScreen": "EMBED",
"platform": "TV",
"userAgent": "Mozilla/5.0 (SMART-TV; LINUX; Tizen 6.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/6.0 TV Safari/538.1",
}
# GAPIS: youtubei.googleapis.com — usado por NewPipe para iOS/Android/TV
_GAPIS_BASE = "https://youtubei.googleapis.com/youtubei/v1"
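_np_call_player itself is unchanged in this diff; for context, a simplified sketch of the kind of Innertube /player request it issues for the tv_embedded client (the exact body and headers here are an assumption based on public Innertube usage, not a copy of the implementation):

import requests

def innertube_player_sketch(video_id: str) -> dict:
    # Illustrative only: POST the /player endpoint with a client context.
    client = {k: v for k, v in _NP_TV_EMBEDDED.items() if k != "userAgent"}
    body = {"videoId": video_id, "context": {"client": client}}
    headers = {"User-Agent": _NP_TV_EMBEDDED["userAgent"]}
    r = requests.post(f"{_GAPIS_BASE}/player", json=body, headers=headers, timeout=15)
    r.raise_for_status()
    return r.json()  # expected keys: playabilityStatus, videoDetails, streamingData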
@ -754,12 +834,14 @@ def _np_call_player(video_id: str, client: dict,
def innertube_get_stream(video_id: str, proxy: str = None) -> dict:
"""
Obtiene URL de stream replicando exactamente NewPipeExtractor:
1. visitorData via /visitor_id (para ambos clientes)
2. iOS /player iosStreamingData.hlsManifestUrl (prioritario para lives)
3. Android /player formats directas (videos normales)
Obtiene URL de stream replicando NewPipeExtractor + fallback tv_embedded.
Sin cookies | Sin firma JS | Sin PO Token | Sin bot-check desde servidores
Orden de intentos:
1. iOS hlsManifestUrl (prioritario para lives, trae videoDetails)
2. Android formats directas + videoDetails
3. tv_embedded sin PO Token, siempre trae videoDetails y hlsManifestUrl en lives
Sin cookies | Sin firma JS | Sin bot-check desde servidores
"""
result = {
"title": None, "description": None,
@ -771,7 +853,7 @@ def innertube_get_stream(video_id: str, proxy: str = None) -> dict:
vd_ios = _np_get_visitor_data(_NP_IOS, proxies)
vd_android = _np_get_visitor_data(_NP_ANDROID, proxies)
# iOS — preferred for hlsManifestUrl on lives (as NewPipe does)
# ── iOS — preferred for hlsManifestUrl on lives ──────────────────────────
ios = _np_call_player(video_id, _NP_IOS, vd_ios, proxies)
ps = ios.get("playabilityStatus") or {}
if ps.get("status") == "LOGIN_REQUIRED":
@ -779,8 +861,8 @@ def innertube_get_stream(video_id: str, proxy: str = None) -> dict:
return result
vd_meta = ios.get("videoDetails") or {}
result["title"] = vd_meta.get("title")
result["description"] = vd_meta.get("shortDescription")
result["title"] = vd_meta.get("title") or None
result["description"] = vd_meta.get("shortDescription") or None
result["is_live"] = bool(vd_meta.get("isLive") or vd_meta.get("isLiveContent"))
ios_sd = ios.get("streamingData") or {}
@ -791,20 +873,38 @@ def innertube_get_stream(video_id: str, proxy: str = None) -> dict:
{"itag": f.get("itag"), "mimeType": f.get("mimeType"), "quality": f.get("quality")}
for f in (ios_sd.get("formats", []) + ios_sd.get("adaptiveFormats", []))[:8]
]
# Try to complete videoDetails if iOS did not return them
if not result["title"]:
vd_android_resp = _np_call_player(video_id, _NP_ANDROID, vd_android, proxies)
vd2 = vd_android_resp.get("videoDetails") or {}
result["title"] = vd2.get("title") or result["title"]
result["description"] = vd2.get("shortDescription") or result["description"]
if not result["title"]:
# last attempt: tv_embedded
tv = _np_call_player(video_id, _NP_TV_EMBEDDED, "", proxies)
vd3 = tv.get("videoDetails") or {}
result["title"] = vd3.get("title") or result["title"]
result["description"] = vd3.get("shortDescription") or result["description"]
return result
# Android — for regular videos or when iOS returned no HLS
# ── Android — for regular videos or when iOS returned no HLS ─────────────
android = _np_call_player(video_id, _NP_ANDROID, vd_android, proxies)
if not result["title"]:
vd2 = android.get("videoDetails") or {}
result["title"] = vd2.get("title")
result["description"] = vd2.get("shortDescription")
result["is_live"] = bool(vd2.get("isLive") or vd2.get("isLiveContent"))
result["title"] = vd2.get("title") or None
result["description"] = vd2.get("shortDescription") or None
result["is_live"] = result["is_live"] or bool(
vd2.get("isLive") or vd2.get("isLiveContent"))
android_sd = android.get("streamingData") or {}
hls = android_sd.get("hlsManifestUrl")
if hls:
result["hls_url"] = hls
if not result["title"]:
tv = _np_call_player(video_id, _NP_TV_EMBEDDED, "", proxies)
vd3 = tv.get("videoDetails") or {}
result["title"] = vd3.get("title") or result["title"]
result["description"] = vd3.get("shortDescription") or result["description"]
return result
all_fmts = android_sd.get("formats", []) + android_sd.get("adaptiveFormats", [])
@ -816,37 +916,136 @@ def innertube_get_stream(video_id: str, proxy: str = None) -> dict:
{"itag": f.get("itag"), "mimeType": f.get("mimeType"), "quality": f.get("quality")}
for f in best[:8]
]
if not result["title"]:
tv = _np_call_player(video_id, _NP_TV_EMBEDDED, "", proxies)
vd3 = tv.get("videoDetails") or {}
result["title"] = vd3.get("title") or result["title"]
result["description"] = vd3.get("shortDescription") or result["description"]
return result
# ── tv_embedded — no PO Token, last resort for streamingData ─────────────
tv = _np_call_player(video_id, _NP_TV_EMBEDDED, "", proxies)
vd3 = tv.get("videoDetails") or {}
if not result["title"]:
result["title"] = vd3.get("title") or None
result["description"] = vd3.get("shortDescription") or None
result["is_live"] = result["is_live"] or bool(
vd3.get("isLive") or vd3.get("isLiveContent"))
tv_sd = tv.get("streamingData") or {}
hls = tv_sd.get("hlsManifestUrl")
if hls:
result["hls_url"] = hls
return result
all_fmts_tv = tv_sd.get("formats", []) + tv_sd.get("adaptiveFormats", [])
best_tv = sorted([f for f in all_fmts_tv if f.get("url")],
key=lambda x: x.get("bitrate", 0), reverse=True)
if best_tv:
result["hls_url"] = best_tv[0]["url"]
result["formats"] = [
{"itag": f.get("itag"), "mimeType": f.get("mimeType"), "quality": f.get("quality")}
for f in best_tv[:8]
]
return result
result["error"] = (
"Innertube no devolvió streamingData. "
"Innertube no devolvió streamingData (iOS + Android + tv_embedded). "
"Puede ser DRM, región bloqueada, privado, o YouTube actualizó su API."
)
return result
def _fetch_metadata_ytdlp(video_id: str, proxy: str = None) -> dict:
"""Obtiene title, description, is_live usando yt-dlp.
Prueba clientes en orden hasta obtener título:
1. tv_embedded sin PO Token, devuelve videoDetails completo
2. ios HLS nativo, suele traer title
3. mweb fallback adicional
4. --print title (rápido, último recurso)
"""
url = f"https://www.youtube.com/watch?v={video_id}"
proxy_args = ["--proxy", proxy] if proxy else []
# Try --dump-json with each client
for client in ("tv_embedded", "ios", "mweb"):
cmd = [
"yt-dlp", "--skip-download", "--dump-json", "--no-warnings",
"--extractor-args", f"youtube:player_client={client}",
url,
] + proxy_args
try:
res = subprocess.run(cmd, capture_output=True, text=True, timeout=25)
if res.returncode == 0 and res.stdout.strip():
d = json.loads(res.stdout.strip())
title = d.get("title") or d.get("fulltitle")
if title:
return {
"title": title,
"description": d.get("description") or None,
"is_live": bool(d.get("is_live") or d.get("was_live")),
}
except Exception:
continue
# Last resort: --print title (very fast, title only)
for client in ("tv_embedded", "ios", "mweb"):
cmd = [
"yt-dlp", "--skip-download", "--no-warnings",
"--print", "%(title)s\n%(is_live)s\n%(description)s",
"--extractor-args", f"youtube:player_client={client}",
url,
] + proxy_args
try:
res = subprocess.run(cmd, capture_output=True, text=True, timeout=20)
if res.returncode == 0 and res.stdout.strip():
lines = res.stdout.strip().splitlines()
title = lines[0].strip() if lines else None
if title and title.lower() not in ("none", "na", ""):
is_live = lines[1].strip().lower() in ("true", "1") if len(lines) > 1 else False
desc = "\n".join(lines[2:]).strip() if len(lines) > 2 else None
return {
"title": title,
"description": desc or None,
"is_live": is_live,
}
except Exception:
continue
return {"title": None, "description": None, "is_live": False}
def get_stream_url(video_id: str):
"""
Obtiene la URL de transmisión m3u8/HLS.
Devuelve: (stream_url, title, description, is_live, error)
Estrategia:
1. innertube_get_stream() técnica NewPipe, sin cookies, sin bot-check
2. Fallback yt-dlp si Innertube falla
1. innertube_get_stream() iOS + Android + tv_embedded, sin cookies
2. Fallback yt-dlp con tv_embedded/ios/web
3. title/description siempre se completan con _fetch_metadata_ytdlp si faltan
"""
video_id = extract_video_id(video_id)
proxy = os.getenv('API_PROXY', DEFAULT_PROXY) or None
# ── 1. Innertube directo (NewPipe) ────────────────────────────────────────
it = innertube_get_stream(video_id, proxy=proxy)
if it.get("hls_url"):
return (it["hls_url"], it.get("title"), it.get("description"),
it.get("is_live", False), None)
title = it.get("title")
description = it.get("description")
is_live = it.get("is_live", False)
if it.get("hls_url"):
# Fill in metadata with yt-dlp if Innertube did not return it
if not title:
meta = _fetch_metadata_ytdlp(video_id, proxy=proxy)
title = meta["title"] or title
description = meta["description"] or description
is_live = is_live or meta["is_live"]
return it["hls_url"], title, description, is_live, None
# ── 2. Fallback yt-dlp ────────────────────────────────────────────────────
cookie_mgr = CookieManager()
cookiefile_path = cookie_mgr.get_cookiefile_path()
@ -884,7 +1083,8 @@ def get_stream_url(video_id: str):
except Exception:
return None, False
clients = ["android", "ios"] + (["web"] if has_ck else [])
# tv_embedded no requiere PO Token; ios da HLS nativo; web+cookies resuelve n-challenge
clients = ["tv_embedded", "ios"] + (["web"] if has_ck else [])
fmts = (["91", "92", "93", "94", "95", "96",
"best[protocol=m3u8_native]", "best[protocol=m3u8]", "best"]
if is_live else
@ -896,6 +1096,12 @@ def get_stream_url(video_id: str):
for fmt in fmts:
u, is_b = _ytdlp_url(fmt, client)
if u:
# Fill in metadata if still missing
if not title:
meta = _fetch_metadata_ytdlp(video_id, proxy=proxy)
title = meta["title"] or title
description = meta["description"] or description
is_live = is_live or meta["is_live"]
return u, title, description, is_live, None
if is_b:
got_bot = True
@ -905,18 +1111,96 @@ def get_stream_url(video_id: str):
except Exception:
pass
# Last metadata attempt even if there is no stream
if not title:
meta = _fetch_metadata_ytdlp(video_id, proxy=proxy)
title = meta["title"] or title
description = meta["description"] or description
if got_bot:
# Try a Playwright fallback via _attempt_playwright_fallback: return m3u8/cookies if found; otherwise return the previous message with detail.
try:
pw_m3u8, pw_cookies, pw_err = _attempt_playwright_fallback(video_id)
if pw_m3u8:
# Playwright found the m3u8 — return success
return pw_m3u8, title, description, is_live, None
# Playwright did not succeed — include its error in the response
detail = pw_err or 'YouTube detectó actividad de bot. Sube cookies.txt con /upload_cookies.'
except Exception as e:
detail = f'YouTube detectó actividad de bot. Además, Playwright fallback falló: {str(e)[:200]}'
return None, title, description, is_live, detail
return None, title, description, is_live, (
"YouTube detectó actividad de bot. "
"Sube cookies.txt: curl -X POST http://localhost:8282/upload_cookies -F 'file=@cookies.txt'"
)
return None, title, description, is_live, (
it.get("error") or
"No se pudo obtener la URL del stream. "
"Si es un live, verifica que esté EN VIVO (🔴) ahora mismo."
)
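The five-tuple contract keeps call sites simple (illustrative):

stream_url, title, description, is_live, error = get_stream_url("cmqVmX2UVBM")
if error:
    print(f"no stream: {error} (title may still be set: {title})")
else:
    print(f"{'LIVE' if is_live else 'VOD'}: {title} -> {stream_url[:80]}")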
@app.get("/debug/stream/{video_id}")
def debug_stream(video_id: str):
"""Diagnóstico completo del endpoint /stream: muestra qué devuelve cada cliente
Innertube (iOS, Android, tv_embedded) y yt-dlp por separado.
"""
video_id = extract_video_id(video_id)
proxy = _get_proxy_choice()
proxies = {"http": proxy, "https": proxy} if proxy else None
def _call(client_dict, label):
try:
vd_data = _np_get_visitor_data(client_dict, proxies)
resp = _np_call_player(video_id, client_dict, vd_data, proxies)
ps = resp.get("playabilityStatus") or {}
vd = resp.get("videoDetails") or {}
sd = resp.get("streamingData") or {}
return {
"client": label,
"status": ps.get("status"),
"reason": ps.get("reason", ""),
"title": vd.get("title"),
"description_preview": str(vd.get("shortDescription", "") or "")[:120],
"isLive": vd.get("isLive"),
"isLiveContent": vd.get("isLiveContent"),
"hlsManifestUrl": (sd.get("hlsManifestUrl") or "")[:100],
"formats_count": len(sd.get("formats", [])),
"adaptiveFormats_count": len(sd.get("adaptiveFormats", [])),
"streamingData_keys": list(sd.keys()),
}
except Exception as e:
return {"client": label, "error": str(e)}
results = [
_call(_NP_IOS, "iOS"),
_call(_NP_ANDROID, "Android"),
_call(_NP_TV_EMBEDDED, "tv_embedded"),
]
# yt-dlp dump-json con tv_embedded
ytdlp_meta = {}
try:
url = f"https://www.youtube.com/watch?v={video_id}"
cmd = ["yt-dlp", "--skip-download", "--dump-json", "--no-warnings",
"--extractor-args", "youtube:player_client=tv_embedded", url]
if proxy:
cmd.extend(["--proxy", proxy])
res = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if res.returncode == 0 and res.stdout.strip():
d = json.loads(res.stdout.strip())
ytdlp_meta = {
"title": d.get("title"),
"description_preview": str(d.get("description") or "")[:120],
"is_live": d.get("is_live"),
"was_live": d.get("was_live"),
}
else:
ytdlp_meta = {"error": res.stderr[:500]}
except Exception as e:
ytdlp_meta = {"error": str(e)}
return {
"video_id": video_id,
"innertube_clients": results,
"ytdlp_tv_embedded": ytdlp_meta,
}
@app.get("/transcript/{video_id}")
def transcript_endpoint(video_id: str, lang: str = "es"):
@ -1048,7 +1332,211 @@ async def upload_cookies(file: UploadFile = File(...)):
except Exception as e:
raise HTTPException(status_code=500, detail=f'Error al guardar cookies: {str(e)[:200]}')
@app.get("/debug/metadata/{video_id}")
# ── Rutas conocidas de perfiles de navegador en Linux/Mac/Windows ────────────
_BROWSER_PROFILES = {
"chrome": [
# Linux
os.path.expanduser("~/.config/google-chrome/Default"),
os.path.expanduser("~/.config/google-chrome/Profile 1"),
# Montaje desde docker-compose (host path mapeado)
"/host-chrome/Default",
"/host-chrome",
# macOS
os.path.expanduser("~/Library/Application Support/Google/Chrome/Default"),
],
"chromium": [
os.path.expanduser("~/.config/chromium/Default"),
"/host-chromium/Default",
"/host-chromium",
os.path.expanduser("~/Library/Application Support/Chromium/Default"),
],
"brave": [
os.path.expanduser("~/.config/BraveSoftware/Brave-Browser/Default"),
"/host-brave/Default",
"/host-brave",
os.path.expanduser("~/Library/Application Support/BraveSoftware/Brave-Browser/Default"),
],
"firefox": [
# Firefox usa --cookies-from-browser firefox directamente, yt-dlp detecta el perfil
os.path.expanduser("~/.mozilla/firefox"),
"/host-firefox",
],
"edge": [
os.path.expanduser("~/.config/microsoft-edge/Default"),
"/host-edge/Default",
],
}
def _find_browser_profile(browser: str) -> str | None:
"""Devuelve la primera ruta de perfil existente para el navegador dado."""
for path in _BROWSER_PROFILES.get(browser, []):
if os.path.exists(path):
return path
return None
def _extract_cookies_from_browser(browser: str, profile_path: str | None,
target: str, proxy: str | None = None) -> dict:
"""
Usa yt-dlp --cookies-from-browser para extraer cookies de YouTube
del perfil del navegador indicado y guardarlas en target (Netscape format).
"""
cmd = [
"yt-dlp",
"--cookies-from-browser", browser if not profile_path else f"{browser}:{profile_path}",
"--cookies", target, # exportar a archivo Netscape
"--skip-download",
"--no-warnings",
"--extractor-args", "youtube:player_client=tv_embedded",
"https://www.youtube.com/watch?v=dQw4w9WgXcQ", # video corto para forzar extracción
]
if proxy:
cmd.extend(["--proxy", proxy])
try:
res = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
stderr = res.stderr or ""
stdout = res.stdout or ""
# Verificar que el archivo fue creado y no está vacío
if os.path.exists(target) and os.path.getsize(target) > 100:
# Contar cookies de youtube.com
yt_cookies = 0
with open(target, "r", errors="ignore") as fh:
for line in fh:
if ".youtube.com" in line or "youtube.com" in line:
yt_cookies += 1
return {
"success": True,
"browser": browser,
"profile_path": profile_path,
"cookies_file": target,
"youtube_cookie_lines": yt_cookies,
"stderr_preview": stderr[:300] if stderr else "",
}
else:
return {
"success": False,
"browser": browser,
"error": "No se generó el archivo de cookies o está vacío",
"stderr": stderr[:500],
"stdout": stdout[:200],
"returncode": res.returncode,
}
except subprocess.TimeoutExpired:
return {"success": False, "browser": browser, "error": "Timeout al extraer cookies (60s)"}
except FileNotFoundError:
return {"success": False, "browser": browser, "error": "yt-dlp no encontrado"}
except Exception as e:
return {"success": False, "browser": browser, "error": str(e)[:200]}
@app.post("/extract_chrome_cookies")
def extract_chrome_cookies(browser: str = "chrome", profile_path: str = ""):
"""
Extrae cookies de YouTube directamente desde el perfil del navegador instalado
en el HOST (montado como volumen) y las guarda en /app/data/cookies.txt.
Parámetros:
- browser: chrome | chromium | brave | firefox | edge (default: chrome)
- profile_path: ruta manual al perfil (opcional, se auto-detecta si está vacío)
Requisito en docker-compose.yml (ya incluido):
volumes:
- ~/.config/google-chrome:/host-chrome:ro
Ejemplo:
curl -X POST "http://localhost:8282/extract_chrome_cookies?browser=chrome"
curl -X POST "http://localhost:8282/extract_chrome_cookies?browser=brave"
"""
proxy = _get_proxy_choice()
target = os.getenv('API_COOKIES_PATH', DEFAULT_COOKIES_PATH)
# Asegurar directorio destino
target_dir = os.path.dirname(target) or "."
os.makedirs(target_dir, exist_ok=True)
browser = browser.lower().strip()
valid_browsers = list(_BROWSER_PROFILES.keys())
if browser not in valid_browsers:
raise HTTPException(
status_code=400,
detail=f"Navegador '{browser}' no soportado. Usa: {', '.join(valid_browsers)}"
)
# Auto-detectar perfil si no se indicó
resolved_profile = profile_path.strip() or _find_browser_profile(browser)
if not resolved_profile and browser != "firefox":
# Para Firefox yt-dlp lo detecta solo; para el resto necesitamos la ruta
available = {b: _find_browser_profile(b) for b in valid_browsers}
found = {b: p for b, p in available.items() if p}
raise HTTPException(
status_code=404,
detail=(
f"No se encontró el perfil de '{browser}' en las rutas conocidas. "
f"Agrega el volumen en docker-compose.yml o pasa profile_path manualmente. "
f"Perfiles encontrados: {found if found else 'ninguno'}"
)
)
result = _extract_cookies_from_browser(browser, resolved_profile, target, proxy)
if not result["success"]:
raise HTTPException(status_code=500, detail=result)
return {
"detail": f"Cookies extraídas de {browser} y guardadas en {target}",
**result,
"next_step": "Los endpoints /transcript y /stream usarán estas cookies automáticamente.",
}
@app.get("/cookies/status")
def cookies_status():
"""Muestra el estado actual de las cookies configuradas y qué navegadores están disponibles."""
target = os.getenv('API_COOKIES_PATH', DEFAULT_COOKIES_PATH)
proxy = _get_proxy_choice()
# Estado del archivo de cookies actual
cookies_info = {"path": target, "exists": False, "size_bytes": 0, "youtube_lines": 0}
if os.path.exists(target):
cookies_info["exists"] = True
cookies_info["size_bytes"] = os.path.getsize(target)
yt_lines = 0
try:
with open(target, "r", errors="ignore") as fh:
for line in fh:
if "youtube.com" in line and not line.startswith("#"):
yt_lines += 1
except Exception:
pass
cookies_info["youtube_lines"] = yt_lines
# Detectar perfiles de navegador disponibles (en el contenedor / host montado)
available_browsers = {}
for browser in _BROWSER_PROFILES:
path = _find_browser_profile(browser)
available_browsers[browser] = {
"found": bool(path),
"profile_path": path,
}
return {
"cookies_file": cookies_info,
"available_browsers": available_browsers,
"extract_endpoint": "POST /extract_chrome_cookies?browser=chrome",
"upload_endpoint": "POST /upload_cookies",
"proxy": proxy or "no configurado",
"note": (
"Para usar cookies de Chrome del host, agrega en docker-compose.yml: "
"volumes: - ~/.config/google-chrome:/host-chrome:ro"
),
}
def debug_metadata(video_id: str):
"""Endpoint de depuración: obtiene --dump-json de yt-dlp para un video.
Devuelve la metadata (automatic_captions, subtitles, requested_subtitles) para inspección.
@ -1057,7 +1545,7 @@ def debug_metadata(video_id: str):
cookie_mgr = CookieManager()
cookiefile_path = cookie_mgr.get_cookiefile_path()
cookies_path = cookiefile_path or os.getenv('API_COOKIES_PATH', DEFAULT_COOKIES_PATH)
proxy = os.getenv('API_PROXY', DEFAULT_PROXY) or None
proxy = _get_proxy_choice()
url = f"https://www.youtube.com/watch?v={video_id}"
@ -1321,7 +1809,6 @@ def fetch_vtt_subtitles(video_id: str, lang: str = 'es'):
pass
return None, f'Error leyendo archivo de subtítulos: {str(e)[:200]}'
@app.post('/upload_vtt/{video_id}')
async def upload_vtt(video_id: str, file: UploadFile = File(...)):
"""Permite subir un archivo VTT para un video y devuelve segmentos parseados y texto.
@ -1409,6 +1896,110 @@ def transcript_alt(video_id: str, lang: str = 'es'):
'source': 'youtube-transcript-api'
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
@app.get('/playwright/stream/{video_id}')
def playwright_stream(video_id: str, profile: str = '', headless: bool = True, timeout: int = 60):
"""Usa Playwright (script tools/playwright_extract_m3u8.py) para abrir el video
en un navegador real (o con perfil persistente) y extraer las URLs m3u8 y cookies.
Parámetros:
- profile: ruta al user-data-dir de Chrome (opcional). Si el contenedor tiene el
perfil montado en /host-chrome, pásalo como `/host-chrome/Default`.
- headless: true/false para ejecutar sin UI.
- timeout: segundos máximos a esperar por la ejecución del script.
Uso (ejemplo):
curl 'http://localhost:8282/playwright/stream/cmqVmX2UVBM?headless=false&profile=/host-chrome'
Nota: el script genera `./data/cookies.txt` si logra extraer cookies.
"""
vid = extract_video_id(video_id)
if not vid:
raise HTTPException(status_code=400, detail='video_id inválido')
script = os.path.join(os.getcwd(), 'tools', 'playwright_extract_m3u8.py')
if not os.path.exists(script):
raise HTTPException(status_code=500, detail='Script Playwright no encontrado en tools/playwright_extract_m3u8.py')
cmd = ['python3', script, '--video', f'https://www.youtube.com/watch?v={vid}', '--timeout', str(timeout)]
if headless:
cmd.append('--headless')
# profile can be provided via env PLAYWRIGHT_PROFILE or param
profile_path = profile or os.getenv('PLAYWRIGHT_PROFILE', '')
if profile_path:
cmd.extend(['--profile', profile_path])
try:
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout + 10)
except subprocess.TimeoutExpired:
raise HTTPException(status_code=504, detail='Playwright timed out')
except Exception as e:
raise HTTPException(status_code=500, detail=f'Error ejecutando Playwright: {str(e)[:300]}')
if proc.returncode != 0:
# incluir stderr para diagnóstico
detail = (proc.stderr or proc.stdout or 'Error desconocido')[:2000]
return JSONResponse(status_code=500, content={"error": "Playwright error", "detail": detail})
try:
out = json.loads(proc.stdout or '{}')
except Exception:
return JSONResponse(status_code=500, content={"error": "No se pudo parsear salida Playwright", "raw": proc.stdout[:2000]})
return out
def _attempt_playwright_fallback(video_id: str, headless: bool = True, profile: str | None = None, timeout: int = 60):
"""Ejecuta el script Playwright para intentar extraer m3u8 y cookies.
Retorna (m3u8_url or None, cookies_saved_path or None, error_message or None)
"""
script = os.path.join(os.getcwd(), 'tools', 'playwright_extract_m3u8.py')
if not os.path.exists(script):
return None, None, 'Playwright extractor script no disponible'
cmd = ['python3', script, '--video', f'https://www.youtube.com/watch?v={video_id}', '--timeout', str(timeout)]
if headless:
cmd.append('--headless')
# profile can be provided via env PLAYWRIGHT_PROFILE or param
profile_path = profile or os.getenv('PLAYWRIGHT_PROFILE', '')
if profile_path:
cmd.extend(['--profile', profile_path])
try:
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout + 10)
except subprocess.TimeoutExpired:
return None, None, 'Playwright timed out'
except Exception as e:
return None, None, f'Error ejecutando Playwright: {str(e)[:200]}'
if proc.returncode != 0:
# incluir stderr para diagnóstico
detail = (proc.stderr or proc.stdout or 'Error desconocido')[:2000]
return None, None, f'Playwright error: {detail}'
try:
data = json.loads(proc.stdout or '{}')
except Exception:
return None, None, 'No se pudo parsear la salida de Playwright'
urls = data.get('m3u8_urls') or []
cookies_file = data.get('cookies_file')
if not urls:
return None, cookies_file, 'No se encontró m3u8 via Playwright'
# tomar la primera URL válida
m3u8 = urls[0]
# Si Playwright devolvió cookies, moverlas a API_COOKIES_PATH para que el resto del sistema las use
if cookies_file and os.path.exists(cookies_file):
target = os.getenv('API_COOKIES_PATH', DEFAULT_COOKIES_PATH)
try:
target_dir = os.path.dirname(target) or '.'
os.makedirs(target_dir, exist_ok=True)
# copiar contenido
with open(cookies_file, 'rb') as src, open(target, 'wb') as dst:
dst.write(src.read())
return m3u8, target, None
except Exception as e:
return m3u8, None, f'm3u8 encontrado pero no se pudo guardar cookies: {str(e)[:200]}'
return m3u8, None, None

README_PLAYWRIGHT.md

@ -0,0 +1,27 @@
Playwright extractor
=====================
This script opens a YouTube video with Playwright, captures network requests and looks for
M3U8/HLS URLs. Optionally it exports cookies in Netscape format to `./data/cookies.txt`.
Requirements (host):
pip install playwright
python -m playwright install
Example usage (headful, using your Chrome profile):
python3 tools/playwright_extract_m3u8.py --video https://www.youtube.com/watch?v=cmqVmX2UVBM --profile ~/.config/google-chrome
If you don't use a profile, drop `--profile` and the script will open a temporary context.
JSON output:
{
"m3u8_urls": [ ... ],
"cookies_file": "./data/cookies.txt",
"errors": []
}
Tips:
- Run on the host (not in the container) if you want to use your real Chrome profile.
- If Playwright can't find the browser executable, run `python -m playwright install`.
- To check the cookies exported through the API: `curl -s http://localhost:8282/cookies/status`.
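The core of the technique is just a network-request listener; a minimal self-contained sketch (not the full tools/playwright_extract_m3u8.py, which additionally handles persistent profiles and cookie export):

from playwright.sync_api import sync_playwright

def grab_m3u8(url: str, wait_ms: int = 15000) -> list[str]:
    found: list[str] = []
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        # collect any request whose URL looks like an HLS manifest
        page.on("request", lambda req: found.append(req.url) if ".m3u8" in req.url else None)
        page.goto(url, wait_until="domcontentloaded")
        page.wait_for_timeout(wait_ms)  # let the player start requesting media
        browser.close()
    return found

print(grab_m3u8("https://www.youtube.com/watch?v=cmqVmX2UVBM"))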

tools/expand_and_test_proxies.py

@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
expand_and_test_proxies.py
Lee tools/user_proxies.txt, genera variantes (intenta también SOCKS5/SOCKS5H en puertos comunes)
y ejecuta tools/generate_proxy_whitelist.py con la lista expandida.
Uso:
python3 tools/expand_and_test_proxies.py
Salida:
- tools/expanded_proxies.txt (lista expandida)
- llama a generate_proxy_whitelist.py y produce tools/whitelist.json y tools/whitelist.txt
"""
import os
import re
import subprocess
from pathlib import Path
BASE = Path(__file__).resolve().parent
USER_FILE = BASE / 'user_proxies.txt'
EXPANDED_FILE = BASE / 'expanded_proxies.txt'
GEN_SCRIPT = BASE / 'generate_proxy_whitelist.py'
COMMON_SOCKS_PORTS = [1080, 10808, 9050]
def normalize_line(line: str) -> str | None:
s = line.strip()
if not s or s.startswith('#'):
return None
return s
def parse_host_port(s: str):
# remove scheme if present
m = re.match(r'^(?:(?P<scheme>[a-zA-Z0-9+.-]+)://)?(?P<host>[^:/@]+)(?::(?P<port>\d+))?(?:@.*)?$', s)
if not m:
return None, None, None
scheme = m.group('scheme')
host = m.group('host')
port = m.group('port')
port = int(port) if port else None
return scheme, host, port
def build_variants(s: str):
scheme, host, port = parse_host_port(s)
variants = []
# keep original if it has scheme
if scheme:
variants.append(s)
else:
# assume http by default if none
if port:
variants.append(f'http://{host}:{port}')
else:
variants.append(f'http://{host}:80')
# Try socks5h on same port if port present
if port:
variants.append(f'socks5h://{host}:{port}')
# Try socks5h on common ports
for p in COMMON_SOCKS_PORTS:
variants.append(f'socks5h://{host}:{p}')
# Deduplicate preserving order
seen = set()
out = []
for v in variants:
if v in seen:
continue
seen.add(v)
out.append(v)
return out
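For example, a single http:// proxy expands into the same five-variant pattern visible in tools/expanded_proxies.txt below:

print(build_variants("http://142.93.202.130:3128"))
# ['http://142.93.202.130:3128',
#  'socks5h://142.93.202.130:3128',
#  'socks5h://142.93.202.130:1080',
#  'socks5h://142.93.202.130:10808',
#  'socks5h://142.93.202.130:9050']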
def main():
if not USER_FILE.exists():
print(f'No se encontró {USER_FILE}. Crea el archivo con proxies (uno por línea).')
return
all_variants = []
with USER_FILE.open('r', encoding='utf-8') as fh:
for line in fh:
s = normalize_line(line)
if not s:
continue
variants = build_variants(s)
all_variants.extend(variants)
# write expanded file
with EXPANDED_FILE.open('w', encoding='utf-8') as fh:
for v in all_variants:
fh.write(v + '\n')
print(f'Wrote expanded proxies to {EXPANDED_FILE} ({len(all_variants)} entries)')
# Call generator
cmd = [ 'python3', str(GEN_SCRIPT), '--input', str(EXPANDED_FILE), '--out-json', str(BASE / 'whitelist.json'), '--out-txt', str(BASE / 'whitelist.txt'), '--test-url', 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', '--concurrency', '6']
print('Running generator...')
try:
res = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
print('Generator exit code:', res.returncode)
print('stdout:\n', res.stdout)
print('stderr:\n', res.stderr)
except Exception as e:
print('Error running generator:', e)
if __name__ == '__main__':
main()

tools/expanded_proxies.txt

@ -0,0 +1,30 @@
http://48.210.225.96:80
socks5h://48.210.225.96:80
socks5h://48.210.225.96:1080
socks5h://48.210.225.96:10808
socks5h://48.210.225.96:9050
http://107.174.231.218:8888
socks5h://107.174.231.218:8888
socks5h://107.174.231.218:1080
socks5h://107.174.231.218:10808
socks5h://107.174.231.218:9050
http://188.239.43.6:80
socks5h://188.239.43.6:80
socks5h://188.239.43.6:1080
socks5h://188.239.43.6:10808
socks5h://188.239.43.6:9050
http://52.229.30.3:80
socks5h://52.229.30.3:80
socks5h://52.229.30.3:1080
socks5h://52.229.30.3:10808
socks5h://52.229.30.3:9050
http://142.93.202.130:3128
socks5h://142.93.202.130:3128
socks5h://142.93.202.130:1080
socks5h://142.93.202.130:10808
socks5h://142.93.202.130:9050
http://154.219.101.86:8888
socks5h://154.219.101.86:8888
socks5h://154.219.101.86:1080
socks5h://154.219.101.86:10808
socks5h://154.219.101.86:9050

tools/generate_proxy_whitelist.py

@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
generate_proxy_whitelist.py
Lee una lista de proxies desde un archivo (proxies.txt), prueba cada proxy con yt-dlp
intentando descargar metadata mínimo de YouTube, mide latencia y genera:
- whitelist.json : lista estructurada de proxies con estado y métricas
- whitelist.txt : solo proxies válidos, ordenados por latencia
Formato de proxies.txt: una URL por línea, ejemplos:
socks5h://127.0.0.1:1080
http://10.0.0.1:3128
Uso:
python3 tools/generate_proxy_whitelist.py --input tools/proxies.txt --out tools/whitelist.json --test-url https://www.youtube.com/watch?v=dQw4w9WgXcQ
Notas:
- Requiere tener `yt-dlp` instalado en el entorno donde se ejecuta este script.
- Este script intenta usar yt-dlp porque valida directamente que el proxy funciona
para las llamadas a YouTube (incluye manejo de JS/firma en yt-dlp cuando aplique).
- Ajusta timeouts y pruebas por concurrencia según tus necesidades.
"""
import argparse
import json
import subprocess
import time
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
# Mensajes que indican bloqueo/bot-check de yt-dlp
BOT_MARKERS = ("sign in to confirm", "not a bot", "sign in to", "HTTP Error 403", "HTTP Error 429")
def test_proxy(proxy: str, test_url: str, timeout: int = 25) -> dict:
"""Prueba un proxy ejecutando yt-dlp --dump-json sobre test_url.
Retorna dict con info: proxy, ok, rc, stderr, elapsed_ms, stdout_preview
"""
proxy = proxy.strip()
if not proxy:
return {"proxy": proxy, "ok": False, "error": "empty"}
cmd = [
"yt-dlp",
"--skip-download",
"--dump-json",
"--no-warnings",
"--extractor-args", "youtube:player_client=tv_embedded",
"--socket-timeout", "10",
test_url,
"--proxy", proxy,
]
start = time.perf_counter()
try:
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
elapsed = (time.perf_counter() - start) * 1000.0
stdout = proc.stdout or ""
stderr = proc.stderr or ""
rc = proc.returncode
# heurística de éxito: rc == 0 y stdout no vacío y no markers de bot en stderr
stderr_low = stderr.lower()
bot_hit = any(m.lower() in stderr_low for m in BOT_MARKERS)
ok = (rc == 0 and stdout.strip() != "" and not bot_hit)
return {
"proxy": proxy,
"ok": ok,
"rc": rc,
"elapsed_ms": int(elapsed),
"bot_detected": bool(bot_hit),
"stderr_preview": stderr[:1000],
"stdout_preview": stdout[:2000],
}
except subprocess.TimeoutExpired:
elapsed = (time.perf_counter() - start) * 1000.0
return {"proxy": proxy, "ok": False, "error": "timeout", "elapsed_ms": int(elapsed)}
except FileNotFoundError:
return {"proxy": proxy, "ok": False, "error": "yt-dlp-not-found"}
except Exception as e:
elapsed = (time.perf_counter() - start) * 1000.0
return {"proxy": proxy, "ok": False, "error": str(e), "elapsed_ms": int(elapsed)}
def generate_whitelist(input_file: str, out_json: str, out_txt: str, test_url: str, concurrency: int = 6):
proxies = []
with open(input_file, 'r', encoding='utf-8') as fh:
for line in fh:
line = line.strip()
if not line or line.startswith('#'):
continue
proxies.append(line)
results = []
with ThreadPoolExecutor(max_workers=concurrency) as ex:
futures = {ex.submit(test_proxy, p, test_url): p for p in proxies}
for fut in as_completed(futures):
try:
r = fut.result()
except Exception as e:
r = {"proxy": futures[fut], "ok": False, "error": str(e)}
results.append(r)
print(f"Tested: {r.get('proxy')} ok={r.get('ok')} rc={r.get('rc', '-') } elapsed={r.get('elapsed_ms','-')}ms")
# Ordenar proxies válidos por elapsed asc
valid = [r for r in results if r.get('ok')]
valid_sorted = sorted(valid, key=lambda x: x.get('elapsed_ms', 999999))
# Guardar JSON completo
out = {"tested_at": int(time.time()), "test_url": test_url, "results": results, "valid_count": len(valid_sorted)}
with open(out_json, 'w', encoding='utf-8') as fh:
json.dump(out, fh, indent=2, ensure_ascii=False)
# Guardar lista TXT (whitelist) con orden preferido
with open(out_txt, 'w', encoding='utf-8') as fh:
for r in valid_sorted:
fh.write(r['proxy'] + '\n')
return out, valid_sorted
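The resulting whitelist.json therefore has this overall shape (field names from the code above; values illustrative):

whitelist_example = {
    "tested_at": 1773811200,  # unix timestamp
    "test_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    "valid_count": 1,
    "results": [  # one record per tested proxy (see test_proxy above)
        {"proxy": "http://142.93.202.130:3128", "ok": True, "rc": 0,
         "elapsed_ms": 1930, "bot_detected": False},
    ],
}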
def _extract_proxies_from_json(obj):
"""Dado un objeto JSON (parsed), intenta extraer una lista de proxies en forma de URLs.
Soporta varias estructuras comunes:
- lista simple de strings: ["socks5h://1.2.3.4:1080", ...]
- lista de objetos con keys como ip, port, protocol
- objetos anidados con 'proxy' o 'url' o 'address'
"""
proxies = []
if isinstance(obj, list):
for item in obj:
if isinstance(item, str):
proxies.append(item.strip())
elif isinstance(item, dict):
# intentar keys comunes
# ejemplos: {"ip":"1.2.3.4","port":1080, "protocol":"socks5"}
ip = item.get('ip') or item.get('host') or item.get('address') or item.get('ip_address')
port = item.get('port') or item.get('p')
proto = item.get('protocol') or item.get('proto') or item.get('type') or item.get('scheme')
if ip and port:
proto = proto or 'http'
proxies.append(f"{proto}://{ip}:{port}")
continue
# buscar valores en keys que puedan contener url
for k in ('proxy','url','address','connect'):
v = item.get(k)
if isinstance(v, str) and v.strip():
proxies.append(v.strip())
break
elif isinstance(obj, dict):
# recurse into nested lists/dicts; obj.values() already covers keys
# like 'proxies', 'list' and 'data', so no second pass is needed
# (the previous extra pass over those keys duplicated every entry)
for v in obj.values():
if isinstance(v, (list, dict)):
proxies.extend(_extract_proxies_from_json(v))
return [p for p in proxies if p]
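With the dict branch deduplicated above, the supported shapes map to outputs like these:

print(_extract_proxies_from_json(["socks5h://1.2.3.4:1080"]))
# ['socks5h://1.2.3.4:1080']
print(_extract_proxies_from_json({"data": [{"ip": "1.2.3.4", "port": 3128, "protocol": "http"}]}))
# ['http://1.2.3.4:3128']
print(_extract_proxies_from_json([{"proxy": "http://5.6.7.8:80"}]))
# ['http://5.6.7.8:80']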
def download_and_write_proxies(url: str, out_file: str) -> int:
"""Descarga JSON desde `url`, extrae proxies y las escribe en `out_file`.
Retorna número de proxies escritas.
"""
try:
r = requests.get(url, timeout=30)
r.raise_for_status()
data = r.json()
except Exception as e:
raise RuntimeError(f"Error descargando/parsing JSON desde {url}: {e}")
proxies = _extract_proxies_from_json(data)
# normalize: bare 'ip:port' entries become http://ip:port
normalized = []
for p in proxies:
p = p.strip()
if not p:
continue
# bare 'ip:port' with no scheme prefix
if ':' in p and not p.lower().startswith(('http://','https://','socks5://','socks5h://','socks4://')):
# assume http
normalized.append('http://' + p)
else:
normalized.append(p)
# dedup preserving order
seen = set()
out = []
for p in normalized:
if p in seen:
continue
seen.add(p)
out.append(p)
if not out:
# flat objects with 'ip'/'port' keys are already handled above;
# if nothing matched at all, fail loudly
raise RuntimeError(f"No proxies could be extracted from the JSON at {url}")
with open(out_file, 'w', encoding='utf-8') as fh:
for p in out:
fh.write(p + '\n')
return len(out)
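# Normalization examples (illustrative):
#
#     "1.2.3.4:8080"            -> "http://1.2.3.4:8080"  (scheme assumed)
#     "socks5h://1.2.3.4:1080"  -> kept as-is
#     duplicate entries are dropped, first occurrence wins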
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Test a list of proxies with yt-dlp and generate a whitelist')
parser.add_argument('--input', default='tools/proxies.txt', help='Input file with proxies (one per line)')
parser.add_argument('--out-json', default='tools/whitelist.json', help='Output JSON results')
parser.add_argument('--out-txt', default='tools/whitelist.txt', help='Output whitelist (one proxy per line)')
parser.add_argument('--test-url', default='https://www.youtube.com/watch?v=dQw4w9WgXcQ', help='YouTube test URL to use')
parser.add_argument('--concurrency', type=int, default=6, help='Concurrent workers')
parser.add_argument('--from-url', default='', help='Download a JSON of proxies from a URL and use it as input')
args = parser.parse_args()
# If --from-url is provided, download the list into the input file first
input_file = args.input
temp_written = False
try:
if args.from_url:
print(f"Downloading proxies JSON from: {args.from_url}")
written = download_and_write_proxies(args.from_url, input_file)
print(f"Wrote {written} proxies to {input_file}")
temp_written = True
if not os.path.exists(input_file):
print(f"Input file {input_file} not found. Create it with one proxy per line or use --from-url.")
raise SystemExit(1)
out, valid_sorted = generate_whitelist(input_file, args.out_json, args.out_txt, args.test_url, args.concurrency)
print('\nSummary:')
print(f" Tested: {len(out['results'])}, Valid: {len(valid_sorted)}")
print(f" JSON: {args.out_json}, TXT whitelist: {args.out_txt}")
finally:
# the (possibly downloaded) input file is kept on disk for inspection
pass
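# Typical invocation (illustrative; flags match the argparse defaults above):
#
#     python3 tools/generate_proxy_whitelist.py \
#         --input tools/user_proxies.txt \
#         --out-json tools/whitelist.json \
#         --out-txt tools/whitelist.txt \
#         --concurrency 6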

177
tools/playwright_extract_m3u8.py Executable file

@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""playwright_extract_m3u8.py
Opens a YouTube page with Playwright and captures the first m3u8/HLS URL
seen in the network requests. It can also export cookies in Netscape format
for use with yt-dlp / your API.
Usage:
python3 tools/playwright_extract_m3u8.py --video https://www.youtube.com/watch?v=ID [--profile /path/to/profile] [--headless]
Requirements (host):
pip install playwright
python -m playwright install
Notes:
- Run it on the host (not in the container) so it can use your Chrome profile
and so Playwright can open a UI if you need to log in manually.
- If you pass --profile, a persistent session is launched from that directory
(useful to reuse an already-logged-in Chrome session). If omitted, a clean
context is used.
"""
import argparse
import os
import json
import time
from pathlib import Path
try:
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
except Exception as e:
print("playwright no está instalado. Instala con: pip install playwright && python -m playwright install")
raise
def write_netscape_cookie_file(cookies, target_path):
# cookies: list of dicts like Playwright provides
lines = ["# Netscape HTTP Cookie File"]
for c in cookies:
domain = c.get("domain", "")
flag = "TRUE" if domain.startswith('.') else "FALSE"
path = c.get("path", "/")
secure = "TRUE" if c.get("secure") else "FALSE"
# Playwright uses -1 for session cookies; the Netscape format expects 0
expires = str(max(0, int(c.get("expires") or 0)))
name = c.get("name", "")
value = c.get("value", "")
lines.append("\t".join([domain, flag, path, secure, expires, name, value]))
Path(target_path).parent.mkdir(parents=True, exist_ok=True)
with open(target_path, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines) + "\n")
def extract_m3u8(video_url: str, profile: str | None, headless: bool, timeout: int = 45, save_cookies: bool = True):
result = {"m3u8_urls": [], "cookies_file": None, "errors": []}
data_dir = Path.cwd() / "data"
data_dir.mkdir(exist_ok=True)
target_cookies = str(data_dir / "cookies.txt")
with sync_playwright() as p:
# Use Chromium for best compatibility with Chrome profiles
browser_type = p.chromium
# Send realistic headers; the browser's default User-Agent is kept
# (see the note below about the user_agent parameter)
extra_headers = {"Accept-Language": "en-US,en;q=0.9", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}
launch_args = ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
if profile:
# persistent context uses a profile dir (user data dir)
user_data_dir = profile
# avoid passing user_agent due to some Playwright builds missing API; set headers only
context = browser_type.launch_persistent_context(user_data_dir=user_data_dir, headless=headless, extra_http_headers=extra_headers, args=launch_args)
else:
# pass common args to help in container environments
browser = browser_type.launch(headless=headless, args=launch_args)
# do not pass user_agent param; rely on browser default and headers
context = browser.new_context(extra_http_headers=extra_headers)
# debug info
print(f"[playwright] started browser headless={headless} profile={'yes' if profile else 'no'}")
page = context.new_page()
collected = set()
def on_response(resp):
try:
url = resp.url
# heuristic: m3u8 in the URL, or an HLS content-type on the response
if ".m3u8" in url.lower():
collected.add(url)
else:
ct = resp.headers.get("content-type", "").lower()
if "mpegurl" in ct:  # matches application/vnd.apple.mpegurl and application/x-mpegurl
collected.add(url)
except Exception:
pass
page.on("response", on_response)
try:
page.goto(video_url, timeout=timeout * 1000)
# wait a bit for the manifest requests to fire
wait_seconds = 6
for _ in range(wait_seconds):
time.sleep(1)
# break early if we already found something
if collected:
break
# If no m3u8 was seen, try to force playback to trigger the manifest request
if not collected:
try:
# start playback via the DOM API
page.evaluate("() => { const v = document.querySelector('video'); if (v) v.play(); }")
except Exception:
pass
# wait a bit longer
time.sleep(3)
# collected is already a set; sort for stable output
result['m3u8_urls'] = sorted(collected)
# save cookies if requested
if save_cookies:
try:
cookies = context.cookies()
write_netscape_cookie_file(cookies, target_cookies)
result['cookies_file'] = target_cookies
except Exception as e:
result['errors'].append(f"cookie_export_error:{e}")
except PWTimeout as e:
result['errors'].append(f"page_timeout: {e}")
except Exception as e:
import traceback
result['errors'].append(traceback.format_exc())
finally:
# close the context and browser if they were created
try:
if 'context' in locals() and context:
context.close()
except Exception:
pass
try:
if 'browser' in locals() and browser:
browser.close()
except Exception:
pass
return result
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Playwright m3u8 extractor for YouTube')
parser.add_argument('--video', required=True, help='Video URL or ID (e.g. https://www.youtube.com/watch?v=ID)')
parser.add_argument('--profile', default='', help='Path to browser profile (user data dir) to reuse logged session')
parser.add_argument('--headless', action='store_true', help='Run headless')
parser.add_argument('--timeout', type=int, default=45, help='Timeout for page load (seconds)')
parser.add_argument('--no-cookies', dest='save_cookies', action='store_false', help='Don\'t save cookies to ./data/cookies.txt')
args = parser.parse_args()
video = args.video
if len(video) == 11 and not video.startswith('http'):
video = f'https://www.youtube.com/watch?v={video}'
res = extract_m3u8(video, profile=args.profile or None, headless=args.headless, timeout=args.timeout, save_cookies=args.save_cookies)
print(json.dumps(res, indent=2, ensure_ascii=False))
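# Consuming the JSON printed above from another script -- a minimal, hedged
# sketch (assumes yt-dlp is on PATH; `output` holds this script's stdout):
#
#     import json, subprocess
#     res = json.loads(output)
#     if res["m3u8_urls"]:
#         subprocess.run(["yt-dlp", "--cookies", res["cookies_file"] or "data/cookies.txt",
#                         res["m3u8_urls"][0]], check=False)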

0
tools/proxies_sample.txt Normal file

10
tools/user_proxies.txt Normal file

@ -0,0 +1,10 @@
# User-provided proxies (format: scheme://ip:port)
# Source: JSON list supplied by the user; Google-checked entries (field "google": true)
http://48.210.225.96:80
http://107.174.231.218:8888
http://188.239.43.6:80
http://52.229.30.3:80
http://142.93.202.130:3128
http://154.219.101.86:8888
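The tools/whitelist.json listing below shows each of these hosts probed not only at its listed port but also over socks5h:// on ports 1080, 9050 and 10808. A minimal sketch of that expansion step (the helper name is hypothetical and the port list is inferred from the tested combinations, not taken from expand_and_test_proxies.py itself):

from urllib.parse import urlparse

COMMON_SOCKS_PORTS = (1080, 9050, 10808)  # inferred from the combinations tested below

def expand_proxy(entry: str) -> list[str]:
    """Return the entry itself plus socks5h:// variants on common SOCKS ports."""
    parsed = urlparse(entry)
    host, port = parsed.hostname, parsed.port
    variants = [entry, f"socks5h://{host}:{port}"]
    variants += [f"socks5h://{host}:{p}" for p in COMMON_SOCKS_PORTS]
    seen, out = set(), []
    for v in variants:  # dedupe while preserving order
        if v not in seen:
            seen.add(v)
            out.append(v)
    return out

# expand_proxy("http://48.210.225.96:80") ->
#   ['http://48.210.225.96:80', 'socks5h://48.210.225.96:80',
#    'socks5h://48.210.225.96:1080', 'socks5h://48.210.225.96:9050',
#    'socks5h://48.210.225.96:10808']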

256
tools/whitelist.json Normal file

@ -0,0 +1,256 @@
{
"tested_at": 1772912928,
"test_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
"results": [
{
"proxy": "http://107.174.231.218:8888",
"ok": false,
"rc": 1,
"elapsed_ms": 2714,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request')) (caused by ProxyError(\"('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://107.174.231.218:8888",
"ok": false,
"rc": 1,
"elapsed_ms": 1473,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://48.210.225.96:9050",
"ok": false,
"rc": 1,
"elapsed_ms": 4559,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://48.210.225.96:80",
"ok": false,
"rc": 1,
"elapsed_ms": 4850,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "http://48.210.225.96:80",
"ok": false,
"rc": 1,
"elapsed_ms": 5159,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request')) (caused by ProxyError(\"('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://107.174.231.218:1080",
"ok": false,
"rc": 1,
"elapsed_ms": 1057,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://107.174.231.218:10808",
"ok": false,
"rc": 1,
"elapsed_ms": 1208,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://107.174.231.218:9050",
"ok": false,
"rc": 1,
"elapsed_ms": 1123,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://188.239.43.6:80",
"ok": false,
"rc": 1,
"elapsed_ms": 7075,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 104] Connection reset by peer (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 104] Connection reset by peer\"))\n",
"stdout_preview": ""
},
{
"proxy": "http://188.239.43.6:80",
"ok": false,
"rc": 1,
"elapsed_ms": 7192,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')) (caused by TransportError(\"('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))\"))\n",
"stdout_preview": ""
},
{
"proxy": "http://52.229.30.3:80",
"ok": false,
"rc": 1,
"elapsed_ms": 2332,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request')) (caused by ProxyError(\"('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://52.229.30.3:80",
"ok": false,
"rc": 1,
"elapsed_ms": 2265,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://48.210.225.96:1080",
"ok": false,
"error": "timeout",
"elapsed_ms": 25022
},
{
"proxy": "socks5h://48.210.225.96:10808",
"ok": false,
"error": "timeout",
"elapsed_ms": 25036
},
{
"proxy": "socks5h://52.229.30.3:9050",
"ok": false,
"rc": 1,
"elapsed_ms": 2430,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "http://142.93.202.130:3128",
"ok": false,
"rc": 1,
"elapsed_ms": 1668,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request')) (caused by ProxyError(\"('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://142.93.202.130:3128",
"ok": false,
"rc": 1,
"elapsed_ms": 1652,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://188.239.43.6:1080",
"ok": false,
"error": "timeout",
"elapsed_ms": 25031
},
{
"proxy": "socks5h://188.239.43.6:10808",
"ok": false,
"error": "timeout",
"elapsed_ms": 25030
},
{
"proxy": "socks5h://142.93.202.130:1080",
"ok": false,
"rc": 1,
"elapsed_ms": 1364,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://142.93.202.130:10808",
"ok": false,
"rc": 1,
"elapsed_ms": 1405,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://142.93.202.130:9050",
"ok": false,
"rc": 1,
"elapsed_ms": 1322,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://154.219.101.86:1080",
"ok": false,
"rc": 1,
"elapsed_ms": 2199,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "http://154.219.101.86:8888",
"ok": false,
"rc": 1,
"elapsed_ms": 3651,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request')) (caused by ProxyError(\"('Unable to connect to proxy', OSError('Tunnel connection failed: 400 Bad Request'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://154.219.101.86:8888",
"ok": false,
"rc": 1,
"elapsed_ms": 3628,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: ('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48')) (caused by ProxyError(\"('[Errno 0] Invalid response version from server. Expected 05 got 48', InvalidVersionError(0, 'Invalid response version from server. Expected 05 got 48'))\")); please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://154.219.101.86:10808",
"ok": false,
"rc": 1,
"elapsed_ms": 1981,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://188.239.43.6:9050",
"ok": false,
"error": "timeout",
"elapsed_ms": 25023
},
{
"proxy": "socks5h://154.219.101.86:9050",
"ok": false,
"rc": 1,
"elapsed_ms": 1962,
"bot_detected": false,
"stderr_preview": "ERROR: [youtube] dQw4w9WgXcQ: Unable to download API page: SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused (caused by TransportError(\"SocksHTTPSConnection(host='www.youtube.com', port=443): Failed to establish a new connection: [Errno 111] Connection refused\"))\n",
"stdout_preview": ""
},
{
"proxy": "socks5h://52.229.30.3:1080",
"ok": false,
"error": "timeout",
"elapsed_ms": 25026
},
{
"proxy": "socks5h://52.229.30.3:10808",
"ok": false,
"error": "timeout",
"elapsed_ms": 25028
}
],
"valid_count": 0
}
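Every combination in this run failed (valid_count is 0), so the whitelist.txt below ends up empty. When a run does yield working proxies, the report's schema can be consumed like this (a minimal sketch using the default output path):

import json

with open("tools/whitelist.json", encoding="utf-8") as fh:
    report = json.load(fh)

# keep only proxies that passed, fastest first -- the same ordering
# generate_whitelist() writes to whitelist.txt
valid = sorted((r for r in report["results"] if r.get("ok")),
               key=lambda r: r.get("elapsed_ms", 10**9))
for r in valid:
    print(f"{r['proxy']}  ({r['elapsed_ms']} ms)")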

0
tools/whitelist.txt Normal file