mirror of
https://github.com/pacnpal/Pac-cogs.git
synced 2025-12-20 02:41:06 -05:00
Switching from regex patterns to yt-dlp simulation for URL detection:
Removed the regex-based URL detection Added proper yt-dlp simulation to check if URLs are supported Added better error handling for URL checking Added detailed logging of URL detection results Improving FFmpeg integration: Added proper ffprobe binary download alongside FFmpeg Added better verification of both binaries Added retry mechanisms for binary downloads Added proper cleanup of failed downloads Enhancing error handling and logging: Added detailed logging throughout the system Added better error recovery mechanisms Added proper cleanup of temporary files Added better tracking of failed operations
This commit is contained in:
@@ -12,7 +12,7 @@ import platform
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from contextlib import contextmanager
|
||||
from typing import Optional
|
||||
from typing import Optional, Dict, List
|
||||
import time
|
||||
|
||||
from .exceptions import DownloadError
|
||||
@@ -37,31 +37,31 @@ class FFmpegDownloader:
|
||||
"Windows": {
|
||||
"x86_64": {
|
||||
"url": "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip",
|
||||
"bin_name": "ffmpeg.exe",
|
||||
"bin_names": ["ffmpeg.exe", "ffprobe.exe"],
|
||||
}
|
||||
},
|
||||
"Linux": {
|
||||
"x86_64": {
|
||||
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
|
||||
"bin_name": "ffmpeg",
|
||||
"bin_names": ["ffmpeg", "ffprobe"],
|
||||
},
|
||||
"aarch64": {
|
||||
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-arm64-static.tar.xz",
|
||||
"bin_name": "ffmpeg",
|
||||
"bin_names": ["ffmpeg", "ffprobe"],
|
||||
},
|
||||
"armv7l": {
|
||||
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-armhf-static.tar.xz",
|
||||
"bin_name": "ffmpeg",
|
||||
"bin_names": ["ffmpeg", "ffprobe"],
|
||||
},
|
||||
},
|
||||
"Darwin": {
|
||||
"x86_64": {
|
||||
"url": "https://evermeet.cx/ffmpeg/getrelease/zip",
|
||||
"bin_name": "ffmpeg",
|
||||
"bin_names": ["ffmpeg", "ffprobe"],
|
||||
},
|
||||
"arm64": {
|
||||
"url": "https://evermeet.cx/ffmpeg/getrelease/zip",
|
||||
"bin_name": "ffmpeg",
|
||||
"bin_names": ["ffmpeg", "ffprobe"],
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -73,15 +73,17 @@ class FFmpegDownloader:
|
||||
if self.machine == "arm64":
|
||||
self.machine = "aarch64" # Normalize ARM64 naming
|
||||
self.base_dir = base_dir
|
||||
self.ffmpeg_path = self.base_dir / self._get_binary_name()
|
||||
self.ffmpeg_path = self.base_dir / self._get_binary_names()[0]
|
||||
self.ffprobe_path = self.base_dir / self._get_binary_names()[1]
|
||||
|
||||
logger.info(f"Initialized FFmpeg downloader for {system}/{machine}")
|
||||
logger.info(f"FFmpeg binary path: {self.ffmpeg_path}")
|
||||
logger.info(f"FFprobe binary path: {self.ffprobe_path}")
|
||||
|
||||
def _get_binary_name(self) -> str:
|
||||
"""Get the appropriate binary name for the current system"""
|
||||
def _get_binary_names(self) -> List[str]:
|
||||
"""Get the appropriate binary names for the current system"""
|
||||
try:
|
||||
return self.FFMPEG_URLS[self.system][self.machine]["bin_name"]
|
||||
return self.FFMPEG_URLS[self.system][self.machine]["bin_names"]
|
||||
except KeyError:
|
||||
raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}")
|
||||
|
||||
@@ -92,8 +94,8 @@ class FFmpegDownloader:
|
||||
except KeyError:
|
||||
raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}")
|
||||
|
||||
def download(self) -> Path:
|
||||
"""Download and set up FFmpeg binary with retries"""
|
||||
def download(self) -> Dict[str, Path]:
|
||||
"""Download and set up FFmpeg and FFprobe binaries with retries"""
|
||||
max_retries = 3
|
||||
retry_delay = 5
|
||||
last_error = None
|
||||
@@ -106,12 +108,13 @@ class FFmpegDownloader:
|
||||
self.base_dir.mkdir(parents=True, exist_ok=True)
|
||||
os.chmod(str(self.base_dir), 0o777)
|
||||
|
||||
# Clean up any existing file or directory
|
||||
if self.ffmpeg_path.exists():
|
||||
if self.ffmpeg_path.is_dir():
|
||||
shutil.rmtree(str(self.ffmpeg_path))
|
||||
else:
|
||||
self.ffmpeg_path.unlink()
|
||||
# Clean up any existing files
|
||||
for binary_path in [self.ffmpeg_path, self.ffprobe_path]:
|
||||
if binary_path.exists():
|
||||
if binary_path.is_dir():
|
||||
shutil.rmtree(str(binary_path))
|
||||
else:
|
||||
binary_path.unlink()
|
||||
|
||||
with temp_path_context() as temp_dir:
|
||||
# Download archive
|
||||
@@ -121,18 +124,23 @@ class FFmpegDownloader:
|
||||
if not self._verify_download(archive_path):
|
||||
raise DownloadError("Downloaded file verification failed")
|
||||
|
||||
# Extract FFmpeg binary
|
||||
self._extract_binary(archive_path, temp_dir)
|
||||
# Extract binaries
|
||||
self._extract_binaries(archive_path, temp_dir)
|
||||
|
||||
# Set proper permissions
|
||||
os.chmod(str(self.ffmpeg_path), 0o755)
|
||||
for binary_path in [self.ffmpeg_path, self.ffprobe_path]:
|
||||
os.chmod(str(binary_path), 0o755)
|
||||
|
||||
# Verify binary
|
||||
# Verify binaries
|
||||
if not self.verify():
|
||||
raise DownloadError("FFmpeg binary verification failed")
|
||||
raise DownloadError("Binary verification failed")
|
||||
|
||||
logger.info(f"Successfully downloaded FFmpeg to {self.ffmpeg_path}")
|
||||
return self.ffmpeg_path
|
||||
logger.info(f"Successfully downloaded FFprobe to {self.ffprobe_path}")
|
||||
return {
|
||||
"ffmpeg": self.ffmpeg_path,
|
||||
"ffprobe": self.ffprobe_path
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
last_error = str(e)
|
||||
@@ -193,9 +201,9 @@ class FFmpegDownloader:
|
||||
logger.error(f"Download verification failed: {str(e)}")
|
||||
return False
|
||||
|
||||
def _extract_binary(self, archive_path: Path, temp_dir: str):
|
||||
"""Extract FFmpeg binary from archive"""
|
||||
logger.info("Extracting FFmpeg binary")
|
||||
def _extract_binaries(self, archive_path: Path, temp_dir: str):
|
||||
"""Extract FFmpeg and FFprobe binaries from archive"""
|
||||
logger.info("Extracting FFmpeg and FFprobe binaries")
|
||||
|
||||
if self.system == "Windows":
|
||||
self._extract_zip(archive_path, temp_dir)
|
||||
@@ -205,50 +213,70 @@ class FFmpegDownloader:
|
||||
def _extract_zip(self, archive_path: Path, temp_dir: str):
|
||||
"""Extract from zip archive (Windows)"""
|
||||
with zipfile.ZipFile(archive_path, "r") as zip_ref:
|
||||
ffmpeg_files = [f for f in zip_ref.namelist() if self._get_binary_name() in f]
|
||||
if not ffmpeg_files:
|
||||
raise DownloadError("FFmpeg binary not found in archive")
|
||||
|
||||
zip_ref.extract(ffmpeg_files[0], temp_dir)
|
||||
extracted_path = Path(temp_dir) / ffmpeg_files[0]
|
||||
shutil.copy2(extracted_path, self.ffmpeg_path)
|
||||
binary_names = self._get_binary_names()
|
||||
for binary_name in binary_names:
|
||||
binary_files = [f for f in zip_ref.namelist() if binary_name in f]
|
||||
if not binary_files:
|
||||
raise DownloadError(f"{binary_name} not found in archive")
|
||||
|
||||
zip_ref.extract(binary_files[0], temp_dir)
|
||||
extracted_path = Path(temp_dir) / binary_files[0]
|
||||
target_path = self.base_dir / binary_name
|
||||
shutil.copy2(extracted_path, target_path)
|
||||
|
||||
def _extract_tar(self, archive_path: Path, temp_dir: str):
|
||||
"""Extract from tar archive (Linux/macOS)"""
|
||||
with tarfile.open(archive_path, "r:xz") as tar_ref:
|
||||
ffmpeg_files = [f for f in tar_ref.getnames() if f.endswith("/ffmpeg")]
|
||||
if not ffmpeg_files:
|
||||
raise DownloadError("FFmpeg binary not found in archive")
|
||||
|
||||
tar_ref.extract(ffmpeg_files[0], temp_dir)
|
||||
extracted_path = Path(temp_dir) / ffmpeg_files[0]
|
||||
shutil.copy2(extracted_path, self.ffmpeg_path)
|
||||
binary_names = self._get_binary_names()
|
||||
for binary_name in binary_names:
|
||||
binary_files = [f for f in tar_ref.getnames() if f.endswith(f"/{binary_name}")]
|
||||
if not binary_files:
|
||||
raise DownloadError(f"{binary_name} not found in archive")
|
||||
|
||||
tar_ref.extract(binary_files[0], temp_dir)
|
||||
extracted_path = Path(temp_dir) / binary_files[0]
|
||||
target_path = self.base_dir / binary_name
|
||||
shutil.copy2(extracted_path, target_path)
|
||||
|
||||
def verify(self) -> bool:
|
||||
"""Verify FFmpeg binary works"""
|
||||
"""Verify FFmpeg and FFprobe binaries work"""
|
||||
try:
|
||||
if not self.ffmpeg_path.exists():
|
||||
if not self.ffmpeg_path.exists() or not self.ffprobe_path.exists():
|
||||
return False
|
||||
|
||||
# Ensure proper permissions
|
||||
os.chmod(str(self.ffmpeg_path), 0o755)
|
||||
os.chmod(str(self.ffprobe_path), 0o755)
|
||||
|
||||
# Test FFmpeg functionality
|
||||
result = subprocess.run(
|
||||
ffmpeg_result = subprocess.run(
|
||||
[str(self.ffmpeg_path), "-version"],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
timeout=5
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
version = result.stdout.decode().split('\n')[0]
|
||||
logger.info(f"FFmpeg verification successful: {version}")
|
||||
# Test FFprobe functionality
|
||||
ffprobe_result = subprocess.run(
|
||||
[str(self.ffprobe_path), "-version"],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
timeout=5
|
||||
)
|
||||
|
||||
if ffmpeg_result.returncode == 0 and ffprobe_result.returncode == 0:
|
||||
ffmpeg_version = ffmpeg_result.stdout.decode().split('\n')[0]
|
||||
ffprobe_version = ffprobe_result.stdout.decode().split('\n')[0]
|
||||
logger.info(f"FFmpeg verification successful: {ffmpeg_version}")
|
||||
logger.info(f"FFprobe verification successful: {ffprobe_version}")
|
||||
return True
|
||||
else:
|
||||
logger.error(f"FFmpeg verification failed: {result.stderr.decode()}")
|
||||
if ffmpeg_result.returncode != 0:
|
||||
logger.error(f"FFmpeg verification failed: {ffmpeg_result.stderr.decode()}")
|
||||
if ffprobe_result.returncode != 0:
|
||||
logger.error(f"FFprobe verification failed: {ffprobe_result.stderr.decode()}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"FFmpeg verification failed: {e}")
|
||||
logger.error(f"Binary verification failed: {e}")
|
||||
return False
|
||||
|
||||
@@ -316,15 +316,22 @@ class VideoProcessor:
|
||||
if monitored_channels and message.channel.id not in monitored_channels:
|
||||
return
|
||||
|
||||
# Find all video URLs in message with improved pattern matching
|
||||
# Find all video URLs in message using yt-dlp simulation
|
||||
logger.info(f"Checking message {message.id} for video URLs...")
|
||||
urls = []
|
||||
if message.guild.id in self.components:
|
||||
downloader = self.components[message.guild.id]["downloader"]
|
||||
if downloader:
|
||||
# Check each word in the message
|
||||
for word in message.content.split():
|
||||
if downloader.is_supported_url(word):
|
||||
urls.append(word)
|
||||
# Use yt-dlp simulation to check if URL is supported
|
||||
try:
|
||||
if downloader.is_supported_url(word):
|
||||
logger.info(f"Found supported URL: {word}")
|
||||
urls.append(word)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking URL {word}: {str(e)}")
|
||||
continue
|
||||
|
||||
if urls:
|
||||
logger.info(f"Found {len(urls)} video URLs in message {message.id}")
|
||||
|
||||
@@ -36,7 +36,6 @@ class VideoDownloader:
|
||||
# Ensure download path exists with proper permissions
|
||||
self.download_path = Path(download_path)
|
||||
self.download_path.mkdir(parents=True, exist_ok=True)
|
||||
# Ensure directory has rwx permissions for user and rx for group/others
|
||||
os.chmod(str(self.download_path), 0o755)
|
||||
logger.info(f"Initialized download directory: {self.download_path}")
|
||||
|
||||
@@ -44,14 +43,10 @@ class VideoDownloader:
|
||||
self.max_quality = max_quality
|
||||
self.max_file_size = max_file_size
|
||||
self.enabled_sites = enabled_sites
|
||||
self.url_patterns = self._get_url_patterns()
|
||||
|
||||
# Initialize FFmpeg manager
|
||||
self.ffmpeg_mgr = FFmpegManager()
|
||||
ffmpeg_path = self.ffmpeg_mgr.get_ffmpeg_path()
|
||||
if not os.path.exists(ffmpeg_path):
|
||||
raise FileNotFoundError(f"FFmpeg not found at {ffmpeg_path}")
|
||||
logger.info(f"Using FFmpeg from: {ffmpeg_path}")
|
||||
logger.info(f"FFmpeg path: {self.ffmpeg_mgr.get_ffmpeg_path()}")
|
||||
|
||||
# Create thread pool for this instance
|
||||
self.download_pool = ThreadPoolExecutor(
|
||||
@@ -63,7 +58,7 @@ class VideoDownloader:
|
||||
self.active_downloads: Dict[str, str] = {}
|
||||
self._downloads_lock = asyncio.Lock()
|
||||
|
||||
# Configure yt-dlp options with absolute FFmpeg path
|
||||
# Configure yt-dlp options
|
||||
self.ydl_opts = {
|
||||
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best", # More flexible format
|
||||
"outtmpl": "%(title)s.%(ext)s", # Base filename only, path added later
|
||||
@@ -79,55 +74,55 @@ class VideoDownloader:
|
||||
"extractor_retries": self.MAX_RETRIES,
|
||||
"postprocessor_hooks": [self._check_file_size],
|
||||
"progress_hooks": [self._progress_hook],
|
||||
"ffmpeg_location": str(ffmpeg_path), # Convert Path to string
|
||||
"prefer_ffmpeg": True, # Force use of FFmpeg
|
||||
"hls_prefer_ffmpeg": True, # Use FFmpeg for HLS
|
||||
"ffmpeg_location": self.ffmpeg_mgr.get_ffmpeg_path(),
|
||||
"logger": logger, # Use our logger
|
||||
"ignoreerrors": True, # Don't stop on download errors
|
||||
"no_color": True, # Disable ANSI colors in output
|
||||
"geo_bypass": True, # Try to bypass geo-restrictions
|
||||
"socket_timeout": 30, # Increase timeout
|
||||
"external_downloader": {
|
||||
"m3u8": "ffmpeg", # Use FFmpeg for m3u8 downloads
|
||||
},
|
||||
"external_downloader_args": {
|
||||
"ffmpeg": ["-v", "warning"], # Reduce FFmpeg verbosity
|
||||
}
|
||||
}
|
||||
logger.info("VideoDownloader initialized successfully")
|
||||
|
||||
def __del__(self):
|
||||
"""Ensure thread pool is shutdown and files are cleaned up"""
|
||||
def is_supported_url(self, url: str) -> bool:
|
||||
"""Check if URL is supported by attempting a simulated download"""
|
||||
try:
|
||||
# Cancel all active downloads
|
||||
for file_path in self.active_downloads.values():
|
||||
# Configure yt-dlp for simulation
|
||||
simulate_opts = {
|
||||
**self.ydl_opts,
|
||||
"simulate": True, # Only simulate download
|
||||
"quiet": True, # Reduce output noise
|
||||
"no_warnings": True,
|
||||
"extract_flat": True, # Don't download video info
|
||||
"skip_download": True, # Skip actual download
|
||||
"format": "best", # Don't spend time finding best format
|
||||
}
|
||||
|
||||
# Create a new yt-dlp instance for simulation
|
||||
with yt_dlp.YoutubeDL(simulate_opts) as ydl:
|
||||
try:
|
||||
secure_delete_file(file_path)
|
||||
# Try to extract info without downloading
|
||||
info = ydl.extract_info(url, download=False)
|
||||
if info is None:
|
||||
logger.debug(f"URL not supported: {url}")
|
||||
return False
|
||||
|
||||
# Check if site is enabled (if enabled_sites is configured)
|
||||
if self.enabled_sites:
|
||||
extractor = info.get('extractor', '').lower()
|
||||
if not any(site.lower() in extractor for site in self.enabled_sites):
|
||||
logger.info(f"Site {extractor} not in enabled sites list")
|
||||
return False
|
||||
|
||||
logger.info(f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting file during cleanup: {str(e)}")
|
||||
self.active_downloads.clear()
|
||||
|
||||
# Shutdown thread pool
|
||||
if hasattr(self, "download_pool"):
|
||||
self.download_pool.shutdown(wait=True)
|
||||
if "Unsupported URL" not in str(e):
|
||||
logger.error(f"Error checking URL {url}: {str(e)}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during VideoDownloader cleanup: {str(e)}")
|
||||
|
||||
def _get_url_patterns(self) -> List[Tuple[str, str]]:
|
||||
"""Get URL patterns and names for supported sites"""
|
||||
patterns = []
|
||||
try:
|
||||
with yt_dlp.YoutubeDL() as ydl:
|
||||
for ie in ydl._ies:
|
||||
if hasattr(ie, "_VALID_URL") and ie._VALID_URL:
|
||||
if not self.enabled_sites or any(
|
||||
site.lower() in ie.IE_NAME.lower()
|
||||
for site in self.enabled_sites
|
||||
):
|
||||
patterns.append((ie._VALID_URL, ie.IE_NAME))
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting URL patterns: {str(e)}")
|
||||
return patterns
|
||||
logger.error(f"Error during URL check: {str(e)}")
|
||||
return False
|
||||
|
||||
def _check_file_size(self, info):
|
||||
"""Check if file size is within limits"""
|
||||
@@ -342,16 +337,3 @@ class VideoDownloader:
|
||||
return False
|
||||
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
|
||||
return False
|
||||
|
||||
def is_supported_url(self, url: str) -> bool:
|
||||
"""Check if URL is supported using regex patterns"""
|
||||
try:
|
||||
# Try each pattern
|
||||
for pattern, site_name in self.url_patterns:
|
||||
if re.match(pattern, url):
|
||||
logger.debug(f"URL matched pattern for {site_name}")
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking URL support: {str(e)}")
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user