Switching from regex patterns to yt-dlp simulation for URL detection:

Removed the regex-based URL detection
Added proper yt-dlp simulation to check if URLs are supported
Added better error handling for URL checking
Added detailed logging of URL detection results
Improving FFmpeg integration:

Added proper ffprobe binary download alongside FFmpeg
Added better verification of both binaries
Added retry mechanisms for binary downloads
Added proper cleanup of failed downloads
Enhancing error handling and logging:

Added detailed logging throughout the system
Added better error recovery mechanisms
Added proper cleanup of temporary files
Added better tracking of failed operations
This commit is contained in:
pacnpal
2024-11-15 03:40:53 +00:00
parent 8503fc6fdd
commit 3e50faec75
3 changed files with 128 additions and 111 deletions

View File

@@ -12,7 +12,7 @@ import platform
import hashlib import hashlib
from pathlib import Path from pathlib import Path
from contextlib import contextmanager from contextlib import contextmanager
from typing import Optional from typing import Optional, Dict, List
import time import time
from .exceptions import DownloadError from .exceptions import DownloadError
@@ -37,31 +37,31 @@ class FFmpegDownloader:
"Windows": { "Windows": {
"x86_64": { "x86_64": {
"url": "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip", "url": "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip",
"bin_name": "ffmpeg.exe", "bin_names": ["ffmpeg.exe", "ffprobe.exe"],
} }
}, },
"Linux": { "Linux": {
"x86_64": { "x86_64": {
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz", "url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
"bin_name": "ffmpeg", "bin_names": ["ffmpeg", "ffprobe"],
}, },
"aarch64": { "aarch64": {
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-arm64-static.tar.xz", "url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-arm64-static.tar.xz",
"bin_name": "ffmpeg", "bin_names": ["ffmpeg", "ffprobe"],
}, },
"armv7l": { "armv7l": {
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-armhf-static.tar.xz", "url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-armhf-static.tar.xz",
"bin_name": "ffmpeg", "bin_names": ["ffmpeg", "ffprobe"],
}, },
}, },
"Darwin": { "Darwin": {
"x86_64": { "x86_64": {
"url": "https://evermeet.cx/ffmpeg/getrelease/zip", "url": "https://evermeet.cx/ffmpeg/getrelease/zip",
"bin_name": "ffmpeg", "bin_names": ["ffmpeg", "ffprobe"],
}, },
"arm64": { "arm64": {
"url": "https://evermeet.cx/ffmpeg/getrelease/zip", "url": "https://evermeet.cx/ffmpeg/getrelease/zip",
"bin_name": "ffmpeg", "bin_names": ["ffmpeg", "ffprobe"],
}, },
}, },
} }
@@ -73,15 +73,17 @@ class FFmpegDownloader:
if self.machine == "arm64": if self.machine == "arm64":
self.machine = "aarch64" # Normalize ARM64 naming self.machine = "aarch64" # Normalize ARM64 naming
self.base_dir = base_dir self.base_dir = base_dir
self.ffmpeg_path = self.base_dir / self._get_binary_name() self.ffmpeg_path = self.base_dir / self._get_binary_names()[0]
self.ffprobe_path = self.base_dir / self._get_binary_names()[1]
logger.info(f"Initialized FFmpeg downloader for {system}/{machine}") logger.info(f"Initialized FFmpeg downloader for {system}/{machine}")
logger.info(f"FFmpeg binary path: {self.ffmpeg_path}") logger.info(f"FFmpeg binary path: {self.ffmpeg_path}")
logger.info(f"FFprobe binary path: {self.ffprobe_path}")
def _get_binary_name(self) -> str: def _get_binary_names(self) -> List[str]:
"""Get the appropriate binary name for the current system""" """Get the appropriate binary names for the current system"""
try: try:
return self.FFMPEG_URLS[self.system][self.machine]["bin_name"] return self.FFMPEG_URLS[self.system][self.machine]["bin_names"]
except KeyError: except KeyError:
raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}") raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}")
@@ -92,8 +94,8 @@ class FFmpegDownloader:
except KeyError: except KeyError:
raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}") raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}")
def download(self) -> Path: def download(self) -> Dict[str, Path]:
"""Download and set up FFmpeg binary with retries""" """Download and set up FFmpeg and FFprobe binaries with retries"""
max_retries = 3 max_retries = 3
retry_delay = 5 retry_delay = 5
last_error = None last_error = None
@@ -106,12 +108,13 @@ class FFmpegDownloader:
self.base_dir.mkdir(parents=True, exist_ok=True) self.base_dir.mkdir(parents=True, exist_ok=True)
os.chmod(str(self.base_dir), 0o777) os.chmod(str(self.base_dir), 0o777)
# Clean up any existing file or directory # Clean up any existing files
if self.ffmpeg_path.exists(): for binary_path in [self.ffmpeg_path, self.ffprobe_path]:
if self.ffmpeg_path.is_dir(): if binary_path.exists():
shutil.rmtree(str(self.ffmpeg_path)) if binary_path.is_dir():
else: shutil.rmtree(str(binary_path))
self.ffmpeg_path.unlink() else:
binary_path.unlink()
with temp_path_context() as temp_dir: with temp_path_context() as temp_dir:
# Download archive # Download archive
@@ -121,18 +124,23 @@ class FFmpegDownloader:
if not self._verify_download(archive_path): if not self._verify_download(archive_path):
raise DownloadError("Downloaded file verification failed") raise DownloadError("Downloaded file verification failed")
# Extract FFmpeg binary # Extract binaries
self._extract_binary(archive_path, temp_dir) self._extract_binaries(archive_path, temp_dir)
# Set proper permissions # Set proper permissions
os.chmod(str(self.ffmpeg_path), 0o755) for binary_path in [self.ffmpeg_path, self.ffprobe_path]:
os.chmod(str(binary_path), 0o755)
# Verify binary # Verify binaries
if not self.verify(): if not self.verify():
raise DownloadError("FFmpeg binary verification failed") raise DownloadError("Binary verification failed")
logger.info(f"Successfully downloaded FFmpeg to {self.ffmpeg_path}") logger.info(f"Successfully downloaded FFmpeg to {self.ffmpeg_path}")
return self.ffmpeg_path logger.info(f"Successfully downloaded FFprobe to {self.ffprobe_path}")
return {
"ffmpeg": self.ffmpeg_path,
"ffprobe": self.ffprobe_path
}
except Exception as e: except Exception as e:
last_error = str(e) last_error = str(e)
@@ -193,9 +201,9 @@ class FFmpegDownloader:
logger.error(f"Download verification failed: {str(e)}") logger.error(f"Download verification failed: {str(e)}")
return False return False
def _extract_binary(self, archive_path: Path, temp_dir: str): def _extract_binaries(self, archive_path: Path, temp_dir: str):
"""Extract FFmpeg binary from archive""" """Extract FFmpeg and FFprobe binaries from archive"""
logger.info("Extracting FFmpeg binary") logger.info("Extracting FFmpeg and FFprobe binaries")
if self.system == "Windows": if self.system == "Windows":
self._extract_zip(archive_path, temp_dir) self._extract_zip(archive_path, temp_dir)
@@ -205,50 +213,70 @@ class FFmpegDownloader:
def _extract_zip(self, archive_path: Path, temp_dir: str): def _extract_zip(self, archive_path: Path, temp_dir: str):
"""Extract from zip archive (Windows)""" """Extract from zip archive (Windows)"""
with zipfile.ZipFile(archive_path, "r") as zip_ref: with zipfile.ZipFile(archive_path, "r") as zip_ref:
ffmpeg_files = [f for f in zip_ref.namelist() if self._get_binary_name() in f] binary_names = self._get_binary_names()
if not ffmpeg_files: for binary_name in binary_names:
raise DownloadError("FFmpeg binary not found in archive") binary_files = [f for f in zip_ref.namelist() if binary_name in f]
if not binary_files:
zip_ref.extract(ffmpeg_files[0], temp_dir) raise DownloadError(f"{binary_name} not found in archive")
extracted_path = Path(temp_dir) / ffmpeg_files[0]
shutil.copy2(extracted_path, self.ffmpeg_path) zip_ref.extract(binary_files[0], temp_dir)
extracted_path = Path(temp_dir) / binary_files[0]
target_path = self.base_dir / binary_name
shutil.copy2(extracted_path, target_path)
def _extract_tar(self, archive_path: Path, temp_dir: str): def _extract_tar(self, archive_path: Path, temp_dir: str):
"""Extract from tar archive (Linux/macOS)""" """Extract from tar archive (Linux/macOS)"""
with tarfile.open(archive_path, "r:xz") as tar_ref: with tarfile.open(archive_path, "r:xz") as tar_ref:
ffmpeg_files = [f for f in tar_ref.getnames() if f.endswith("/ffmpeg")] binary_names = self._get_binary_names()
if not ffmpeg_files: for binary_name in binary_names:
raise DownloadError("FFmpeg binary not found in archive") binary_files = [f for f in tar_ref.getnames() if f.endswith(f"/{binary_name}")]
if not binary_files:
tar_ref.extract(ffmpeg_files[0], temp_dir) raise DownloadError(f"{binary_name} not found in archive")
extracted_path = Path(temp_dir) / ffmpeg_files[0]
shutil.copy2(extracted_path, self.ffmpeg_path) tar_ref.extract(binary_files[0], temp_dir)
extracted_path = Path(temp_dir) / binary_files[0]
target_path = self.base_dir / binary_name
shutil.copy2(extracted_path, target_path)
def verify(self) -> bool: def verify(self) -> bool:
"""Verify FFmpeg binary works""" """Verify FFmpeg and FFprobe binaries work"""
try: try:
if not self.ffmpeg_path.exists(): if not self.ffmpeg_path.exists() or not self.ffprobe_path.exists():
return False return False
# Ensure proper permissions # Ensure proper permissions
os.chmod(str(self.ffmpeg_path), 0o755) os.chmod(str(self.ffmpeg_path), 0o755)
os.chmod(str(self.ffprobe_path), 0o755)
# Test FFmpeg functionality # Test FFmpeg functionality
result = subprocess.run( ffmpeg_result = subprocess.run(
[str(self.ffmpeg_path), "-version"], [str(self.ffmpeg_path), "-version"],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
timeout=5 timeout=5
) )
if result.returncode == 0: # Test FFprobe functionality
version = result.stdout.decode().split('\n')[0] ffprobe_result = subprocess.run(
logger.info(f"FFmpeg verification successful: {version}") [str(self.ffprobe_path), "-version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=5
)
if ffmpeg_result.returncode == 0 and ffprobe_result.returncode == 0:
ffmpeg_version = ffmpeg_result.stdout.decode().split('\n')[0]
ffprobe_version = ffprobe_result.stdout.decode().split('\n')[0]
logger.info(f"FFmpeg verification successful: {ffmpeg_version}")
logger.info(f"FFprobe verification successful: {ffprobe_version}")
return True return True
else: else:
logger.error(f"FFmpeg verification failed: {result.stderr.decode()}") if ffmpeg_result.returncode != 0:
logger.error(f"FFmpeg verification failed: {ffmpeg_result.stderr.decode()}")
if ffprobe_result.returncode != 0:
logger.error(f"FFprobe verification failed: {ffprobe_result.stderr.decode()}")
return False return False
except Exception as e: except Exception as e:
logger.error(f"FFmpeg verification failed: {e}") logger.error(f"Binary verification failed: {e}")
return False return False

View File

@@ -316,15 +316,22 @@ class VideoProcessor:
if monitored_channels and message.channel.id not in monitored_channels: if monitored_channels and message.channel.id not in monitored_channels:
return return
# Find all video URLs in message with improved pattern matching # Find all video URLs in message using yt-dlp simulation
logger.info(f"Checking message {message.id} for video URLs...") logger.info(f"Checking message {message.id} for video URLs...")
urls = [] urls = []
if message.guild.id in self.components: if message.guild.id in self.components:
downloader = self.components[message.guild.id]["downloader"] downloader = self.components[message.guild.id]["downloader"]
if downloader: if downloader:
# Check each word in the message
for word in message.content.split(): for word in message.content.split():
if downloader.is_supported_url(word): # Use yt-dlp simulation to check if URL is supported
urls.append(word) try:
if downloader.is_supported_url(word):
logger.info(f"Found supported URL: {word}")
urls.append(word)
except Exception as e:
logger.error(f"Error checking URL {word}: {str(e)}")
continue
if urls: if urls:
logger.info(f"Found {len(urls)} video URLs in message {message.id}") logger.info(f"Found {len(urls)} video URLs in message {message.id}")

View File

@@ -36,7 +36,6 @@ class VideoDownloader:
# Ensure download path exists with proper permissions # Ensure download path exists with proper permissions
self.download_path = Path(download_path) self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True) self.download_path.mkdir(parents=True, exist_ok=True)
# Ensure directory has rwx permissions for user and rx for group/others
os.chmod(str(self.download_path), 0o755) os.chmod(str(self.download_path), 0o755)
logger.info(f"Initialized download directory: {self.download_path}") logger.info(f"Initialized download directory: {self.download_path}")
@@ -44,14 +43,10 @@ class VideoDownloader:
self.max_quality = max_quality self.max_quality = max_quality
self.max_file_size = max_file_size self.max_file_size = max_file_size
self.enabled_sites = enabled_sites self.enabled_sites = enabled_sites
self.url_patterns = self._get_url_patterns()
# Initialize FFmpeg manager # Initialize FFmpeg manager
self.ffmpeg_mgr = FFmpegManager() self.ffmpeg_mgr = FFmpegManager()
ffmpeg_path = self.ffmpeg_mgr.get_ffmpeg_path() logger.info(f"FFmpeg path: {self.ffmpeg_mgr.get_ffmpeg_path()}")
if not os.path.exists(ffmpeg_path):
raise FileNotFoundError(f"FFmpeg not found at {ffmpeg_path}")
logger.info(f"Using FFmpeg from: {ffmpeg_path}")
# Create thread pool for this instance # Create thread pool for this instance
self.download_pool = ThreadPoolExecutor( self.download_pool = ThreadPoolExecutor(
@@ -63,7 +58,7 @@ class VideoDownloader:
self.active_downloads: Dict[str, str] = {} self.active_downloads: Dict[str, str] = {}
self._downloads_lock = asyncio.Lock() self._downloads_lock = asyncio.Lock()
# Configure yt-dlp options with absolute FFmpeg path # Configure yt-dlp options
self.ydl_opts = { self.ydl_opts = {
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best", # More flexible format "format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best", # More flexible format
"outtmpl": "%(title)s.%(ext)s", # Base filename only, path added later "outtmpl": "%(title)s.%(ext)s", # Base filename only, path added later
@@ -79,55 +74,55 @@ class VideoDownloader:
"extractor_retries": self.MAX_RETRIES, "extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size], "postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook], "progress_hooks": [self._progress_hook],
"ffmpeg_location": str(ffmpeg_path), # Convert Path to string "ffmpeg_location": self.ffmpeg_mgr.get_ffmpeg_path(),
"prefer_ffmpeg": True, # Force use of FFmpeg
"hls_prefer_ffmpeg": True, # Use FFmpeg for HLS
"logger": logger, # Use our logger "logger": logger, # Use our logger
"ignoreerrors": True, # Don't stop on download errors "ignoreerrors": True, # Don't stop on download errors
"no_color": True, # Disable ANSI colors in output "no_color": True, # Disable ANSI colors in output
"geo_bypass": True, # Try to bypass geo-restrictions "geo_bypass": True, # Try to bypass geo-restrictions
"socket_timeout": 30, # Increase timeout "socket_timeout": 30, # Increase timeout
"external_downloader": {
"m3u8": "ffmpeg", # Use FFmpeg for m3u8 downloads
},
"external_downloader_args": {
"ffmpeg": ["-v", "warning"], # Reduce FFmpeg verbosity
}
} }
logger.info("VideoDownloader initialized successfully")
def __del__(self): def is_supported_url(self, url: str) -> bool:
"""Ensure thread pool is shutdown and files are cleaned up""" """Check if URL is supported by attempting a simulated download"""
try: try:
# Cancel all active downloads # Configure yt-dlp for simulation
for file_path in self.active_downloads.values(): simulate_opts = {
**self.ydl_opts,
"simulate": True, # Only simulate download
"quiet": True, # Reduce output noise
"no_warnings": True,
"extract_flat": True, # Don't download video info
"skip_download": True, # Skip actual download
"format": "best", # Don't spend time finding best format
}
# Create a new yt-dlp instance for simulation
with yt_dlp.YoutubeDL(simulate_opts) as ydl:
try: try:
secure_delete_file(file_path) # Try to extract info without downloading
info = ydl.extract_info(url, download=False)
if info is None:
logger.debug(f"URL not supported: {url}")
return False
# Check if site is enabled (if enabled_sites is configured)
if self.enabled_sites:
extractor = info.get('extractor', '').lower()
if not any(site.lower() in extractor for site in self.enabled_sites):
logger.info(f"Site {extractor} not in enabled sites list")
return False
logger.info(f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})")
return True
except Exception as e: except Exception as e:
logger.error(f"Error deleting file during cleanup: {str(e)}") if "Unsupported URL" not in str(e):
self.active_downloads.clear() logger.error(f"Error checking URL {url}: {str(e)}")
return False
# Shutdown thread pool
if hasattr(self, "download_pool"):
self.download_pool.shutdown(wait=True)
except Exception as e: except Exception as e:
logger.error(f"Error during VideoDownloader cleanup: {str(e)}") logger.error(f"Error during URL check: {str(e)}")
return False
def _get_url_patterns(self) -> List[Tuple[str, str]]:
"""Get URL patterns and names for supported sites"""
patterns = []
try:
with yt_dlp.YoutubeDL() as ydl:
for ie in ydl._ies:
if hasattr(ie, "_VALID_URL") and ie._VALID_URL:
if not self.enabled_sites or any(
site.lower() in ie.IE_NAME.lower()
for site in self.enabled_sites
):
patterns.append((ie._VALID_URL, ie.IE_NAME))
except Exception as e:
logger.error(f"Error getting URL patterns: {str(e)}")
return patterns
def _check_file_size(self, info): def _check_file_size(self, info):
"""Check if file size is within limits""" """Check if file size is within limits"""
@@ -342,16 +337,3 @@ class VideoDownloader:
return False return False
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1)) await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
return False return False
def is_supported_url(self, url: str) -> bool:
"""Check if URL is supported using regex patterns"""
try:
# Try each pattern
for pattern, site_name in self.url_patterns:
if re.match(pattern, url):
logger.debug(f"URL matched pattern for {site_name}")
return True
return False
except Exception as e:
logger.error(f"Error checking URL support: {str(e)}")
return False