Switching from regex patterns to yt-dlp simulation for URL detection:

Removed the regex-based URL detection
Added proper yt-dlp simulation to check if URLs are supported
Added better error handling for URL checking
Added detailed logging of URL detection results
Improving FFmpeg integration:

Added proper ffprobe binary download alongside FFmpeg
Added better verification of both binaries
Added retry mechanisms for binary downloads
Added proper cleanup of failed downloads
Enhancing error handling and logging:

Added detailed logging throughout the system
Added better error recovery mechanisms
Added proper cleanup of temporary files
Added better tracking of failed operations
This commit is contained in:
pacnpal
2024-11-15 03:40:53 +00:00
parent 8503fc6fdd
commit 3e50faec75
3 changed files with 128 additions and 111 deletions

View File

@@ -12,7 +12,7 @@ import platform
import hashlib
from pathlib import Path
from contextlib import contextmanager
from typing import Optional
from typing import Optional, Dict, List
import time
from .exceptions import DownloadError
@@ -37,31 +37,31 @@ class FFmpegDownloader:
"Windows": {
"x86_64": {
"url": "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip",
"bin_name": "ffmpeg.exe",
"bin_names": ["ffmpeg.exe", "ffprobe.exe"],
}
},
"Linux": {
"x86_64": {
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
"bin_name": "ffmpeg",
"bin_names": ["ffmpeg", "ffprobe"],
},
"aarch64": {
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-arm64-static.tar.xz",
"bin_name": "ffmpeg",
"bin_names": ["ffmpeg", "ffprobe"],
},
"armv7l": {
"url": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-armhf-static.tar.xz",
"bin_name": "ffmpeg",
"bin_names": ["ffmpeg", "ffprobe"],
},
},
"Darwin": {
"x86_64": {
"url": "https://evermeet.cx/ffmpeg/getrelease/zip",
"bin_name": "ffmpeg",
"bin_names": ["ffmpeg", "ffprobe"],
},
"arm64": {
"url": "https://evermeet.cx/ffmpeg/getrelease/zip",
"bin_name": "ffmpeg",
"bin_names": ["ffmpeg", "ffprobe"],
},
},
}
@@ -73,15 +73,17 @@ class FFmpegDownloader:
if self.machine == "arm64":
self.machine = "aarch64" # Normalize ARM64 naming
self.base_dir = base_dir
self.ffmpeg_path = self.base_dir / self._get_binary_name()
self.ffmpeg_path = self.base_dir / self._get_binary_names()[0]
self.ffprobe_path = self.base_dir / self._get_binary_names()[1]
logger.info(f"Initialized FFmpeg downloader for {system}/{machine}")
logger.info(f"FFmpeg binary path: {self.ffmpeg_path}")
logger.info(f"FFprobe binary path: {self.ffprobe_path}")
def _get_binary_name(self) -> str:
"""Get the appropriate binary name for the current system"""
def _get_binary_names(self) -> List[str]:
"""Get the appropriate binary names for the current system"""
try:
return self.FFMPEG_URLS[self.system][self.machine]["bin_name"]
return self.FFMPEG_URLS[self.system][self.machine]["bin_names"]
except KeyError:
raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}")
@@ -92,8 +94,8 @@ class FFmpegDownloader:
except KeyError:
raise DownloadError(f"Unsupported system/architecture: {self.system}/{self.machine}")
def download(self) -> Path:
"""Download and set up FFmpeg binary with retries"""
def download(self) -> Dict[str, Path]:
"""Download and set up FFmpeg and FFprobe binaries with retries"""
max_retries = 3
retry_delay = 5
last_error = None
@@ -106,12 +108,13 @@ class FFmpegDownloader:
self.base_dir.mkdir(parents=True, exist_ok=True)
os.chmod(str(self.base_dir), 0o777)
# Clean up any existing file or directory
if self.ffmpeg_path.exists():
if self.ffmpeg_path.is_dir():
shutil.rmtree(str(self.ffmpeg_path))
# Clean up any existing files
for binary_path in [self.ffmpeg_path, self.ffprobe_path]:
if binary_path.exists():
if binary_path.is_dir():
shutil.rmtree(str(binary_path))
else:
self.ffmpeg_path.unlink()
binary_path.unlink()
with temp_path_context() as temp_dir:
# Download archive
@@ -121,18 +124,23 @@ class FFmpegDownloader:
if not self._verify_download(archive_path):
raise DownloadError("Downloaded file verification failed")
# Extract FFmpeg binary
self._extract_binary(archive_path, temp_dir)
# Extract binaries
self._extract_binaries(archive_path, temp_dir)
# Set proper permissions
os.chmod(str(self.ffmpeg_path), 0o755)
for binary_path in [self.ffmpeg_path, self.ffprobe_path]:
os.chmod(str(binary_path), 0o755)
# Verify binary
# Verify binaries
if not self.verify():
raise DownloadError("FFmpeg binary verification failed")
raise DownloadError("Binary verification failed")
logger.info(f"Successfully downloaded FFmpeg to {self.ffmpeg_path}")
return self.ffmpeg_path
logger.info(f"Successfully downloaded FFprobe to {self.ffprobe_path}")
return {
"ffmpeg": self.ffmpeg_path,
"ffprobe": self.ffprobe_path
}
except Exception as e:
last_error = str(e)
@@ -193,9 +201,9 @@ class FFmpegDownloader:
logger.error(f"Download verification failed: {str(e)}")
return False
def _extract_binary(self, archive_path: Path, temp_dir: str):
"""Extract FFmpeg binary from archive"""
logger.info("Extracting FFmpeg binary")
def _extract_binaries(self, archive_path: Path, temp_dir: str):
"""Extract FFmpeg and FFprobe binaries from archive"""
logger.info("Extracting FFmpeg and FFprobe binaries")
if self.system == "Windows":
self._extract_zip(archive_path, temp_dir)
@@ -205,50 +213,70 @@ class FFmpegDownloader:
def _extract_zip(self, archive_path: Path, temp_dir: str):
"""Extract from zip archive (Windows)"""
with zipfile.ZipFile(archive_path, "r") as zip_ref:
ffmpeg_files = [f for f in zip_ref.namelist() if self._get_binary_name() in f]
if not ffmpeg_files:
raise DownloadError("FFmpeg binary not found in archive")
binary_names = self._get_binary_names()
for binary_name in binary_names:
binary_files = [f for f in zip_ref.namelist() if binary_name in f]
if not binary_files:
raise DownloadError(f"{binary_name} not found in archive")
zip_ref.extract(ffmpeg_files[0], temp_dir)
extracted_path = Path(temp_dir) / ffmpeg_files[0]
shutil.copy2(extracted_path, self.ffmpeg_path)
zip_ref.extract(binary_files[0], temp_dir)
extracted_path = Path(temp_dir) / binary_files[0]
target_path = self.base_dir / binary_name
shutil.copy2(extracted_path, target_path)
def _extract_tar(self, archive_path: Path, temp_dir: str):
"""Extract from tar archive (Linux/macOS)"""
with tarfile.open(archive_path, "r:xz") as tar_ref:
ffmpeg_files = [f for f in tar_ref.getnames() if f.endswith("/ffmpeg")]
if not ffmpeg_files:
raise DownloadError("FFmpeg binary not found in archive")
binary_names = self._get_binary_names()
for binary_name in binary_names:
binary_files = [f for f in tar_ref.getnames() if f.endswith(f"/{binary_name}")]
if not binary_files:
raise DownloadError(f"{binary_name} not found in archive")
tar_ref.extract(ffmpeg_files[0], temp_dir)
extracted_path = Path(temp_dir) / ffmpeg_files[0]
shutil.copy2(extracted_path, self.ffmpeg_path)
tar_ref.extract(binary_files[0], temp_dir)
extracted_path = Path(temp_dir) / binary_files[0]
target_path = self.base_dir / binary_name
shutil.copy2(extracted_path, target_path)
def verify(self) -> bool:
"""Verify FFmpeg binary works"""
"""Verify FFmpeg and FFprobe binaries work"""
try:
if not self.ffmpeg_path.exists():
if not self.ffmpeg_path.exists() or not self.ffprobe_path.exists():
return False
# Ensure proper permissions
os.chmod(str(self.ffmpeg_path), 0o755)
os.chmod(str(self.ffprobe_path), 0o755)
# Test FFmpeg functionality
result = subprocess.run(
ffmpeg_result = subprocess.run(
[str(self.ffmpeg_path), "-version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=5
)
if result.returncode == 0:
version = result.stdout.decode().split('\n')[0]
logger.info(f"FFmpeg verification successful: {version}")
# Test FFprobe functionality
ffprobe_result = subprocess.run(
[str(self.ffprobe_path), "-version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=5
)
if ffmpeg_result.returncode == 0 and ffprobe_result.returncode == 0:
ffmpeg_version = ffmpeg_result.stdout.decode().split('\n')[0]
ffprobe_version = ffprobe_result.stdout.decode().split('\n')[0]
logger.info(f"FFmpeg verification successful: {ffmpeg_version}")
logger.info(f"FFprobe verification successful: {ffprobe_version}")
return True
else:
logger.error(f"FFmpeg verification failed: {result.stderr.decode()}")
if ffmpeg_result.returncode != 0:
logger.error(f"FFmpeg verification failed: {ffmpeg_result.stderr.decode()}")
if ffprobe_result.returncode != 0:
logger.error(f"FFprobe verification failed: {ffprobe_result.stderr.decode()}")
return False
except Exception as e:
logger.error(f"FFmpeg verification failed: {e}")
logger.error(f"Binary verification failed: {e}")
return False

View File

@@ -316,15 +316,22 @@ class VideoProcessor:
if monitored_channels and message.channel.id not in monitored_channels:
return
# Find all video URLs in message with improved pattern matching
# Find all video URLs in message using yt-dlp simulation
logger.info(f"Checking message {message.id} for video URLs...")
urls = []
if message.guild.id in self.components:
downloader = self.components[message.guild.id]["downloader"]
if downloader:
# Check each word in the message
for word in message.content.split():
# Use yt-dlp simulation to check if URL is supported
try:
if downloader.is_supported_url(word):
logger.info(f"Found supported URL: {word}")
urls.append(word)
except Exception as e:
logger.error(f"Error checking URL {word}: {str(e)}")
continue
if urls:
logger.info(f"Found {len(urls)} video URLs in message {message.id}")

View File

@@ -36,7 +36,6 @@ class VideoDownloader:
# Ensure download path exists with proper permissions
self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True)
# Ensure directory has rwx permissions for user and rx for group/others
os.chmod(str(self.download_path), 0o755)
logger.info(f"Initialized download directory: {self.download_path}")
@@ -44,14 +43,10 @@ class VideoDownloader:
self.max_quality = max_quality
self.max_file_size = max_file_size
self.enabled_sites = enabled_sites
self.url_patterns = self._get_url_patterns()
# Initialize FFmpeg manager
self.ffmpeg_mgr = FFmpegManager()
ffmpeg_path = self.ffmpeg_mgr.get_ffmpeg_path()
if not os.path.exists(ffmpeg_path):
raise FileNotFoundError(f"FFmpeg not found at {ffmpeg_path}")
logger.info(f"Using FFmpeg from: {ffmpeg_path}")
logger.info(f"FFmpeg path: {self.ffmpeg_mgr.get_ffmpeg_path()}")
# Create thread pool for this instance
self.download_pool = ThreadPoolExecutor(
@@ -63,7 +58,7 @@ class VideoDownloader:
self.active_downloads: Dict[str, str] = {}
self._downloads_lock = asyncio.Lock()
# Configure yt-dlp options with absolute FFmpeg path
# Configure yt-dlp options
self.ydl_opts = {
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best", # More flexible format
"outtmpl": "%(title)s.%(ext)s", # Base filename only, path added later
@@ -79,55 +74,55 @@ class VideoDownloader:
"extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook],
"ffmpeg_location": str(ffmpeg_path), # Convert Path to string
"prefer_ffmpeg": True, # Force use of FFmpeg
"hls_prefer_ffmpeg": True, # Use FFmpeg for HLS
"ffmpeg_location": self.ffmpeg_mgr.get_ffmpeg_path(),
"logger": logger, # Use our logger
"ignoreerrors": True, # Don't stop on download errors
"no_color": True, # Disable ANSI colors in output
"geo_bypass": True, # Try to bypass geo-restrictions
"socket_timeout": 30, # Increase timeout
"external_downloader": {
"m3u8": "ffmpeg", # Use FFmpeg for m3u8 downloads
},
"external_downloader_args": {
"ffmpeg": ["-v", "warning"], # Reduce FFmpeg verbosity
}
def is_supported_url(self, url: str) -> bool:
"""Check if URL is supported by attempting a simulated download"""
try:
# Configure yt-dlp for simulation
simulate_opts = {
**self.ydl_opts,
"simulate": True, # Only simulate download
"quiet": True, # Reduce output noise
"no_warnings": True,
"extract_flat": True, # Don't download video info
"skip_download": True, # Skip actual download
"format": "best", # Don't spend time finding best format
}
logger.info("VideoDownloader initialized successfully")
def __del__(self):
"""Ensure thread pool is shutdown and files are cleaned up"""
# Create a new yt-dlp instance for simulation
with yt_dlp.YoutubeDL(simulate_opts) as ydl:
try:
# Cancel all active downloads
for file_path in self.active_downloads.values():
try:
secure_delete_file(file_path)
except Exception as e:
logger.error(f"Error deleting file during cleanup: {str(e)}")
self.active_downloads.clear()
# Try to extract info without downloading
info = ydl.extract_info(url, download=False)
if info is None:
logger.debug(f"URL not supported: {url}")
return False
# Shutdown thread pool
if hasattr(self, "download_pool"):
self.download_pool.shutdown(wait=True)
except Exception as e:
logger.error(f"Error during VideoDownloader cleanup: {str(e)}")
# Check if site is enabled (if enabled_sites is configured)
if self.enabled_sites:
extractor = info.get('extractor', '').lower()
if not any(site.lower() in extractor for site in self.enabled_sites):
logger.info(f"Site {extractor} not in enabled sites list")
return False
logger.info(f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})")
return True
def _get_url_patterns(self) -> List[Tuple[str, str]]:
"""Get URL patterns and names for supported sites"""
patterns = []
try:
with yt_dlp.YoutubeDL() as ydl:
for ie in ydl._ies:
if hasattr(ie, "_VALID_URL") and ie._VALID_URL:
if not self.enabled_sites or any(
site.lower() in ie.IE_NAME.lower()
for site in self.enabled_sites
):
patterns.append((ie._VALID_URL, ie.IE_NAME))
except Exception as e:
logger.error(f"Error getting URL patterns: {str(e)}")
return patterns
if "Unsupported URL" not in str(e):
logger.error(f"Error checking URL {url}: {str(e)}")
return False
except Exception as e:
logger.error(f"Error during URL check: {str(e)}")
return False
def _check_file_size(self, info):
"""Check if file size is within limits"""
@@ -342,16 +337,3 @@ class VideoDownloader:
return False
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
return False
def is_supported_url(self, url: str) -> bool:
"""Check if URL is supported using regex patterns"""
try:
# Try each pattern
for pattern, site_name in self.url_patterns:
if re.match(pattern, url):
logger.debug(f"URL matched pattern for {site_name}")
return True
return False
except Exception as e:
logger.error(f"Error checking URL support: {str(e)}")
return False