fix: Change URL validation to use regex patterns

- Replaced extractor.suitable() with direct regex pattern matching
- Store both pattern and site name for better debugging
- Added debug logging for URL matching
- Improved error handling
This commit is contained in:
pacnpal
2024-11-15 01:59:03 +00:00
parent 63664e3c94
commit 0a46cee479

View File

@@ -1,6 +1,7 @@
"""Video download and processing utilities""" """Video download and processing utilities"""
import os import os
import re
import logging import logging
import asyncio import asyncio
import ffmpeg import ffmpeg
@@ -87,18 +88,18 @@ class VideoDownloader:
except Exception as e: except Exception as e:
logger.error(f"Error during VideoDownloader cleanup: {str(e)}") logger.error(f"Error during VideoDownloader cleanup: {str(e)}")
def _get_url_patterns(self) -> List[Tuple[str, str]]:
    """Get URL patterns and names for supported sites.

    Walks the extractors registered on a throwaway ``yt_dlp.YoutubeDL``
    instance and collects each extractor's ``_VALID_URL`` regex together
    with its ``IE_NAME``. When ``self.enabled_sites`` is non-empty, only
    extractors whose ``IE_NAME`` contains one of the enabled site names
    (case-insensitive substring match) are kept.

    Returns:
        A list of ``(regex_pattern, site_name)`` tuples. The list is empty
        when nothing matches or when yt-dlp raises; errors are logged, not
        propagated.
    """
    patterns: List[Tuple[str, str]] = []
    try:
        enabled = [site.lower() for site in (self.enabled_sites or [])]
        with yt_dlp.YoutubeDL() as ydl:
            registry = ydl._ies
            # yt-dlp keeps its extractor registry as a dict keyed by
            # extractor key; iterating it directly yields the key strings
            # rather than the extractors. Older list-based registries are
            # still accepted.
            extractors = registry.values() if isinstance(registry, dict) else registry
            for ie in extractors:
                valid_url = getattr(ie, '_VALID_URL', None)
                if not valid_url:
                    # Extractors without a usable pattern (None/False/'')
                    continue
                ie_name = ie.IE_NAME
                if enabled and not any(site in ie_name.lower() for site in enabled):
                    continue
                # _VALID_URL may be one regex or a sequence of alternative
                # regexes; flatten so every entry is a (str, str) pair.
                if isinstance(valid_url, str):
                    candidates = (valid_url,)
                elif isinstance(valid_url, (list, tuple)):
                    candidates = tuple(valid_url)
                else:
                    continue
                patterns.extend(
                    (candidate, ie_name)
                    for candidate in candidates
                    if isinstance(candidate, str) and candidate
                )
    except Exception as e:
        logger.error(f"Error getting URL patterns: {str(e)}")
    return patterns
@@ -299,25 +300,14 @@ class VideoDownloader:
return False return False
def is_supported_url(self, url: str) -> bool:
    """Check if URL is supported using regex patterns.

    Tests ``url`` against every ``(pattern, site_name)`` pair in
    ``self.url_patterns`` with ``re.match`` (anchored at the start of the
    string) and stops at the first hit.

    Args:
        url: The URL to test. Non-string or empty values are treated as
            unsupported.

    Returns:
        True if any pattern matches, otherwise False. Never raises: a
        pattern that cannot be compiled or applied is skipped with a
        warning so it cannot mask later patterns, and any other error is
        logged and reported as False.
    """
    if not isinstance(url, str) or not url:
        return False
    try:
        for pattern, site_name in self.url_patterns:
            # A pattern entry may be a single regex or a sequence of
            # alternative regexes for the same site.
            candidates = pattern if isinstance(pattern, (list, tuple)) else (pattern,)
            for candidate in candidates:
                try:
                    matched = re.match(candidate, url)
                except (re.error, TypeError) as e:
                    # One bad pattern must not abort the whole check.
                    logger.warning(f"Skipping invalid URL pattern for {site_name}: {str(e)}")
                    continue
                if matched:
                    logger.debug(f"URL matched pattern for {site_name}")
                    return True
        return False
    except Exception as e:
        logger.error(f"Error checking URL support: {str(e)}")
        return False