From 1bd8980f6f98cc0d4116ffab925a59ac4c4c236e Mon Sep 17 00:00:00 2001 From: pacnpal <183241239+pacnpal@users.noreply.github.com> Date: Fri, 15 Nov 2024 00:55:27 +0000 Subject: [PATCH] fix: Improve URL validation and extraction - Use suitable() method for URL validation - Add word-based URL detection - Fix URL extraction errors --- videoarchiver/processor.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/videoarchiver/processor.py b/videoarchiver/processor.py index 725c23b..491fa3c 100644 --- a/videoarchiver/processor.py +++ b/videoarchiver/processor.py @@ -257,7 +257,8 @@ class VideoProcessor: settings = await self.config.get_guild_settings(message.guild.id) # Check if message is in a monitored channel - if message.channel.id not in settings["monitored_channels"]: + monitored_channels = settings.get("monitored_channels", []) + if monitored_channels and message.channel.id not in monitored_channels: return # Find all video URLs in message with improved pattern matching @@ -282,13 +283,16 @@ class VideoProcessor: """Extract video URLs from message content with improved pattern matching""" urls = [] try: + # Create a YoutubeDL instance to get extractors with yt_dlp.YoutubeDL() as ydl: - for ie in ydl._ies: - if ie._VALID_URL: - # Use more specific pattern matching - pattern = f"(?P<url>{ie._VALID_URL})" - matches = re.finditer(pattern, content, re.IGNORECASE) - urls.extend(match.group("url") for match in matches) + # Split content into words and check each for URLs + words = content.split() + for word in words: + # Try each extractor + for ie in ydl._ies: + if ie.suitable(word): + urls.append(word) + break # Stop once we find a matching extractor except Exception as e: logger.error(f"URL extraction error: {str(e)}") return list(set(urls)) # Remove duplicates