Core Systems:

Component-based architecture with lifecycle management
Enhanced error handling and recovery mechanisms
Comprehensive state management and tracking
Event-driven architecture with monitoring
Queue Management:

Multiple processing strategies for different scenarios
Advanced state management with recovery
Comprehensive metrics and health monitoring
Sophisticated cleanup system with multiple strategies
Processing Pipeline:

Enhanced message handling with validation
Improved URL extraction and processing
Better queue management and monitoring
Advanced cleanup mechanisms
Overall Benefits:

Better code organization and maintainability
Improved error handling and recovery
Enhanced monitoring and reporting
More robust and reliable system
This commit is contained in:
pacnpal
2024-11-16 05:01:29 +00:00
parent 537a325807
commit a4ca6e8ea6
47 changed files with 11085 additions and 2110 deletions

View File

@@ -1,130 +1,256 @@
"""Message processing and URL extraction for VideoProcessor"""
import logging
import asyncio
from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
import discord
from typing import List, Tuple, Optional
from videoarchiver.utils.video_downloader import is_video_url_pattern
from .url_extractor import URLExtractor
from .message_validator import MessageValidator
from .queue_processor import QueueProcessor
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class MessageState(Enum):
    """Possible states of message processing.

    COMPLETED, FAILED and IGNORED are the terminal states: reaching any of
    them is what stamps an end time on a tracked message.
    """

    # Set as soon as tracking of a message starts.
    RECEIVED = "received"
    # The message is being checked against the guild settings.
    VALIDATING = "validating"
    # URLs are being pulled out of the message.
    EXTRACTING = "extracting"
    # Extracted URLs are being handed to the queue.
    PROCESSING = "processing"
    # Terminal: every step finished without raising.
    COMPLETED = "completed"
    # Terminal: an exception interrupted processing.
    FAILED = "failed"
    # Terminal: nothing to do (no settings, failed validation, no URLs).
    IGNORED = "ignored"
class ProcessingStage(Enum):
    """Message processing stages.

    A stage names the pipeline step a message is in; it is recorded
    alongside, not instead of, the message's state.
    """

    VALIDATION = "validation"  # checking the message against guild settings
    EXTRACTION = "extraction"  # collecting URLs from the message
    QUEUEING = "queueing"      # handing URLs over to the queue
    COMPLETION = "completion"  # all steps done
class MessageCache:
    """Caches message validation results.

    Results are keyed by message ID. Every ``add`` and every successful
    ``get`` stamps the entry with the current time; once ``max_size`` entries
    are held, inserting a *new* key first drops the entry with the oldest
    stamp (least recently used).
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache.

        Args:
            max_size: Number of held entries at which inserting a new key
                triggers eviction of the least recently used entry.
        """
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Add a result to cache, replacing any result already stored.

        Args:
            message_id: ID of the message the result belongs to.
            result: The value to remember for that message.
        """
        # Overwriting an existing key does not grow the cache, so it must not
        # cost some other entry its slot; evict only when the key is new.
        is_new_key = message_id not in self._cache
        if is_new_key and len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._access_times[message_id] = datetime.utcnow()

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Get a cached result.

        Returns:
            The stored result, or ``None`` when the message is not cached.
            A hit refreshes the entry's access time.
        """
        try:
            result = self._cache[message_id]
        except KeyError:
            return None
        self._access_times[message_id] = datetime.utcnow()
        return result

    def _cleanup_oldest(self) -> None:
        """Remove the single entry with the oldest access time, if any."""
        if not self._access_times:
            return
        oldest = min(self._access_times, key=self._access_times.__getitem__)
        # pop() rather than del: tolerate the two dicts having drifted apart.
        self._cache.pop(oldest, None)
        del self._access_times[oldest]
class ProcessingTracker:
    """Tracks message processing state and progress.

    Everything is kept in plain dicts keyed by message ID. Apart from the
    reset done when a message is tracked again, nothing here removes old
    entries, so the dicts grow with the number of distinct messages seen.
    """

    def __init__(self):
        """Create a tracker with no messages recorded."""
        self.states: Dict[int, MessageState] = {}
        self.stages: Dict[int, ProcessingStage] = {}
        self.errors: Dict[int, str] = {}
        self.start_times: Dict[int, datetime] = {}
        self.end_times: Dict[int, datetime] = {}

    def start_processing(self, message_id: int) -> None:
        """Start tracking a message.

        Args:
            message_id: ID of the message that is about to be processed.
        """
        # A message can be processed more than once. Clear what the previous
        # run left behind, otherwise the new run would report the old error
        # and a duration computed from an end time older than its start time.
        self.stages.pop(message_id, None)
        self.errors.pop(message_id, None)
        self.end_times.pop(message_id, None)

        self.states[message_id] = MessageState.RECEIVED
        self.start_times[message_id] = datetime.utcnow()

    def update_state(
        self,
        message_id: int,
        state: MessageState,
        stage: Optional[ProcessingStage] = None,
        error: Optional[str] = None
    ) -> None:
        """Update message state.

        Args:
            message_id: ID of the tracked message.
            state: New state; always recorded.
            stage: Pipeline stage to record; left untouched when omitted.
            error: Error text to record; left untouched when omitted or empty.
        """
        self.states[message_id] = state
        if stage:
            self.stages[message_id] = stage
        if error:
            self.errors[message_id] = error
        # Reaching a terminal state is what closes the timing window.
        if state in (
            MessageState.COMPLETED,
            MessageState.FAILED,
            MessageState.IGNORED,
        ):
            self.end_times[message_id] = datetime.utcnow()

    def get_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message.

        Returns:
            A dict with the keys ``state``, ``stage``, ``error``,
            ``start_time``, ``end_time`` and ``duration``. Values that were
            never recorded are ``None``; ``duration`` is in seconds and is
            only available once both timestamps exist.
        """
        started = self.start_times.get(message_id)
        finished = self.end_times.get(message_id)
        if started is not None and finished is not None:
            duration = (finished - started).total_seconds()
        else:
            duration = None
        return {
            "state": self.states.get(message_id),
            "stage": self.stages.get(message_id),
            "error": self.errors.get(message_id),
            "start_time": started,
            "end_time": finished,
            "duration": duration,
        }
class MessageHandler:
    """Handles processing of messages for video content.

    Each message goes through validation, URL extraction and queueing, which
    are delegated to ``MessageValidator``, ``URLExtractor`` and
    ``QueueProcessor``. Progress is recorded in a ``ProcessingTracker`` and
    validation outcomes are remembered in a ``MessageCache``.
    """

    # Seconds a message may sit in a non-terminal state before is_healthy()
    # reports the handler as unhealthy (5 minutes).
    STUCK_TIMEOUT_SECONDS = 300

    def __init__(self, bot, config_manager, queue_manager):
        """Wire up the collaborators used while processing messages.

        Args:
            bot: Bot instance; stored on the handler.
            config_manager: Source of per-guild settings
                (``get_guild_settings`` is awaited on it).
            queue_manager: Queue backend handed to ``QueueProcessor``.
        """
        self.bot = bot
        self.config_manager = config_manager
        self.queue_manager = queue_manager

        # Initialize components
        self.url_extractor = URLExtractor()
        self.message_validator = MessageValidator()
        self.queue_processor = QueueProcessor(queue_manager)

        # Initialize tracking and caching
        self.tracker = ProcessingTracker()
        self.validation_cache = MessageCache()
        self._processing_lock = asyncio.Lock()

    async def process_message(self, message: discord.Message) -> None:
        """Process a message for video content.

        Never raises for ordinary errors: a failure is logged, recorded in the
        tracker as FAILED and signalled on the message with the error
        reaction (best effort).
        """
        # Start tracking
        self.tracker.start_processing(message.id)

        try:
            # One message at a time goes through the pipeline.
            async with self._processing_lock:
                await self._process_message_internal(message)
        except Exception as e:
            logger.error(f"Error processing message: {str(e)}", exc_info=True)
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            try:
                await message.add_reaction(REACTIONS["error"])
            except Exception as reaction_error:
                # Best effort only. Deliberately not a bare ``except``: that
                # would also swallow task cancellation.
                logger.debug(
                    f"Could not add error reaction to message {message.id}: "
                    f"{reaction_error}"
                )

    async def _process_message_internal(self, message: discord.Message) -> None:
        """Internal message processing logic.

        Runs settings lookup, validation (cached per message ID), URL
        extraction and queueing, updating the tracker before each step.

        Raises:
            Exception: whatever a step raised, after the message has been
                marked FAILED in the tracker.
        """
        try:
            # discord.py reports ``guild`` as None for messages sent outside
            # a guild (e.g. DMs); there are no guild settings to apply then.
            if message.guild is None:
                logger.debug(f"Message {message.id} was not sent in a guild")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Get guild settings
            settings = await self.config_manager.get_guild_settings(message.guild.id)
            if not settings:
                logger.warning(f"No settings found for guild {message.guild.id}")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Check cache for validation
            cached_validation = self.validation_cache.get(message.id)
            if cached_validation is not None:
                is_valid = cached_validation["valid"]
                reason = cached_validation["reason"]
            else:
                # Validate message
                self.tracker.update_state(
                    message.id,
                    MessageState.VALIDATING,
                    ProcessingStage.VALIDATION
                )
                is_valid, reason = await self.message_validator.validate_message(
                    message,
                    settings
                )
                # Cache result
                self.validation_cache.add(message.id, {
                    "valid": is_valid,
                    "reason": reason
                })

            if not is_valid:
                logger.debug(f"Message validation failed: {reason}")
                self.tracker.update_state(
                    message.id,
                    MessageState.IGNORED,
                    error=reason
                )
                return

            # Extract URLs
            self.tracker.update_state(
                message.id,
                MessageState.EXTRACTING,
                ProcessingStage.EXTRACTION
            )
            urls = await self.url_extractor.extract_urls(
                message,
                enabled_sites=settings.get("enabled_sites")
            )
            if not urls:
                logger.debug("No valid URLs found in message")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Process URLs
            self.tracker.update_state(
                message.id,
                MessageState.PROCESSING,
                ProcessingStage.QUEUEING
            )
            await self.queue_processor.process_urls(message, urls)

            # Mark completion
            self.tracker.update_state(
                message.id,
                MessageState.COMPLETED,
                ProcessingStage.COMPLETION
            )

        except Exception as e:
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            raise

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel.

        Delegates to ``QueueProcessor.format_archive_message``.
        """
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )

    def get_message_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message"""
        return self.tracker.get_status(message_id)

    def is_healthy(self) -> bool:
        """Check if handler is healthy.

        Returns:
            ``False`` when any tracked message has been in a non-terminal
            state for longer than ``STUCK_TIMEOUT_SECONDS``, else ``True``.
        """
        current_time = datetime.utcnow()
        finished_states = (
            MessageState.COMPLETED,
            MessageState.FAILED,
            MessageState.IGNORED,
        )
        states = self.tracker.states
        # Check for any stuck messages
        return not any(
            message_id in states
            and states[message_id] not in finished_states
            and (current_time - start_time).total_seconds()
            > self.STUCK_TIMEOUT_SECONDS
            for message_id, start_time in self.tracker.start_times.items()
        )