Files
Pac-cogs/videoarchiver/processor/message_handler.py
pacnpal a4ca6e8ea6 Core Systems:
Component-based architecture with lifecycle management
Enhanced error handling and recovery mechanisms
Comprehensive state management and tracking
Event-driven architecture with monitoring
Queue Management:

Multiple processing strategies for different scenarios
Advanced state management with recovery
Comprehensive metrics and health monitoring
Sophisticated cleanup system with multiple strategies
Processing Pipeline:

Enhanced message handling with validation
Improved URL extraction and processing
Better queue management and monitoring
Advanced cleanup mechanisms
Overall Benefits:

Better code organization and maintainability
Improved error handling and recovery
Enhanced monitoring and reporting
More robust and reliable system
2024-11-16 05:01:29 +00:00

257 lines
8.7 KiB
Python

"""Message processing and URL extraction for VideoProcessor"""
import logging
import asyncio
from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
import discord
from .url_extractor import URLExtractor
from .message_validator import MessageValidator
from .queue_processor import QueueProcessor
from .reactions import REACTIONS
# Module-level logger, registered under the fixed name "VideoArchiver" rather than __name__.
logger = logging.getLogger("VideoArchiver")
class MessageState(Enum):
    """Lifecycle states a tracked message can be in.

    COMPLETED, FAILED and IGNORED are the terminal states: reaching one of
    them is what makes the tracker record an end time for the message.
    """

    # --- in-flight states ---
    RECEIVED = "received"      # tracking has started, nothing done yet
    VALIDATING = "validating"  # message is being checked by the validator
    EXTRACTING = "extracting"  # URLs are being pulled out of the message
    PROCESSING = "processing"  # extracted URLs are being handed to the queue

    # --- terminal states ---
    COMPLETED = "completed"    # all steps finished without an exception
    FAILED = "failed"          # an exception interrupted processing
    IGNORED = "ignored"        # skipped (e.g. no settings, invalid, no URLs)
class ProcessingStage(Enum):
    """Pipeline stage a message has most recently entered.

    Recorded alongside the message state by the tracker; members are listed
    in the order the handler walks through them.
    """

    VALIDATION = "validation"  # message is being validated
    EXTRACTION = "extraction"  # URLs are being extracted
    QUEUEING = "queueing"      # URLs are being passed to the queue processor
    COMPLETION = "completion"  # queueing returned successfully
class MessageCache:
    """Bounded cache of per-message validation results with LRU eviction.

    Entries are keyed by message ID. Every ``add`` and every successful
    ``get`` counts as an access. When the cache is full and a *new* ID is
    added, the least recently accessed entry is evicted.

    ``_access_times`` is kept ordered from least to most recently used (each
    access re-inserts the key at the end), so the eviction candidate is
    always its first key. This makes eviction O(1) and independent of clock
    resolution or wall-clock adjustments; the stored timestamps are kept for
    inspection only.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache.

        Args:
            max_size: Number of entries after which adding a new ID evicts
                the least recently used one.
        """
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Store ``result`` for ``message_id`` and mark it most recently used.

        Args:
            message_id: ID of the message the result belongs to.
            result: Validation result to cache.
        """
        # Overwriting an existing entry does not grow the cache, so it must
        # not push a different entry out.
        is_new = message_id not in self._cache
        if is_new and len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._touch(message_id)

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Return the cached result for ``message_id``, or ``None`` if absent.

        A hit refreshes the entry's recency.
        """
        try:
            result = self._cache[message_id]
        except KeyError:
            return None
        self._touch(message_id)
        return result

    def _touch(self, message_id: int) -> None:
        """Record an access and move ``message_id`` to the most-recent end."""
        # Remove first so the re-insert lands at the end of the dict order.
        self._access_times.pop(message_id, None)
        self._access_times[message_id] = datetime.utcnow()

    def _cleanup_oldest(self) -> None:
        """Evict the least recently used entry, if there is one."""
        if not self._access_times:
            return
        # Dicts preserve insertion order and _touch() re-inserts on access,
        # so the first key is the least recently used one.
        oldest = next(iter(self._access_times))
        del self._access_times[oldest]
        self._cache.pop(oldest, None)
class ProcessingTracker:
    """Tracks state, stage, error and timing for each processed message.

    All data lives in plain dicts keyed by message ID. To keep memory bounded
    in a long-running process, once more than ``max_tracked`` messages are
    tracked the oldest *finished* ones are dropped; messages that are still
    in flight are never dropped.
    """

    # States that end processing; entering one records the end time.
    _TERMINAL_STATES = (
        MessageState.COMPLETED,
        MessageState.FAILED,
        MessageState.IGNORED,
    )

    def __init__(self, max_tracked: int = 10000):
        """Create an empty tracker.

        Args:
            max_tracked: Number of tracked messages above which finished
                messages start being pruned, oldest first.
        """
        self.max_tracked = max_tracked
        self.states: Dict[int, MessageState] = {}
        self.stages: Dict[int, ProcessingStage] = {}
        self.errors: Dict[int, str] = {}
        self.start_times: Dict[int, datetime] = {}
        self.end_times: Dict[int, datetime] = {}

    def start_processing(self, message_id: int) -> None:
        """Start (or restart) tracking a message in the RECEIVED state."""
        # A message may be processed more than once; drop what the previous
        # run left behind so get_status() never mixes a stale stage, error or
        # end time with the new start time.
        self.stages.pop(message_id, None)
        self.errors.pop(message_id, None)
        self.end_times.pop(message_id, None)

        self.states[message_id] = MessageState.RECEIVED
        self.start_times[message_id] = datetime.utcnow()
        self._prune_finished()

    def update_state(
        self,
        message_id: int,
        state: MessageState,
        stage: Optional[ProcessingStage] = None,
        error: Optional[str] = None
    ) -> None:
        """Record a new state for a message.

        Args:
            message_id: ID of the message being updated.
            state: New state; a terminal state also records the end time.
            stage: Stage to record, left unchanged when ``None``.
            error: Error text to record, left unchanged when ``None``.
        """
        self.states[message_id] = state
        if stage is not None:
            self.stages[message_id] = stage
        # Compare against None so an empty message (e.g. str() of an
        # exception raised without arguments) is still recorded.
        if error is not None:
            self.errors[message_id] = error
        if state in self._TERMINAL_STATES:
            self.end_times[message_id] = datetime.utcnow()

    def get_status(self, message_id: int) -> Dict[str, Any]:
        """Return a snapshot of everything known about a message.

        Returns:
            Dict with keys ``state``, ``stage``, ``error``, ``start_time``,
            ``end_time`` and ``duration``. Unknown values are ``None``;
            ``duration`` is in seconds and only set once both the start and
            end time are known.
        """
        started = self.start_times.get(message_id)
        finished = self.end_times.get(message_id)
        if started is not None and finished is not None:
            duration = (finished - started).total_seconds()
        else:
            duration = None
        return {
            "state": self.states.get(message_id),
            "stage": self.stages.get(message_id),
            "error": self.errors.get(message_id),
            "start_time": started,
            "end_time": finished,
            "duration": duration,
        }

    def _prune_finished(self) -> None:
        """Drop the oldest finished messages while over ``max_tracked``."""
        excess = len(self.states) - self.max_tracked
        if excess <= 0:
            return
        # end_times only holds finished messages and iterates in the order
        # they first finished, so its leading keys are the oldest candidates.
        stale = []
        for message_id in self.end_times:
            if len(stale) >= excess:
                break
            stale.append(message_id)
        tables = (
            self.states,
            self.stages,
            self.errors,
            self.start_times,
            self.end_times,
        )
        for message_id in stale:
            for table in tables:
                table.pop(message_id, None)
class MessageHandler:
    """Handles processing of messages for video content.

    Each message is validated, scanned for URLs and handed to the queue
    processor, while a ProcessingTracker records its progress. A single
    asyncio lock serializes processing, so only one message is worked on at
    a time.
    """

    # Seconds a message may stay unfinished before is_healthy() reports False.
    STUCK_TIMEOUT_SECONDS = 300

    # States in which a message no longer counts as in flight.
    _FINISHED_STATES = (
        MessageState.COMPLETED,
        MessageState.FAILED,
        MessageState.IGNORED,
    )

    def __init__(self, bot, config_manager, queue_manager):
        """Wire up the collaborators used while processing messages.

        Args:
            bot: Bot instance; stored on the handler.
            config_manager: Source of per-guild settings
                (``get_guild_settings``).
            queue_manager: Passed through to the QueueProcessor.
        """
        self.bot = bot
        self.config_manager = config_manager
        self.url_extractor = URLExtractor()
        self.message_validator = MessageValidator()
        self.queue_processor = QueueProcessor(queue_manager)

        # Initialize tracking and caching
        self.tracker = ProcessingTracker()
        self.validation_cache = MessageCache()
        self._processing_lock = asyncio.Lock()

    async def process_message(self, message: discord.Message) -> None:
        """Process a message for video content.

        Ordinary exceptions are logged, recorded as FAILED and answered with
        a best-effort error reaction; they are not propagated. Cancellation
        is recorded as FAILED and then re-raised.
        """
        # Start tracking
        self.tracker.start_processing(message.id)

        try:
            async with self._processing_lock:
                await self._process_message_internal(message)
        except asyncio.CancelledError:
            # Leave the message in a terminal state so it is not reported as
            # stuck by is_healthy(), then let the cancellation propagate.
            # Listed first because CancelledError subclasses Exception on
            # Python 3.7.
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error="Processing cancelled"
            )
            raise
        except Exception as e:
            logger.error(f"Error processing message: {str(e)}", exc_info=True)
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            try:
                await message.add_reaction(REACTIONS["error"])
            except Exception as reaction_error:
                # The reaction is best effort (missing permissions, deleted
                # message, ...); note the failure instead of hiding it.
                logger.debug(
                    f"Could not add error reaction to message {message.id}: "
                    f"{reaction_error}"
                )

    async def _process_message_internal(self, message: discord.Message) -> None:
        """Run validation, URL extraction and queueing for one message.

        Marks the message IGNORED when it has no guild, the guild has no
        settings, validation fails or no URLs are found.

        Raises:
            Exception: Any error from the collaborators is recorded as FAILED
                and re-raised to the caller.
        """
        try:
            # Messages outside a guild (e.g. DMs) have no guild settings.
            if message.guild is None:
                logger.debug("Message has no guild; ignoring")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Get guild settings
            settings = await self.config_manager.get_guild_settings(message.guild.id)
            if not settings:
                logger.warning(f"No settings found for guild {message.guild.id}")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Check cache for validation
            cached_validation = self.validation_cache.get(message.id)
            if cached_validation is not None:
                is_valid = cached_validation["valid"]
                reason = cached_validation["reason"]
            else:
                # Validate message
                self.tracker.update_state(
                    message.id,
                    MessageState.VALIDATING,
                    ProcessingStage.VALIDATION
                )
                is_valid, reason = await self.message_validator.validate_message(
                    message,
                    settings
                )
                # Cache result
                self.validation_cache.add(message.id, {
                    "valid": is_valid,
                    "reason": reason
                })

            if not is_valid:
                logger.debug(f"Message validation failed: {reason}")
                self.tracker.update_state(
                    message.id,
                    MessageState.IGNORED,
                    error=reason
                )
                return

            # Extract URLs
            self.tracker.update_state(
                message.id,
                MessageState.EXTRACTING,
                ProcessingStage.EXTRACTION
            )
            urls = await self.url_extractor.extract_urls(
                message,
                enabled_sites=settings.get("enabled_sites")
            )
            if not urls:
                logger.debug("No valid URLs found in message")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Process URLs
            self.tracker.update_state(
                message.id,
                MessageState.PROCESSING,
                ProcessingStage.QUEUEING
            )
            await self.queue_processor.process_urls(message, urls)

            # Mark completion
            self.tracker.update_state(
                message.id,
                MessageState.COMPLETED,
                ProcessingStage.COMPLETION
            )

        except Exception as e:
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            raise

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel (delegates to the queue processor)."""
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )

    def get_message_status(self, message_id: int) -> Dict[str, Any]:
        """Get the tracker's processing status for a message."""
        return self.tracker.get_status(message_id)

    def is_healthy(self) -> bool:
        """Check if handler is healthy.

        Returns:
            False if any tracked message has been in a non-terminal state for
            longer than ``STUCK_TIMEOUT_SECONDS``; True otherwise.
        """
        now = datetime.utcnow()
        states = self.tracker.states
        for message_id, start_time in self.tracker.start_times.items():
            state = states.get(message_id)
            if state is None or state in self._FINISHED_STATES:
                continue
            if (now - start_time).total_seconds() > self.STUCK_TIMEOUT_SECONDS:
                return False
        return True