mirror of
https://github.com/pacnpal/Pac-cogs.git
synced 2025-12-20 10:51:05 -05:00
Component-based architecture with lifecycle management. Enhanced error handling and recovery mechanisms. Comprehensive state management and tracking. Event-driven architecture with monitoring. Queue Management: multiple processing strategies for different scenarios; advanced state management with recovery; comprehensive metrics and health monitoring; sophisticated cleanup system with multiple strategies. Processing Pipeline: enhanced message handling with validation; improved URL extraction and processing; better queue management and monitoring; advanced cleanup mechanisms. Overall Benefits: better code organization and maintainability; improved error handling and recovery; enhanced monitoring and reporting; more robust and reliable system.
257 lines
8.7 KiB
Python
257 lines
8.7 KiB
Python
"""Message processing and URL extraction for VideoProcessor"""
|
|
|
|
import logging
|
|
import asyncio
|
|
from enum import Enum
|
|
from typing import Optional, Dict, Any, List, Tuple
|
|
from datetime import datetime
|
|
import discord
|
|
|
|
from .url_extractor import URLExtractor
|
|
from .message_validator import MessageValidator
|
|
from .queue_processor import QueueProcessor
|
|
from .reactions import REACTIONS
|
|
|
|
# Module-level logger, registered under the name "VideoArchiver".
logger = logging.getLogger("VideoArchiver")
|
|
|
|
class MessageState(Enum):
    """Lifecycle state of a single message as it moves through the handler.

    COMPLETED, FAILED and IGNORED are the terminal states; every other
    member describes a message that is still being worked on.
    """

    # --- in-flight states -------------------------------------------------
    RECEIVED = "received"
    VALIDATING = "validating"
    EXTRACTING = "extracting"
    PROCESSING = "processing"

    # --- terminal states --------------------------------------------------
    COMPLETED = "completed"
    FAILED = "failed"
    IGNORED = "ignored"
|
|
|
|
class ProcessingStage(Enum):
    """Named step of the message pipeline, listed in the order they occur."""

    VALIDATION = "validation"    # message is being checked against guild settings
    EXTRACTION = "extraction"    # URLs are being pulled out of the message
    QUEUEING = "queueing"        # extracted URLs are being handed to the queue
    COMPLETION = "completion"    # pipeline finished for this message
|
|
|
|
class MessageCache:
    """Size-bounded cache of message validation results with LRU eviction.

    Entries are keyed by message ID.  ``_access_times`` doubles as the
    recency index: every read or write re-inserts the key, so the dict's
    insertion order runs from least to most recently used.  Eviction can
    therefore take the first key instead of scanning timestamps, and it
    stays correct even when two accesses share the same clock reading.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache.

        Args:
            max_size: Number of entries at which adding a *new* key evicts
                the least recently used one.
        """
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Store ``result`` for ``message_id`` and mark it most recently used.

        Overwriting a key that is already cached never evicts anything;
        only inserting a new key into a full cache does.
        """
        is_new_key = message_id not in self._cache
        if is_new_key and len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._touch(message_id)

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Return the cached result for ``message_id``, or ``None`` if absent.

        A hit refreshes the entry's recency.  The stored dict itself is
        returned, not a copy.
        """
        if message_id not in self._cache:
            return None
        self._touch(message_id)
        return self._cache[message_id]

    def _touch(self, message_id: int) -> None:
        """Record an access and move the key to the most-recent position."""
        # pop + insert (rather than plain assignment) moves the key to the
        # end of the dict so iteration order keeps tracking recency.
        self._access_times.pop(message_id, None)
        self._access_times[message_id] = datetime.utcnow()

    def _cleanup_oldest(self) -> None:
        """Evict the least recently used entry, if there is one."""
        if not self._access_times:
            return
        oldest = next(iter(self._access_times))
        self._cache.pop(oldest, None)
        del self._access_times[oldest]
|
|
|
|
class ProcessingTracker:
    """Tracks per-message processing state, stage, error and timing.

    All bookkeeping lives in five dicts keyed by message ID.  To keep a
    long-running process from accumulating one entry per message forever,
    the oldest *finished* messages are discarded once more than
    ``max_tracked`` messages are known.  Messages that have not reached a
    terminal state are never discarded.
    """

    # States after which a message is considered finished.
    _TERMINAL_STATES = (
        MessageState.COMPLETED,
        MessageState.FAILED,
        MessageState.IGNORED,
    )

    def __init__(self, max_tracked: int = 10000):
        """Create an empty tracker.

        Args:
            max_tracked: Soft upper bound on the number of tracked messages.
                When exceeded, finished messages are dropped oldest-first.
        """
        self.max_tracked = max_tracked
        self.states: Dict[int, MessageState] = {}
        self.stages: Dict[int, ProcessingStage] = {}
        self.errors: Dict[int, str] = {}
        self.start_times: Dict[int, datetime] = {}
        self.end_times: Dict[int, datetime] = {}

    def start_processing(self, message_id: int) -> None:
        """Begin (or restart) tracking ``message_id`` in the RECEIVED state.

        Any stage, error or end time left over from an earlier run of the
        same message is cleared first, so ``get_status`` never mixes data
        from two runs (e.g. a new start time with an old end time).
        """
        self._forget(message_id)
        self.states[message_id] = MessageState.RECEIVED
        self.start_times[message_id] = datetime.utcnow()
        self._prune_finished()

    def update_state(
        self,
        message_id: int,
        state: MessageState,
        stage: Optional[ProcessingStage] = None,
        error: Optional[str] = None
    ) -> None:
        """Record a new state for ``message_id``.

        Args:
            message_id: ID of the message being updated.
            state: The new state.
            stage: New pipeline stage; the previous stage is kept if omitted.
            error: Error text to store; the previous error is kept if omitted.
                An empty string is stored as given.

        Reaching COMPLETED, FAILED or IGNORED also stamps the end time.
        """
        self.states[message_id] = state
        if stage is not None:
            self.stages[message_id] = stage
        if error is not None:
            self.errors[message_id] = error
        if state in self._TERMINAL_STATES:
            # Re-insert so end_times stays ordered by finish time, which is
            # the order _prune_finished discards in.
            self.end_times.pop(message_id, None)
            self.end_times[message_id] = datetime.utcnow()

    def get_status(self, message_id: int) -> Dict[str, Any]:
        """Return a status snapshot for ``message_id``.

        Returns:
            Dict with keys ``state``, ``stage``, ``error``, ``start_time``,
            ``end_time`` and ``duration``.  Values are ``None`` where nothing
            is recorded; ``duration`` is in seconds and is only set once both
            a start and an end time exist.
        """
        started = self.start_times.get(message_id)
        ended = self.end_times.get(message_id)
        if started is not None and ended is not None:
            duration = (ended - started).total_seconds()
        else:
            duration = None
        return {
            "state": self.states.get(message_id),
            "stage": self.stages.get(message_id),
            "error": self.errors.get(message_id),
            "start_time": started,
            "end_time": ended,
            "duration": duration,
        }

    def _forget(self, message_id: int) -> None:
        """Remove every record held for ``message_id``."""
        for table in (
            self.states,
            self.stages,
            self.errors,
            self.start_times,
            self.end_times,
        ):
            table.pop(message_id, None)

    def _prune_finished(self) -> None:
        """Drop the oldest finished messages while over ``max_tracked``."""
        while len(self.states) > self.max_tracked and self.end_times:
            self._forget(next(iter(self.end_times)))
|
|
|
|
class MessageHandler:
    """Handles processing of messages for video content.

    A message goes through validation, URL extraction and queueing.  The
    progress of each message is recorded in ``self.tracker`` and validation
    results are cached per message ID.  All messages are processed under a
    single ``asyncio.Lock``, so only one message is inside the pipeline at
    any time.
    """

    # Seconds a message may remain in a non-terminal state before
    # is_healthy() reports the handler as unhealthy.
    STUCK_TIMEOUT_SECONDS = 300

    # States in which a message no longer counts as in flight.
    _TERMINAL_STATES = (
        MessageState.COMPLETED,
        MessageState.FAILED,
        MessageState.IGNORED,
    )

    def __init__(self, bot, config_manager, queue_manager):
        """Wire up the collaborators used by the pipeline.

        Args:
            bot: Bot instance; stored on the handler.
            config_manager: Object providing ``get_guild_settings(guild_id)``.
            queue_manager: Passed through to ``QueueProcessor``.
        """
        self.bot = bot
        self.config_manager = config_manager
        self.url_extractor = URLExtractor()
        self.message_validator = MessageValidator()
        self.queue_processor = QueueProcessor(queue_manager)

        # Initialize tracking and caching
        self.tracker = ProcessingTracker()
        self.validation_cache = MessageCache()
        self._processing_lock = asyncio.Lock()

    async def process_message(self, message: discord.Message) -> None:
        """Process a message for video content.

        Never raises for ordinary failures: any ``Exception`` from the
        pipeline is logged, recorded as FAILED in the tracker, and signalled
        on the message with the error reaction (best effort).
        """
        self.tracker.start_processing(message.id)

        try:
            async with self._processing_lock:
                await self._process_message_internal(message)
        except Exception as e:
            logger.error("Error processing message: %s", e, exc_info=True)
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            try:
                await message.add_reaction(REACTIONS["error"])
            except Exception as reaction_error:
                # Best effort only: the reaction is a courtesy signal, so a
                # failure here (missing permissions, deleted message, ...)
                # must not mask the original error.  Catching Exception
                # rather than using a bare except lets task cancellation
                # propagate.
                logger.debug(
                    "Could not add error reaction to message %s: %s",
                    message.id,
                    reaction_error,
                )

    async def _process_message_internal(self, message: discord.Message) -> None:
        """Run the validation -> extraction -> queueing pipeline.

        Updates the tracker at every step.  On any exception the message is
        marked FAILED and the exception is re-raised to the caller.
        """
        try:
            # Messages without a guild (e.g. direct messages) have no guild
            # settings to look up, so they are ignored rather than failed.
            guild = message.guild
            if guild is None:
                logger.debug("Ignoring message %s: not sent in a guild", message.id)
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Get guild settings
            settings = await self.config_manager.get_guild_settings(guild.id)
            if not settings:
                logger.warning(f"No settings found for guild {guild.id}")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Validate (using the cached verdict when there is one)
            is_valid, reason = await self._validate_with_cache(message, settings)
            if not is_valid:
                logger.debug(f"Message validation failed: {reason}")
                self.tracker.update_state(
                    message.id,
                    MessageState.IGNORED,
                    error=reason
                )
                return

            # Extract URLs
            self.tracker.update_state(
                message.id,
                MessageState.EXTRACTING,
                ProcessingStage.EXTRACTION
            )
            urls = await self.url_extractor.extract_urls(
                message,
                enabled_sites=settings.get("enabled_sites")
            )
            if not urls:
                logger.debug("No valid URLs found in message")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Process URLs
            self.tracker.update_state(
                message.id,
                MessageState.PROCESSING,
                ProcessingStage.QUEUEING
            )
            await self.queue_processor.process_urls(message, urls)

            # Mark completion
            self.tracker.update_state(
                message.id,
                MessageState.COMPLETED,
                ProcessingStage.COMPLETION
            )

        except Exception as e:
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            raise

    async def _validate_with_cache(
        self,
        message: discord.Message,
        settings: Any
    ) -> Tuple[bool, Any]:
        """Return ``(is_valid, reason)`` for ``message``, consulting the cache.

        On a cache miss the validator is invoked, the tracker is moved to
        VALIDATING, and the verdict is stored under the message ID.
        """
        cached = self.validation_cache.get(message.id)
        if cached is not None:
            return cached["valid"], cached["reason"]

        self.tracker.update_state(
            message.id,
            MessageState.VALIDATING,
            ProcessingStage.VALIDATION
        )
        is_valid, reason = await self.message_validator.validate_message(
            message,
            settings
        )
        self.validation_cache.add(message.id, {
            "valid": is_valid,
            "reason": reason
        })
        return is_valid, reason

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel (delegates to the queue processor)."""
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )

    def get_message_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message, as reported by the tracker."""
        return self.tracker.get_status(message_id)

    def is_healthy(self) -> bool:
        """Check if handler is healthy.

        Returns:
            ``False`` if any tracked message has been in a non-terminal
            state for longer than ``STUCK_TIMEOUT_SECONDS``; ``True``
            otherwise.
        """
        now = datetime.utcnow()
        for message_id, start_time in self.tracker.start_times.items():
            state = self.tracker.states.get(message_id)
            if state is None or state in self._TERMINAL_STATES:
                continue
            if (now - start_time).total_seconds() > self.STUCK_TIMEOUT_SECONDS:
                return False
        return True
|