mirror of
https://github.com/pacnpal/Pac-cogs.git
synced 2025-12-20 02:41:06 -05:00
Core Systems:
Component-based architecture with lifecycle management; enhanced error handling and recovery mechanisms; comprehensive state management and tracking; event-driven architecture with monitoring. Queue Management: multiple processing strategies for different scenarios; advanced state management with recovery; comprehensive metrics and health monitoring; sophisticated cleanup system with multiple strategies. Processing Pipeline: enhanced message handling with validation; improved URL extraction and processing; better queue management and monitoring; advanced cleanup mechanisms. Overall Benefits: better code organization and maintainability; improved error handling and recovery; enhanced monitoring and reporting; more robust and reliable system.
This commit is contained in:
@@ -1,130 +1,256 @@
|
||||
"""Message processing and URL extraction for VideoProcessor"""
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from enum import Enum
|
||||
from typing import Optional, Dict, Any, List, Tuple
|
||||
from datetime import datetime
|
||||
import discord
|
||||
from typing import List, Tuple, Optional
|
||||
from videoarchiver.utils.video_downloader import is_video_url_pattern
|
||||
|
||||
from .url_extractor import URLExtractor
|
||||
from .message_validator import MessageValidator
|
||||
from .queue_processor import QueueProcessor
|
||||
from .reactions import REACTIONS
|
||||
|
||||
logger = logging.getLogger("VideoArchiver")
|
||||
|
||||
class MessageState(Enum):
    """Possible states of message processing.

    A message starts in ``RECEIVED`` (set by
    ``ProcessingTracker.start_processing``) and ends in one of the three
    terminal states ``COMPLETED``, ``FAILED`` or ``IGNORED``. Those are the
    states for which ``ProcessingTracker.update_state`` records an end time
    and which ``MessageHandler.is_healthy`` never treats as stuck.
    """

    # In-flight states, in the order the handler moves through them.
    RECEIVED = "received"
    VALIDATING = "validating"
    EXTRACTING = "extracting"
    PROCESSING = "processing"

    # Terminal states.
    COMPLETED = "completed"
    FAILED = "failed"    # an exception was raised while processing
    IGNORED = "ignored"  # no settings, failed validation, or no URLs found
|
||||
|
||||
class ProcessingStage(Enum):
    """Message processing stages.

    The handler records a stage alongside the message state: ``VALIDATION``
    while validating, ``EXTRACTION`` while extracting URLs, ``QUEUEING``
    while URLs are handed to the queue processor, and ``COMPLETION`` once
    the message is marked completed.
    """

    VALIDATION = "validation"
    EXTRACTION = "extraction"
    QUEUEING = "queueing"
    COMPLETION = "completion"
|
||||
|
||||
class MessageCache:
    """Caches message validation results with least-recently-used eviction.

    Entries are keyed by message id. ``_cache`` is kept in recency order
    (dicts preserve insertion order, and every read or write re-inserts the
    key at the end), so the first key is always the least recently used one.
    ``_access_times`` stores the naive-UTC time of the last access per key.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache.

        Args:
            max_size: Number of entries at which adding a *new* key evicts
                the least recently used entry first.
        """
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Add a result to the cache, or replace the one already stored.

        Replacing an existing key never evicts anything: the entry count
        does not grow, so no room has to be made for it.
        """
        if message_id in self._cache:
            # Remove first so the re-insert below moves the key to the
            # most-recently-used end of the dict.
            del self._cache[message_id]
        else:
            # Make room for the new key; the loop also shrinks the cache if
            # max_size was lowered after entries were added.
            while self._cache and len(self._cache) >= self.max_size:
                self._cleanup_oldest()
        self._cache[message_id] = result
        self._access_times[message_id] = datetime.utcnow()

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Return the cached result for ``message_id``, or None if absent.

        A hit counts as an access and marks the entry most recently used.
        """
        try:
            result = self._cache.pop(message_id)
        except KeyError:
            return None
        self._cache[message_id] = result  # re-insert at the MRU end
        self._access_times[message_id] = datetime.utcnow()
        return result

    def _cleanup_oldest(self) -> None:
        """Remove the least recently used entry; no-op on an empty cache."""
        if not self._cache:
            return
        oldest = next(iter(self._cache))
        del self._cache[oldest]
        self._access_times.pop(oldest, None)
|
||||
|
||||
class ProcessingTracker:
    """Tracks message processing state and progress.

    All bookkeeping lives in plain dicts keyed by message id. Timestamps
    are naive UTC values from ``datetime.utcnow()``; code comparing against
    them must use naive UTC as well. Entries are only ever replaced, never
    removed, apart from the per-message reset in ``start_processing``.
    """

    def __init__(self) -> None:
        self.states: Dict[int, MessageState] = {}
        self.stages: Dict[int, ProcessingStage] = {}
        self.errors: Dict[int, str] = {}
        self.start_times: Dict[int, datetime] = {}
        self.end_times: Dict[int, datetime] = {}

    def start_processing(self, message_id: int) -> None:
        """Start (or restart) tracking a message.

        Data left over from an earlier run of the same message is dropped
        first; otherwise ``get_status`` would report a stale stage/error and
        a negative duration (old end time minus new start time).
        """
        self.stages.pop(message_id, None)
        self.errors.pop(message_id, None)
        self.end_times.pop(message_id, None)
        self.states[message_id] = MessageState.RECEIVED
        self.start_times[message_id] = datetime.utcnow()

    def update_state(
        self,
        message_id: int,
        state: MessageState,
        stage: Optional[ProcessingStage] = None,
        error: Optional[str] = None
    ) -> None:
        """Update message state.

        Args:
            message_id: Id of the tracked message.
            state: New state; always recorded.
            stage: Recorded only when given, otherwise the previous stage
                is kept.
            error: Recorded only when given. An empty string is kept too,
                since ``str(exc)`` is empty for exceptions without a
                message.
        """
        self.states[message_id] = state
        if stage is not None:
            self.stages[message_id] = stage
        if error is not None:
            self.errors[message_id] = error
        # Terminal states close the timing window used for "duration".
        if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
            self.end_times[message_id] = datetime.utcnow()

    def get_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message.

        Returns:
            A dict with keys ``state``, ``stage``, ``error``, ``start_time``,
            ``end_time`` and ``duration``. Unknown values are None;
            ``duration`` is in seconds and is None until both a start and
            an end time exist.
        """
        started = self.start_times.get(message_id)
        ended = self.end_times.get(message_id)
        if started is not None and ended is not None:
            duration = (ended - started).total_seconds()
        else:
            duration = None
        return {
            "state": self.states.get(message_id),
            "stage": self.stages.get(message_id),
            "error": self.errors.get(message_id),
            "start_time": started,
            "end_time": ended,
            "duration": duration,
        }
|
||||
|
||||
class MessageHandler:
    """Handles processing of messages for video content.

    Each message is validated, scanned for URLs and handed to the queue
    processor, with every step recorded in a ``ProcessingTracker``.
    Validation results are cached per message id in a ``MessageCache``.
    Messages are processed one at a time under a single asyncio lock.
    """

    # States after which a message is no longer considered in flight.
    _TERMINAL_STATES = (
        MessageState.COMPLETED,
        MessageState.FAILED,
        MessageState.IGNORED,
    )

    # Seconds a message may stay in a non-terminal state before
    # is_healthy() reports the handler as unhealthy (5 minutes).
    STUCK_TIMEOUT_SECONDS = 300

    def __init__(self, bot, config_manager, queue_manager):
        """Wire up collaborators and per-handler tracking state.

        Args:
            bot: Stored as ``self.bot``; not used by this class itself.
            config_manager: Must provide an awaitable
                ``get_guild_settings(guild_id)``.
            queue_manager: Stored and passed on to ``QueueProcessor``.
        """
        self.bot = bot
        self.config_manager = config_manager
        self.queue_manager = queue_manager
        self.url_extractor = URLExtractor()
        self.message_validator = MessageValidator()
        self.queue_processor = QueueProcessor(queue_manager)

        # Initialize tracking and caching
        self.tracker = ProcessingTracker()
        self.validation_cache = MessageCache()
        self._processing_lock = asyncio.Lock()

    async def process_message(self, message: discord.Message) -> None:
        """Process a message for video content.

        Never raises for ordinary errors: any ``Exception`` is logged, the
        message is marked ``FAILED`` and an error reaction is attempted.
        """
        # Start tracking
        self.tracker.start_processing(message.id)

        try:
            async with self._processing_lock:
                await self._process_message_internal(message)
        except Exception as e:
            logger.error(f"Error processing message: {str(e)}", exc_info=True)
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            try:
                await message.add_reaction(REACTIONS["error"])
            except Exception as reaction_error:
                # Best effort only: the message may be deleted or the bot
                # may lack permission. Unlike a bare ``except``, this lets
                # task cancellation propagate.
                logger.debug(
                    f"Could not add error reaction to message {message.id}: "
                    f"{reaction_error}"
                )

    async def _process_message_internal(self, message: discord.Message) -> None:
        """Internal message processing logic.

        Marks the message ``IGNORED`` when there is nothing to do (no guild,
        no settings, failed validation, no URLs) and ``COMPLETED`` after the
        URLs were handed to the queue processor.

        Raises:
            Exception: Whatever a collaborator raised, re-raised after the
                message has been marked ``FAILED``.
        """
        try:
            # Messages outside a guild (e.g. DMs) have no guild settings.
            if message.guild is None:
                logger.debug(f"Ignoring message {message.id}: not sent in a guild")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Get guild settings
            settings = await self.config_manager.get_guild_settings(message.guild.id)
            if not settings:
                logger.warning(f"No settings found for guild {message.guild.id}")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Check cache for validation
            cached_validation = self.validation_cache.get(message.id)
            if cached_validation is not None:
                is_valid = cached_validation["valid"]
                reason = cached_validation["reason"]
            else:
                # Validate message
                self.tracker.update_state(
                    message.id,
                    MessageState.VALIDATING,
                    ProcessingStage.VALIDATION
                )
                is_valid, reason = await self.message_validator.validate_message(
                    message,
                    settings
                )
                # Cache result
                self.validation_cache.add(message.id, {
                    "valid": is_valid,
                    "reason": reason
                })

            if not is_valid:
                logger.debug(f"Message validation failed: {reason}")
                self.tracker.update_state(
                    message.id,
                    MessageState.IGNORED,
                    error=reason
                )
                return

            # Extract URLs
            self.tracker.update_state(
                message.id,
                MessageState.EXTRACTING,
                ProcessingStage.EXTRACTION
            )
            urls = await self.url_extractor.extract_urls(
                message,
                enabled_sites=settings.get("enabled_sites")
            )
            if not urls:
                logger.debug("No valid URLs found in message")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Process URLs
            self.tracker.update_state(
                message.id,
                MessageState.PROCESSING,
                ProcessingStage.QUEUEING
            )
            await self.queue_processor.process_urls(message, urls)

            # Mark completion
            self.tracker.update_state(
                message.id,
                MessageState.COMPLETED,
                ProcessingStage.COMPLETION
            )

        except Exception as e:
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            raise

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel.

        Delegates to ``QueueProcessor.format_archive_message``.
        """
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )

    def get_message_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message (see ProcessingTracker.get_status)."""
        return self.tracker.get_status(message_id)

    def is_healthy(self) -> bool:
        """Check if handler is healthy.

        Returns:
            False if any tracked message has been in a non-terminal state
            for more than ``STUCK_TIMEOUT_SECONDS``; True otherwise.
        """
        # Naive UTC, to match the timestamps written by ProcessingTracker.
        current_time = datetime.utcnow()
        for message_id, start_time in self.tracker.start_times.items():
            state = self.tracker.states.get(message_id)
            if state is None or state in self._TERMINAL_STATES:
                continue
            if (current_time - start_time).total_seconds() > self.STUCK_TIMEOUT_SECONDS:
                return False
        return True
|
||||
|
||||
Reference in New Issue
Block a user