"""Message processing and URL extraction for VideoProcessor"""
|
|
|
|
import logging
|
|
import asyncio
|
|
from enum import Enum
|
|
from typing import Optional, Dict, Any, List, Tuple
|
|
from datetime import datetime
|
|
import discord
|
|
|
|
from .url_extractor import URLExtractor
|
|
from .message_validator import MessageValidator
|
|
from .queue_processor import QueueProcessor
|
|
from .constants import REACTIONS
|
|
|
|
logger = logging.getLogger("VideoArchiver")
|
|
|
|


class MessageState(Enum):
    """Possible states of message processing"""

    RECEIVED = "received"
    VALIDATING = "validating"
    EXTRACTING = "extracting"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    IGNORED = "ignored"


class ProcessingStage(Enum):
    """Message processing stages"""

    VALIDATION = "validation"
    EXTRACTION = "extraction"
    QUEUEING = "queueing"
    COMPLETION = "completion"
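
# NOTE: MessageCache below is a small in-memory, LRU-style cache: get() refreshes
# an entry's access time, and once max_size entries are stored, _cleanup_oldest()
# evicts the least-recently-accessed one.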


class MessageCache:
    """Caches message validation results"""

    def __init__(self, max_size: int = 1000):
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Add a result to cache"""
        if len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._access_times[message_id] = datetime.utcnow()

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Get a cached result"""
        if message_id in self._cache:
            self._access_times[message_id] = datetime.utcnow()
            return self._cache[message_id]
        return None

    def _cleanup_oldest(self) -> None:
        """Remove oldest cache entries"""
        if not self._access_times:
            return
        oldest = min(self._access_times.items(), key=lambda x: x[1])[0]
        del self._cache[oldest]
        del self._access_times[oldest]
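
# NOTE: ProcessingTracker below keeps all of its state in memory and never evicts
# entries, so per-message records accumulate for the lifetime of the handler.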


class ProcessingTracker:
    """Tracks message processing state and progress"""

    def __init__(self):
        self.states: Dict[int, MessageState] = {}
        self.stages: Dict[int, ProcessingStage] = {}
        self.errors: Dict[int, str] = {}
        self.start_times: Dict[int, datetime] = {}
        self.end_times: Dict[int, datetime] = {}

    def start_processing(self, message_id: int) -> None:
        """Start tracking a message"""
        self.states[message_id] = MessageState.RECEIVED
        self.start_times[message_id] = datetime.utcnow()

    def update_state(
        self,
        message_id: int,
        state: MessageState,
        stage: Optional[ProcessingStage] = None,
        error: Optional[str] = None
    ) -> None:
        """Update message state"""
        self.states[message_id] = state
        if stage:
            self.stages[message_id] = stage
        if error:
            self.errors[message_id] = error
        if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
            self.end_times[message_id] = datetime.utcnow()

    def get_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message"""
        return {
            "state": self.states.get(message_id),
            "stage": self.stages.get(message_id),
            "error": self.errors.get(message_id),
            "start_time": self.start_times.get(message_id),
            "end_time": self.end_times.get(message_id),
            "duration": (
                (self.end_times[message_id] - self.start_times[message_id]).total_seconds()
                if message_id in self.end_times and message_id in self.start_times
                else None
            )
        }


class MessageHandler:
    """Handles processing of messages for video content"""

    def __init__(self, bot, config_manager, queue_manager):
        self.bot = bot
        self.config_manager = config_manager
        self.url_extractor = URLExtractor()
        self.message_validator = MessageValidator()
        self.queue_processor = QueueProcessor(queue_manager)

        # Initialize tracking and caching
        self.tracker = ProcessingTracker()
        self.validation_cache = MessageCache()
        self._processing_lock = asyncio.Lock()
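
    # Messages are handled one at a time under _processing_lock; any failure is
    # recorded in the tracker and surfaced to users via an error reaction.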

    async def process_message(self, message: discord.Message) -> None:
        """Process a message for video content"""
        # Start tracking
        self.tracker.start_processing(message.id)

        try:
            async with self._processing_lock:
                await self._process_message_internal(message)
        except Exception as e:
            logger.error(f"Error processing message: {str(e)}", exc_info=True)
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            try:
                await message.add_reaction(REACTIONS["error"])
            except Exception:
                # Best-effort reaction; a failure here must not mask the original error
                pass
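
    # Internal flow: settings lookup -> validation (cached per message id) ->
    # URL extraction -> queueing; any exception marks the message FAILED and is
    # re-raised to the caller.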

    async def _process_message_internal(self, message: discord.Message) -> None:
        """Internal message processing logic"""
        try:
            # Get guild settings
            settings = await self.config_manager.get_guild_settings(message.guild.id)
            if not settings:
                logger.warning(f"No settings found for guild {message.guild.id}")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Check cache for validation
            cached_validation = self.validation_cache.get(message.id)
            if cached_validation:
                is_valid = cached_validation["valid"]
                reason = cached_validation["reason"]
            else:
                # Validate message
                self.tracker.update_state(
                    message.id,
                    MessageState.VALIDATING,
                    ProcessingStage.VALIDATION
                )
                is_valid, reason = await self.message_validator.validate_message(
                    message,
                    settings
                )
                # Cache result
                self.validation_cache.add(message.id, {
                    "valid": is_valid,
                    "reason": reason
                })

            if not is_valid:
                logger.debug(f"Message validation failed: {reason}")
                self.tracker.update_state(
                    message.id,
                    MessageState.IGNORED,
                    error=reason
                )
                return

            # Extract URLs
            self.tracker.update_state(
                message.id,
                MessageState.EXTRACTING,
                ProcessingStage.EXTRACTION
            )
            urls = await self.url_extractor.extract_urls(
                message,
                enabled_sites=settings.get("enabled_sites")
            )
            if not urls:
                logger.debug("No valid URLs found in message")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Process URLs
            self.tracker.update_state(
                message.id,
                MessageState.PROCESSING,
                ProcessingStage.QUEUEING
            )
            await self.queue_processor.process_urls(message, urls)

            # Mark completion
            self.tracker.update_state(
                message.id,
                MessageState.COMPLETED,
                ProcessingStage.COMPLETION
            )

        except Exception as e:
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            raise

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel"""
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )

    def get_message_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message"""
        return self.tracker.get_status(message_id)

    def is_healthy(self) -> bool:
        """Check if handler is healthy"""
        # Check for any stuck messages
        current_time = datetime.utcnow()
        for message_id, start_time in self.tracker.start_times.items():
            if (
                message_id in self.tracker.states and
                self.tracker.states[message_id] not in (
                    MessageState.COMPLETED,
                    MessageState.FAILED,
                    MessageState.IGNORED
                ) and
                (current_time - start_time).total_seconds() > 300  # 5 minutes timeout
            ):
                return False
        return True
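

# Usage sketch (illustrative only; the surrounding cog wiring and an on_message
# listener are assumed, not defined in this module):
#
#     handler = MessageHandler(bot, config_manager, queue_manager)
#
#     async def on_message(message: discord.Message) -> None:
#         if message.guild is not None:
#             await handler.process_message(message)
#
#     status = handler.get_message_status(message_id)  # state/stage/error/duration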