This commit is contained in:
pacnpal
2024-11-16 22:32:08 +00:00
parent b7d99490cf
commit dac21f2fcd
30 changed files with 5854 additions and 2279 deletions

View File

@@ -2,52 +2,85 @@
import logging
import asyncio
from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
from enum import Enum, auto
from typing import Optional, Dict, Any, List, Tuple, Set, TypedDict, ClassVar
from datetime import datetime, timedelta
import discord
from discord.ext import commands
from .url_extractor import URLExtractor
from .message_validator import MessageValidator
from .queue_processor import QueueProcessor
from .url_extractor import URLExtractor, URLMetadata
from .message_validator import MessageValidator, ValidationError
from .queue_processor import QueueProcessor, QueuePriority
from .constants import REACTIONS
from ..queue.manager import EnhancedVideoQueueManager
from ..config_manager import ConfigManager
from ..utils.exceptions import MessageHandlerError
logger = logging.getLogger("VideoArchiver")
class MessageState(Enum):
    """
    Possible states of message processing.

    COMPLETED, FAILED and IGNORED are terminal: the tracker records an end
    time when a message reaches any of them.
    """

    # Each member is declared exactly once; Enum rejects a reused member name.
    RECEIVED = auto()    # tracking has started for the message
    VALIDATING = auto()  # message is being validated against guild settings
    EXTRACTING = auto()  # URLs are being extracted from the message
    PROCESSING = auto()  # extracted URLs are being handed to the queue
    COMPLETED = auto()   # processing finished
    FAILED = auto()      # processing ended with an error
    IGNORED = auto()     # message was skipped (e.g. no valid URLs found)
class ProcessingStage(Enum):
    """Message processing stages, recorded alongside the message state."""

    # Each member is declared exactly once; Enum rejects a reused member name.
    VALIDATION = auto()  # paired with MessageState.VALIDATING
    EXTRACTION = auto()  # paired with MessageState.EXTRACTING
    QUEUEING = auto()    # paired with MessageState.PROCESSING
    COMPLETION = auto()  # set when processing is marked complete
class MessageCacheEntry(TypedDict):
    """Type definition for a cached message validation result."""

    # Verdict returned by the message validator.
    valid: bool
    # Reason returned by the validator together with the verdict; may be None.
    reason: Optional[str]
    # When the entry was built, as an ISO-format string
    # (the handler fills it from datetime.utcnow().isoformat()).
    timestamp: str
class MessageStatus(TypedDict):
    """Type definition for the status snapshot of one tracked message."""

    # Current processing state, or None if the message is not tracked.
    state: Optional[MessageState]
    # Last recorded processing stage, if any.
    stage: Optional[ProcessingStage]
    # Last recorded error text, if any.
    error: Optional[str]
    # When tracking started, if known.
    start_time: Optional[datetime]
    # When a terminal state was reached, if it has been.
    end_time: Optional[datetime]
    # Seconds between start_time and end_time; None unless both are known.
    duration: Optional[float]
class MessageCache:
"""Caches message validation results"""
def __init__(self, max_size: int = 1000) -> None:
    """
    Create an empty validation cache.

    Args:
        max_size: Entry count at which add() asks for the oldest entries
            to be cleaned up before inserting
    """
    self.max_size = max_size
    # Validation results keyed by Discord message ID.
    self._cache: Dict[int, MessageCacheEntry] = {}
    # Time of the most recent add/get per message ID.
    self._access_times: Dict[int, datetime] = {}
def add(self, message_id: int, result: MessageCacheEntry) -> None:
    """
    Add a result to cache.

    Args:
        message_id: Discord message ID
        result: Validation result entry
    """
    # Overwriting an existing entry does not grow the cache, so only make
    # room when a genuinely new message ID arrives at capacity.
    if message_id not in self._cache and len(self._cache) >= self.max_size:
        self._cleanup_oldest()

    self._cache[message_id] = result
    self._access_times[message_id] = datetime.utcnow()
def get(self, message_id: int) -> Optional[Dict[str, Any]]:
"""Get a cached result"""
def get(self, message_id: int) -> Optional[MessageCacheEntry]:
"""
Get a cached result.
Args:
message_id: Discord message ID
Returns:
Cached validation entry or None if not found
"""
if message_id in self._cache:
self._access_times[message_id] = datetime.utcnow()
return self._cache[message_id]
@@ -64,7 +97,9 @@ class MessageCache:
class ProcessingTracker:
"""Tracks message processing state and progress"""
def __init__(self):
MAX_PROCESSING_TIME: ClassVar[int] = 300 # 5 minutes in seconds
def __init__(self) -> None:
self.states: Dict[int, MessageState] = {}
self.stages: Dict[int, ProcessingStage] = {}
self.errors: Dict[int, str] = {}
@@ -72,7 +107,12 @@ class ProcessingTracker:
self.end_times: Dict[int, datetime] = {}
def start_processing(self, message_id: int) -> None:
    """
    Start tracking a message.

    Args:
        message_id: Discord message ID
    """
    # Drop leftovers from an earlier run of the same message so a stale
    # end time cannot be paired with the new start time (which would
    # report a negative duration) and an old error/stage is not reported.
    self.stages.pop(message_id, None)
    self.errors.pop(message_id, None)
    self.end_times.pop(message_id, None)

    self.states[message_id] = MessageState.RECEIVED
    self.start_times[message_id] = datetime.utcnow()
@@ -83,7 +123,15 @@ class ProcessingTracker:
stage: Optional[ProcessingStage] = None,
error: Optional[str] = None
) -> None:
"""Update message state"""
"""
Update message state.
Args:
message_id: Discord message ID
state: New message state
stage: Optional processing stage
error: Optional error message
"""
self.states[message_id] = state
if stage:
self.stages[message_id] = stage
@@ -92,25 +140,61 @@ class ProcessingTracker:
if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
self.end_times[message_id] = datetime.utcnow()
def get_status(self, message_id: int) -> MessageStatus:
    """
    Get processing status for a message.

    Args:
        message_id: Discord message ID

    Returns:
        Dictionary containing message status information; every field is
        None for a message that is not tracked
    """
    start_time = self.start_times.get(message_id)
    end_time = self.end_times.get(message_id)

    # A duration only exists once both ends of the interval are recorded.
    duration: Optional[float] = None
    if start_time is not None and end_time is not None:
        duration = (end_time - start_time).total_seconds()

    # A TypedDict value is a plain dict at runtime, so a literal suffices.
    return {
        "state": self.states.get(message_id),
        "stage": self.stages.get(message_id),
        "error": self.errors.get(message_id),
        "start_time": start_time,
        "end_time": end_time,
        "duration": duration,
    }
def is_message_stuck(self, message_id: int) -> bool:
    """
    Report whether a tracked message has been in flight for too long.

    Args:
        message_id: Discord message ID

    Returns:
        True when the message is tracked, has not reached a terminal state,
        and started more than MAX_PROCESSING_TIME seconds ago; False otherwise
    """
    try:
        current_state = self.states[message_id]
        started_at = self.start_times[message_id]
    except KeyError:
        # Not tracked (or no start time recorded): nothing to be stuck on.
        return False

    finished_states = (
        MessageState.COMPLETED,
        MessageState.FAILED,
        MessageState.IGNORED,
    )
    if current_state in finished_states:
        return False

    elapsed = datetime.utcnow() - started_at
    return elapsed.total_seconds() > self.MAX_PROCESSING_TIME
class MessageHandler:
"""Handles processing of messages for video content"""
def __init__(self, bot, config_manager, queue_manager):
def __init__(
self,
bot: discord.Client,
config_manager: ConfigManager,
queue_manager: EnhancedVideoQueueManager
) -> None:
self.bot = bot
self.config_manager = config_manager
self.url_extractor = URLExtractor()
@@ -123,7 +207,15 @@ class MessageHandler:
self._processing_lock = asyncio.Lock()
async def process_message(self, message: discord.Message) -> None:
"""Process a message for video content"""
"""
Process a message for video content.
Args:
message: Discord message to process
Raises:
MessageHandlerError: If there's an error during processing
"""
# Start tracking
self.tracker.start_processing(message.id)
@@ -139,11 +231,19 @@ class MessageHandler:
)
try:
await message.add_reaction(REACTIONS["error"])
except:
pass
except Exception as react_error:
logger.error(f"Failed to add error reaction: {react_error}")
async def _process_message_internal(self, message: discord.Message) -> None:
"""Internal message processing logic"""
"""
Internal message processing logic.
Args:
message: Discord message to process
Raises:
MessageHandlerError: If there's an error during processing
"""
try:
# Get guild settings
settings = await self.config_manager.get_guild_settings(message.guild.id)
@@ -164,15 +264,19 @@ class MessageHandler:
MessageState.VALIDATING,
ProcessingStage.VALIDATION
)
is_valid, reason = await self.message_validator.validate_message(
message,
settings
)
# Cache result
self.validation_cache.add(message.id, {
"valid": is_valid,
"reason": reason
})
try:
is_valid, reason = await self.message_validator.validate_message(
message,
settings
)
# Cache result
self.validation_cache.add(message.id, MessageCacheEntry(
valid=is_valid,
reason=reason,
timestamp=datetime.utcnow().isoformat()
))
except ValidationError as e:
raise MessageHandlerError(f"Validation failed: {str(e)}")
if not is_valid:
logger.debug(f"Message validation failed: {reason}")
@@ -189,14 +293,17 @@ class MessageHandler:
MessageState.EXTRACTING,
ProcessingStage.EXTRACTION
)
urls = await self.url_extractor.extract_urls(
message,
enabled_sites=settings.get("enabled_sites")
)
if not urls:
logger.debug("No valid URLs found in message")
self.tracker.update_state(message.id, MessageState.IGNORED)
return
try:
urls: List[URLMetadata] = await self.url_extractor.extract_urls(
message,
enabled_sites=settings.get("enabled_sites")
)
if not urls:
logger.debug("No valid URLs found in message")
self.tracker.update_state(message.id, MessageState.IGNORED)
return
except Exception as e:
raise MessageHandlerError(f"URL extraction failed: {str(e)}")
# Process URLs
self.tracker.update_state(
@@ -204,7 +311,14 @@ class MessageHandler:
MessageState.PROCESSING,
ProcessingStage.QUEUEING
)
await self.queue_processor.process_urls(message, urls)
try:
await self.queue_processor.process_urls(
message,
urls,
priority=QueuePriority.NORMAL
)
except Exception as e:
raise MessageHandlerError(f"Queue processing failed: {str(e)}")
# Mark completion
self.tracker.update_state(
@@ -213,13 +327,10 @@ class MessageHandler:
ProcessingStage.COMPLETION
)
except Exception as e:
self.tracker.update_state(
message.id,
MessageState.FAILED,
error=str(e)
)
except MessageHandlerError:
raise
except Exception as e:
raise MessageHandlerError(f"Unexpected error: {str(e)}")
async def format_archive_message(
self,
@@ -227,30 +338,49 @@ class MessageHandler:
channel: discord.TextChannel,
url: str
) -> str:
"""Format message for archive channel"""
"""
Format message for archive channel.
Args:
author: Optional message author
channel: Channel the message was posted in
url: URL being archived
Returns:
Formatted message string
"""
return await self.queue_processor.format_archive_message(
author,
channel,
url
)
def get_message_status(self, message_id: int) -> MessageStatus:
    """
    Get processing status for a message.

    Args:
        message_id: Discord message ID

    Returns:
        Dictionary containing message status information, as reported by
        the handler's processing tracker
    """
    return self.tracker.get_status(message_id)
def is_healthy(self) -> bool:
    """
    Check if handler is healthy.

    The handler counts as unhealthy as soon as the tracker reports one stuck
    message, or if the check itself raises.

    Returns:
        True if handler is healthy, False otherwise
    """
    try:
        # The stuck-message rule lives on the tracker; delegate to it rather
        # than repeating the timeout logic here.
        for message_id in self.tracker.states:
            if self.tracker.is_message_stuck(message_id):
                logger.warning(f"Message {message_id} appears to be stuck in processing")
                return False
        return True
    except Exception as e:
        # A health probe must not raise; report unhealthy instead.
        logger.error(f"Error checking health: {e}")
        return False