This commit is contained in:
pacnpal
2024-11-16 22:32:08 +00:00
parent b7d99490cf
commit dac21f2fcd
30 changed files with 5854 additions and 2279 deletions

View File

@@ -1,45 +1,212 @@
"""Video processing module for VideoArchiver"""
from typing import Dict, Any, Optional, Union, List, Tuple
import discord
from .core import VideoProcessor
from .constants import REACTIONS
from .progress_tracker import ProgressTracker
from .constants import (
REACTIONS,
ReactionType,
ReactionEmojis,
ProgressEmojis,
get_reaction,
get_progress_emoji
)
from .url_extractor import (
URLExtractor,
URLMetadata,
URLPattern,
URLType,
URLPatternManager,
URLValidator,
URLMetadataExtractor
)
from .message_validator import (
MessageValidator,
ValidationContext,
ValidationRule,
ValidationResult,
ValidationRuleManager,
ValidationCache,
ValidationStats,
ValidationCacheEntry,
ValidationError
)
from .message_handler import MessageHandler
from .queue_handler import QueueHandler
from .reactions import (
handle_archived_reaction,
update_queue_position_reaction,
update_progress_reaction,
update_download_progress_reaction
)
# Export public classes and constants.
# Fix: "REACTIONS" was listed twice (under both "Core components" and
# "Constants and enums"); duplicate entries in __all__ are harmless at
# runtime but confuse tooling and readers, so it now appears exactly once.
__all__ = [
    # Core components
    "VideoProcessor",
    "REACTIONS",
    "ProgressTracker",
    "MessageHandler",
    "QueueHandler",
    # URL Extraction
    "URLExtractor",
    "URLMetadata",
    "URLPattern",
    "URLType",
    "URLPatternManager",
    "URLValidator",
    "URLMetadataExtractor",
    # Message Validation
    "MessageValidator",
    "ValidationContext",
    "ValidationRule",
    "ValidationResult",
    "ValidationRuleManager",
    "ValidationCache",
    "ValidationStats",
    "ValidationCacheEntry",
    "ValidationError",
    # Constants and enums (REACTIONS itself is listed above)
    "ReactionType",
    "ReactionEmojis",
    "ProgressEmojis",
    # Helper functions
    "get_reaction",
    "get_progress_emoji",
    "extract_urls",
    "validate_message",
    "update_download_progress",
    "complete_download",
    "increment_download_retries",
    "get_download_progress",
    "get_active_operations",
    "get_validation_stats",
    "clear_caches",
    # Reaction handlers
    "handle_archived_reaction",
    "update_queue_position_reaction",
    "update_progress_reaction",
    "update_download_progress_reaction",
]
# Create a shared progress tracker instance for module-level access.
# The wrapper functions below (update_download_progress, complete_download,
# etc.) all delegate to this instance.
# NOTE(review): progress_tracker is re-bound again near the bottom of this
# module, which discards this instance — confirm and keep only one.
progress_tracker = ProgressTracker()
# Version information
__version__ = "1.0.0"
__author__ = "VideoArchiver Team"
__description__ = "Video processing module for archiving Discord videos"
# Create shared instances for module-level access; extract_urls/validate_message
# and clear_caches below delegate to these singletons.
url_extractor = URLExtractor()
message_validator = MessageValidator()
# Export progress tracking functions that wrap the instance methods
# NOTE(review): stale, untyped stub left over from a merge/diff — its body is
# only a docstring, and it is immediately shadowed by the typed
# update_download_progress defined further down. Candidate for removal.
def update_download_progress(url, progress_data):
    """Update download progress for a specific URL"""
# URL extraction helper functions
async def extract_urls(
    message: discord.Message,
    enabled_sites: Optional[List[str]] = None
) -> List[URLMetadata]:
    """Extract video URLs from a Discord message.

    Thin async wrapper that delegates to the shared module-level
    URLExtractor instance.

    Args:
        message: Message to scan for video URLs.
        enabled_sites: Optional list of enabled site identifiers; None
            means no site restriction is applied by this wrapper.

    Returns:
        A list of URLMetadata objects, one per extracted URL.
    """
    extraction = url_extractor.extract_urls(message, enabled_sites)
    return await extraction
async def validate_message(
    message: discord.Message,
    settings: Dict[str, Any]
) -> Tuple[bool, Optional[str]]:
    """Validate a Discord message against guild settings.

    Thin async wrapper that delegates to the shared module-level
    MessageValidator instance.

    Args:
        message: Message to validate.
        settings: Guild settings dictionary.

    Returns:
        A (is_valid, reason) tuple; reason is None when valid.

    Raises:
        ValidationError: If validation fails unexpectedly.
    """
    pending = message_validator.validate_message(message, settings)
    return await pending
# Progress tracking helper functions
def update_download_progress(url: str, progress_data: Dict[str, Any]) -> None:
    """Record fresh progress data for an in-flight download.

    Delegates to the shared module-level ProgressTracker.

    Args:
        url: The URL being downloaded.
        progress_data: Mapping of progress details consumed by the tracker.
    """
    progress_tracker.update_download_progress(url, progress_data)
# Fix: the diff left a stale, untyped `def complete_download(url):` stub
# (docstring-only body) immediately above the real definition; the duplicate
# is removed and only the typed implementation is kept.
def complete_download(url: str) -> None:
    """
    Mark a download as complete.

    Delegates to the shared module-level ProgressTracker.

    Args:
        url: The URL that completed downloading
    """
    progress_tracker.complete_download(url)
# Fix: removed the stale untyped duplicate stub left above this function by
# the diff; only the typed implementation is kept.
def increment_download_retries(url: str) -> None:
    """
    Increment retry count for a download.

    Delegates to the shared module-level ProgressTracker.

    Args:
        url: The URL being retried
    """
    progress_tracker.increment_download_retries(url)
# Fix: removed the stale untyped duplicate stub left above this function by
# the diff; only the typed implementation is kept.
def get_download_progress(
    url: Optional[str] = None
) -> Union[Dict[str, Any], Dict[str, Dict[str, Any]]]:
    """
    Get download progress for a specific URL or all downloads.

    Delegates to the shared module-level ProgressTracker.

    Args:
        url: Optional URL to get progress for. If None, returns all
            download progress.

    Returns:
        Progress information for one download, or a mapping of URL to
        progress information for all downloads.
    """
    return progress_tracker.get_download_progress(url)
# Fix: removed the stale untyped duplicate stub left above this function by
# the diff; only the typed implementation is kept.
def get_active_operations() -> Dict[str, Dict[str, Any]]:
    """
    Get all active operations.

    Delegates to the shared module-level ProgressTracker.

    Returns:
        Dictionary containing information about all active operations
    """
    return progress_tracker.get_active_operations()
def get_validation_stats() -> ValidationStats:
    """Return message-validation statistics.

    Thin wrapper around the shared module-level MessageValidator.

    Returns:
        ValidationStats gathered by the validator.
    """
    return message_validator.get_stats()
def clear_caches(message_id: Optional[int] = None) -> None:
    """Clear the URL-extraction and message-validation caches.

    Args:
        message_id: When given, only entries for this message ID are
            dropped from each cache; when None, both caches are cleared
            entirely.
    """
    for component in (url_extractor, message_validator):
        component.clear_cache(message_id)
# Initialize shared progress tracker instance
# NOTE(review): this re-binding discards the ProgressTracker already created
# near the top of the module (and any progress recorded on it between import
# time and here) — confirm intent and keep only one instance.
progress_tracker = ProgressTracker()

View File

@@ -2,25 +2,37 @@
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, Set
from datetime import datetime
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List, Set, TypedDict, ClassVar, Callable, Awaitable, Tuple
from datetime import datetime, timedelta
from .queue_handler import QueueHandler
from ..ffmpeg.ffmpeg_manager import FFmpegManager
from ..utils.exceptions import CleanupError
logger = logging.getLogger("VideoArchiver")
class CleanupStage(Enum):
    """Cleanup stages executed by CleanupManager.

    Fix: the diff left both the old string-valued members and the new
    auto() members in the class body; duplicate member names raise
    TypeError when the Enum class is created. The string values are kept
    because downstream code interpolates `stage.value` into log messages
    and status dictionaries, where a readable name beats an int.
    """

    QUEUE = "queue"
    FFMPEG = "ffmpeg"
    TASKS = "tasks"
    RESOURCES = "resources"
class CleanupStrategy(Enum):
    """Cleanup strategies for CleanupManager.

    Fix: duplicated member names (old string values plus new auto()
    values) would raise TypeError at class creation. String values are
    kept because `strategy.value` is surfaced in get_cleanup_stats().
    """

    NORMAL = "normal"
    FORCE = "force"
    GRACEFUL = "graceful"
class CleanupStats(TypedDict):
    """Type definition for cleanup statistics"""

    # Lifetime count of completed cleanup operations in the history.
    total_cleanups: int
    # Number of cleanup operations currently in flight.
    active_cleanups: int
    # Overall cleanup success rate (computed by CleanupTracker._calculate_success_rate).
    success_rate: float
    # Mean cleanup duration (computed by CleanupTracker._calculate_average_duration;
    # presumably seconds — confirm against the helper).
    average_duration: float
    # Per-stage success rate keyed by stage name.
    stage_success_rates: Dict[str, float]
@dataclass
class CleanupResult:
@@ -29,33 +41,64 @@ class CleanupResult:
stage: CleanupStage
error: Optional[str] = None
duration: float = 0.0
timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
@dataclass
class CleanupOperation:
    """Represents a cleanup operation"""

    # Stage this operation belongs to (queue / ffmpeg / tasks / resources).
    stage: CleanupStage
    # Awaitable factory performing the normal cleanup for the stage.
    func: Callable[[], Awaitable[None]]
    # Optional awaitable factory for forced cleanup; None means the stage
    # has no force path and is skipped during force_cleanup().
    force_func: Optional[Callable[[], Awaitable[None]]] = None
    timeout: float = 30.0  # Default timeout in seconds
class CleanupTracker:
"""Tracks cleanup operations"""
def __init__(self):
MAX_HISTORY: ClassVar[int] = 1000 # Maximum number of cleanup operations to track
def __init__(self) -> None:
self.cleanup_history: List[Dict[str, Any]] = []
self.active_cleanups: Set[str] = set()
self.start_times: Dict[str, datetime] = {}
self.stage_results: Dict[str, List[CleanupResult]] = {}
def start_cleanup(self, cleanup_id: str) -> None:
"""Start tracking a cleanup operation"""
"""
Start tracking a cleanup operation.
Args:
cleanup_id: Unique identifier for the cleanup operation
"""
self.active_cleanups.add(cleanup_id)
self.start_times[cleanup_id] = datetime.utcnow()
self.stage_results[cleanup_id] = []
# Cleanup old history if needed
if len(self.cleanup_history) >= self.MAX_HISTORY:
self.cleanup_history = self.cleanup_history[-self.MAX_HISTORY:]
def record_stage_result(
self,
cleanup_id: str,
result: CleanupResult
) -> None:
"""Record result of a cleanup stage"""
"""
Record result of a cleanup stage.
Args:
cleanup_id: Cleanup operation identifier
result: Result of the cleanup stage
"""
if cleanup_id in self.stage_results:
self.stage_results[cleanup_id].append(result)
def end_cleanup(self, cleanup_id: str) -> None:
"""End tracking a cleanup operation"""
"""
End tracking a cleanup operation.
Args:
cleanup_id: Cleanup operation identifier
"""
if cleanup_id in self.active_cleanups:
end_time = datetime.utcnow()
self.cleanup_history.append({
@@ -69,15 +112,20 @@ class CleanupTracker:
self.start_times.pop(cleanup_id)
self.stage_results.pop(cleanup_id)
# Fix: the diff left the old dict-literal version of this method directly
# above the new CleanupStats-typed version; the stale duplicate is removed.
def get_cleanup_stats(self) -> CleanupStats:
    """
    Get cleanup statistics.

    Returns:
        CleanupStats with totals, success rates, and average duration,
        aggregated from the recorded cleanup history.
    """
    return CleanupStats(
        total_cleanups=len(self.cleanup_history),
        active_cleanups=len(self.active_cleanups),
        success_rate=self._calculate_success_rate(),
        average_duration=self._calculate_average_duration(),
        stage_success_rates=self._calculate_stage_success_rates()
    )
def _calculate_success_rate(self) -> float:
"""Calculate overall cleanup success rate"""
@@ -116,20 +164,49 @@ class CleanupTracker:
class CleanupManager:
"""Manages cleanup operations for the video processor"""
CLEANUP_TIMEOUT: ClassVar[int] = 60 # Default timeout for entire cleanup operation
def __init__(
self,
queue_handler,
ffmpeg_mgr: Optional[object] = None,
queue_handler: QueueHandler,
ffmpeg_mgr: Optional[FFmpegManager] = None,
strategy: CleanupStrategy = CleanupStrategy.NORMAL
):
) -> None:
self.queue_handler = queue_handler
self.ffmpeg_mgr = ffmpeg_mgr
self.strategy = strategy
self._queue_task: Optional[asyncio.Task] = None
self.tracker = CleanupTracker()
# Define cleanup operations
self.cleanup_operations: List[CleanupOperation] = [
CleanupOperation(
stage=CleanupStage.QUEUE,
func=self._cleanup_queue,
force_func=self._force_cleanup_queue,
timeout=30.0
),
CleanupOperation(
stage=CleanupStage.FFMPEG,
func=self._cleanup_ffmpeg,
force_func=self._force_cleanup_ffmpeg,
timeout=15.0
),
CleanupOperation(
stage=CleanupStage.TASKS,
func=self._cleanup_tasks,
force_func=self._force_cleanup_tasks,
timeout=15.0
)
]
async def cleanup(self) -> None:
"""Perform normal cleanup of resources"""
"""
Perform normal cleanup of resources.
Raises:
CleanupError: If cleanup fails
"""
cleanup_id = f"cleanup_{datetime.utcnow().timestamp()}"
self.tracker.start_cleanup(cleanup_id)
@@ -137,35 +214,45 @@ class CleanupManager:
logger.info("Starting normal cleanup...")
# Clean up in stages
stages = [
(CleanupStage.QUEUE, self._cleanup_queue),
(CleanupStage.FFMPEG, self._cleanup_ffmpeg),
(CleanupStage.TASKS, self._cleanup_tasks)
]
for stage, cleanup_func in stages:
for operation in self.cleanup_operations:
try:
start_time = datetime.utcnow()
await cleanup_func()
await asyncio.wait_for(
operation.func(),
timeout=operation.timeout
)
duration = (datetime.utcnow() - start_time).total_seconds()
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(True, stage, duration=duration)
CleanupResult(True, operation.stage, duration=duration)
)
except Exception as e:
logger.error(f"Error in {stage.value} cleanup: {e}")
except asyncio.TimeoutError:
error = f"Cleanup stage {operation.stage.value} timed out"
logger.error(error)
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(False, stage, str(e))
CleanupResult(False, operation.stage, error)
)
if self.strategy != CleanupStrategy.GRACEFUL:
raise
raise CleanupError(error)
except Exception as e:
error = f"Error in {operation.stage.value} cleanup: {e}"
logger.error(error)
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(False, operation.stage, str(e))
)
if self.strategy != CleanupStrategy.GRACEFUL:
raise CleanupError(error)
logger.info("Normal cleanup completed successfully")
except Exception as e:
logger.error(f"Error during normal cleanup: {str(e)}", exc_info=True)
except CleanupError:
raise
except Exception as e:
error = f"Unexpected error during cleanup: {str(e)}"
logger.error(error, exc_info=True)
raise CleanupError(error)
finally:
self.tracker.end_cleanup(cleanup_id)
@@ -178,26 +265,26 @@ class CleanupManager:
logger.info("Starting force cleanup...")
# Force cleanup in stages
stages = [
(CleanupStage.QUEUE, self._force_cleanup_queue),
(CleanupStage.FFMPEG, self._force_cleanup_ffmpeg),
(CleanupStage.TASKS, self._force_cleanup_tasks)
]
for operation in self.cleanup_operations:
if not operation.force_func:
continue
for stage, cleanup_func in stages:
try:
start_time = datetime.utcnow()
await cleanup_func()
await asyncio.wait_for(
operation.force_func(),
timeout=operation.timeout
)
duration = (datetime.utcnow() - start_time).total_seconds()
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(True, stage, duration=duration)
CleanupResult(True, operation.stage, duration=duration)
)
except Exception as e:
logger.error(f"Error in force {stage.value} cleanup: {e}")
logger.error(f"Error in force {operation.stage.value} cleanup: {e}")
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(False, stage, str(e))
CleanupResult(False, operation.stage, str(e))
)
logger.info("Force cleanup completed")
@@ -209,6 +296,8 @@ class CleanupManager:
async def _cleanup_queue(self) -> None:
"""Clean up queue handler"""
if not self.queue_handler:
raise CleanupError("Queue handler not initialized")
await self.queue_handler.cleanup()
async def _cleanup_ffmpeg(self) -> None:
@@ -224,15 +313,22 @@ class CleanupManager:
await self._queue_task
except asyncio.CancelledError:
pass
except Exception as e:
raise CleanupError(f"Error cleaning up queue task: {str(e)}")
async def _force_cleanup_queue(self) -> None:
"""Force clean up queue handler"""
if not self.queue_handler:
raise CleanupError("Queue handler not initialized")
await self.queue_handler.force_cleanup()
async def _force_cleanup_ffmpeg(self) -> None:
"""Force clean up FFmpeg manager"""
if self.ffmpeg_mgr:
self.ffmpeg_mgr.kill_all_processes()
try:
self.ffmpeg_mgr.kill_all_processes()
except Exception as e:
logger.error(f"Error force cleaning FFmpeg processes: {e}")
async def _force_cleanup_tasks(self) -> None:
"""Force clean up tasks"""
@@ -240,13 +336,31 @@ class CleanupManager:
self._queue_task.cancel()
def set_queue_task(self, task: asyncio.Task) -> None:
"""Set the queue processing task for cleanup purposes"""
"""
Set the queue processing task for cleanup purposes.
Args:
task: Queue processing task to track
"""
self._queue_task = task
def get_cleanup_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
"""
Get cleanup statistics.
Returns:
Dictionary containing cleanup statistics and status
"""
return {
"stats": self.tracker.get_cleanup_stats(),
"strategy": self.strategy.value,
"active_cleanups": len(self.tracker.active_cleanups)
"active_cleanups": len(self.tracker.active_cleanups),
"operations": [
{
"stage": op.stage.value,
"timeout": op.timeout,
"has_force_cleanup": op.force_func is not None
}
for op in self.cleanup_operations
]
}

View File

@@ -1,13 +1,77 @@
"""Constants for VideoProcessor"""
# Reaction emojis
REACTIONS = {
'queued': '📹',
'processing': '⚙️',
'success': '',
'error': '',
'archived': '🔄', # New reaction for already archived videos
'numbers': ['1', '2', '3', '4', '5'],
'progress': ['', '🟨', '🟩'],
'download': ['0', '2', '4', '6', '8', '🔟']
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Tuple, Union
class ReactionType(Enum):
    """Types of reactions used in the processor.

    The string values double as the keys of the module-level REACTIONS
    mapping, so they must stay in sync with it.
    """

    QUEUED = 'queued'
    PROCESSING = 'processing'
    SUCCESS = 'success'
    ERROR = 'error'
    ARCHIVED = 'archived'
    NUMBERS = 'numbers'
    PROGRESS = 'progress'
    DOWNLOAD = 'download'
@dataclass(frozen=True)
class ReactionEmojis:
    """Emoji constants for different reaction types"""

    QUEUED: str = '📹'
    PROCESSING: str = '⚙️'
    # NOTE(review): SUCCESS and ERROR are empty strings — the emoji appear to
    # have been lost to a text-encoding issue (presumably a check mark and a
    # cross mark). Confirm against the original source; an empty reaction
    # string will fail when passed to Discord's add_reaction.
    SUCCESS: str = ''
    ERROR: str = ''
    ARCHIVED: str = '🔄'
@dataclass(frozen=True)
class ProgressEmojis:
    """Emoji sequences for progress indicators.

    Fix: the fields were annotated ``List[str]`` but given tuple defaults
    (tuples are required here — a mutable list default on a dataclass field
    would raise ValueError). The annotations now match the immutable
    tuple values.
    """

    NUMBERS: Tuple[str, ...] = ('1', '2', '3', '4', '5')
    # NOTE(review): the first PROGRESS entry is an empty string — an emoji
    # appears to have been lost to an encoding issue (likely a red square
    # preceding 🟨/🟩); confirm against the original source.
    PROGRESS: Tuple[str, ...] = ('', '🟨', '🟩')
    DOWNLOAD: Tuple[str, ...] = ('0', '2', '4', '6', '8', '🔟')
# Main reactions dictionary with type hints.
# Keys are the ReactionType string values; values are either a single emoji
# (status reactions) or a sequence of emojis (progress indicators), taken
# from the ReactionEmojis / ProgressEmojis constants above.
REACTIONS: Dict[str, Union[str, List[str]]] = {
    ReactionType.QUEUED.value: ReactionEmojis.QUEUED,
    ReactionType.PROCESSING.value: ReactionEmojis.PROCESSING,
    ReactionType.SUCCESS.value: ReactionEmojis.SUCCESS,
    ReactionType.ERROR.value: ReactionEmojis.ERROR,
    ReactionType.ARCHIVED.value: ReactionEmojis.ARCHIVED,
    ReactionType.NUMBERS.value: ProgressEmojis.NUMBERS,
    ReactionType.PROGRESS.value: ProgressEmojis.PROGRESS,
    ReactionType.DOWNLOAD.value: ProgressEmojis.DOWNLOAD
}
def get_reaction(reaction_type: Union[ReactionType, str]) -> Union[str, List[str]]:
    """
    Get reaction emoji(s) for a given reaction type.

    Args:
        reaction_type: The reaction to look up, as a ReactionType enum
            member or its string value.

    Returns:
        A single emoji string, or a sequence of emoji strings for
        progress-style reactions.

    Raises:
        KeyError: If the reaction type doesn't exist.
    """
    if isinstance(reaction_type, ReactionType):
        lookup_key = reaction_type.value
    else:
        lookup_key = reaction_type
    return REACTIONS[lookup_key]
def get_progress_emoji(progress: float, emoji_list: List[str]) -> str:
    """
    Get the appropriate progress emoji based on a progress value.

    Args:
        progress: Progress value between 0 and 1, inclusive
        emoji_list: Non-empty sequence of emojis, ordered from least to
            most complete

    Returns:
        The emoji representing the current progress

    Raises:
        ValueError: If progress is outside [0, 1] or emoji_list is empty
    """
    # Fix: an empty emoji_list previously fell through to an IndexError;
    # the ValueError raised for bad progress was also undocumented.
    if not emoji_list:
        raise ValueError("emoji_list must not be empty")
    if not 0 <= progress <= 1:
        raise ValueError("Progress must be between 0 and 1")
    # Scale progress onto the valid index range [0, len-1].
    index = int(progress * (len(emoji_list) - 1))
    return emoji_list[index]

View File

@@ -2,43 +2,76 @@
import logging
import asyncio
from enum import Enum
from typing import Optional, Tuple, Dict, Any, List
from datetime import datetime
from enum import Enum, auto
from typing import Optional, Tuple, Dict, Any, List, TypedDict, ClassVar
from datetime import datetime, timedelta
import discord
from discord.ext import commands
from .message_handler import MessageHandler
from .queue_handler import QueueHandler
from .progress_tracker import ProgressTracker
from ..utils.progress_tracker import ProgressTracker
from .status_display import StatusDisplay
from .cleanup_manager import CleanupManager
from .cleanup_manager import CleanupManager, CleanupStrategy
from .constants import REACTIONS
from ..queue.manager import EnhancedVideoQueueManager
from ..ffmpeg.ffmpeg_manager import FFmpegManager
from ..database.video_archive_db import VideoArchiveDB
from ..config_manager import ConfigManager
from ..utils.exceptions import ProcessorError
logger = logging.getLogger("VideoArchiver")
class ProcessorState(Enum):
    """Possible states of the video processor.

    Fix: the diff left both the old string-valued members and the new
    auto() members in the class body; duplicate member names raise
    TypeError when the Enum class is created. String values are kept
    because get_status() places `self.state.value` into ProcessorStatus,
    whose `state` field is typed str.
    """

    INITIALIZING = "initializing"
    READY = "ready"
    PROCESSING = "processing"
    PAUSED = "paused"
    ERROR = "error"
    SHUTDOWN = "shutdown"
class OperationType(Enum):
    """Types of processor operations.

    Fix: duplicated member names (old string values plus new auto()
    values) would raise TypeError at class creation. String values are
    kept because `op_type.value` is embedded in operation IDs and stored
    in OperationDetails.type, which is typed str.
    """

    MESSAGE_PROCESSING = "message_processing"
    VIDEO_PROCESSING = "video_processing"
    QUEUE_MANAGEMENT = "queue_management"
    CLEANUP = "cleanup"
class OperationDetails(TypedDict):
    """Type definition for operation details"""

    # Stringified OperationType value identifying the operation kind.
    type: str
    # When the operation started (set via datetime.utcnow() in start_operation).
    start_time: datetime
    # When the operation ended; None while it is still running.
    end_time: Optional[datetime]
    # "running" on start; updated by end_operation.
    status: str
    # Free-form details supplied by the caller at start_operation.
    details: Dict[str, Any]
    # Error message when the operation failed; None otherwise.
    error: Optional[str]
class OperationStats(TypedDict):
    """Type definition for operation statistics"""

    # History entries plus still-active operations.
    total_operations: int
    # Operations currently tracked as running.
    active_operations: int
    # Count of operations that ended successfully.
    success_count: int
    # Count of operations that ended in error.
    error_count: int
    # success_count / (success_count + error_count); 0.0 when no operations ended.
    success_rate: float
class ProcessorStatus(TypedDict):
    """Type definition for processor status"""

    # Stringified ProcessorState value.
    state: str
    # Aggregate health flag from HealthMonitor.is_healthy().
    health: bool
    # Operation counters from OperationTracker.get_operation_stats().
    operations: OperationStats
    # Snapshot of currently running operations keyed by operation ID.
    active_operations: Dict[str, OperationDetails]
    # ISO-8601 timestamp of the last health check; None before the first check.
    last_health_check: Optional[str]
    # Per-component health flags maintained by HealthMonitor.
    health_status: Dict[str, bool]
class OperationTracker:
"""Tracks processor operations"""
def __init__(self):
self.operations: Dict[str, Dict[str, Any]] = {}
self.operation_history: List[Dict[str, Any]] = []
MAX_HISTORY: ClassVar[int] = 1000 # Maximum number of operations to track
def __init__(self) -> None:
self.operations: Dict[str, OperationDetails] = {}
self.operation_history: List[OperationDetails] = []
self.error_count = 0
self.success_count = 0
@@ -47,14 +80,25 @@ class OperationTracker:
op_type: OperationType,
details: Dict[str, Any]
) -> str:
"""Start tracking an operation"""
"""
Start tracking an operation.
Args:
op_type: Type of operation
details: Operation details
Returns:
Operation ID string
"""
op_id = f"{op_type.value}_{datetime.utcnow().timestamp()}"
self.operations[op_id] = {
"type": op_type.value,
"start_time": datetime.utcnow(),
"status": "running",
"details": details
}
self.operations[op_id] = OperationDetails(
type=op_type.value,
start_time=datetime.utcnow(),
end_time=None,
status="running",
details=details,
error=None
)
return op_id
def end_operation(
@@ -63,7 +107,14 @@ class OperationTracker:
success: bool,
error: Optional[str] = None
) -> None:
"""End tracking an operation"""
"""
End tracking an operation.
Args:
op_id: Operation ID
success: Whether operation succeeded
error: Optional error message
"""
if op_id in self.operations:
self.operations[op_id].update({
"end_time": datetime.utcnow(),
@@ -78,28 +129,43 @@ class OperationTracker:
else:
self.error_count += 1
def get_active_operations(self) -> Dict[str, Dict[str, Any]]:
"""Get currently active operations"""
# Cleanup old history if needed
if len(self.operation_history) > self.MAX_HISTORY:
self.operation_history = self.operation_history[-self.MAX_HISTORY:]
def get_active_operations(self) -> Dict[str, OperationDetails]:
"""
Get currently active operations.
Returns:
Dictionary of active operations
"""
return self.operations.copy()
# Fix: the diff left the old dict-literal version of this method directly
# above the new OperationStats-typed version; the stale duplicate is removed.
def get_operation_stats(self) -> OperationStats:
    """
    Get operation statistics.

    Returns:
        OperationStats aggregating history and active-operation counts.
    """
    total = self.success_count + self.error_count
    return OperationStats(
        total_operations=len(self.operation_history) + len(self.operations),
        active_operations=len(self.operations),
        success_count=self.success_count,
        error_count=self.error_count,
        # Guard against division by zero before any operation has ended.
        success_rate=self.success_count / total if total > 0 else 0.0
    )
class HealthMonitor:
"""Monitors processor health"""
def __init__(self, processor: 'VideoProcessor'):
HEALTH_CHECK_INTERVAL: ClassVar[int] = 60 # Seconds between health checks
ERROR_CHECK_INTERVAL: ClassVar[int] = 30 # Seconds between checks after error
SUCCESS_RATE_THRESHOLD: ClassVar[float] = 0.9 # 90% success rate threshold
def __init__(self, processor: 'VideoProcessor') -> None:
self.processor = processor
self.last_check: Optional[datetime] = None
self.health_status: Dict[str, bool] = {}
@@ -117,6 +183,8 @@ class HealthMonitor:
await self._monitor_task
except asyncio.CancelledError:
pass
except Exception as e:
logger.error(f"Error stopping health monitor: {e}")
async def _monitor_health(self) -> None:
"""Monitor processor health"""
@@ -134,17 +202,22 @@ class HealthMonitor:
# Check operation health
op_stats = self.processor.operation_tracker.get_operation_stats()
self.health_status["operations"] = (
op_stats["success_rate"] >= 0.9 # 90% success rate threshold
op_stats["success_rate"] >= self.SUCCESS_RATE_THRESHOLD
)
await asyncio.sleep(60) # Check every minute
await asyncio.sleep(self.HEALTH_CHECK_INTERVAL)
except Exception as e:
logger.error(f"Health monitoring error: {e}")
await asyncio.sleep(30) # Shorter interval on error
logger.error(f"Health monitoring error: {e}", exc_info=True)
await asyncio.sleep(self.ERROR_CHECK_INTERVAL)
# Fix: the diff left a stale duplicate `def is_healthy` (docstring-only body)
# directly above the real definition; the duplicate is removed.
def is_healthy(self) -> bool:
    """
    Check if processor is healthy.

    Returns:
        True if all monitored components report healthy (note: also True
        when health_status is still empty, before the first check).
    """
    return all(self.health_status.values())
class VideoProcessor:
@@ -152,13 +225,13 @@ class VideoProcessor:
def __init__(
self,
bot,
config_manager,
components,
queue_manager=None,
ffmpeg_mgr=None,
db=None
):
bot: commands.Bot,
config_manager: ConfigManager,
components: Dict[int, Dict[str, Any]],
queue_manager: Optional[EnhancedVideoQueueManager] = None,
ffmpeg_mgr: Optional[FFmpegManager] = None,
db: Optional[VideoArchiveDB] = None
) -> None:
self.bot = bot
self.config = config_manager
self.components = components
@@ -171,29 +244,61 @@ class VideoProcessor:
self.operation_tracker = OperationTracker()
self.health_monitor = HealthMonitor(self)
# Initialize handlers
self.queue_handler = QueueHandler(bot, config_manager, components)
self.message_handler = MessageHandler(bot, config_manager, queue_manager)
self.progress_tracker = ProgressTracker()
self.cleanup_manager = CleanupManager(self.queue_handler, ffmpeg_mgr)
try:
# Initialize handlers
self.queue_handler = QueueHandler(bot, config_manager, components)
self.message_handler = MessageHandler(bot, config_manager, queue_manager)
self.progress_tracker = ProgressTracker()
self.cleanup_manager = CleanupManager(
self.queue_handler,
ffmpeg_mgr,
CleanupStrategy.NORMAL
)
# Pass db to queue handler if it exists
if self.db:
self.queue_handler.db = self.db
# Pass db to queue handler if it exists
if self.db:
self.queue_handler.db = self.db
# Store queue task reference
self._queue_task = None
# Mark as ready
self.state = ProcessorState.READY
logger.info("VideoProcessor initialized successfully")
# Store queue task reference
self._queue_task: Optional[asyncio.Task] = None
# Mark as ready
self.state = ProcessorState.READY
logger.info("VideoProcessor initialized successfully")
except Exception as e:
self.state = ProcessorState.ERROR
logger.error(f"Error initializing VideoProcessor: {e}", exc_info=True)
raise ProcessorError(f"Failed to initialize processor: {str(e)}")
async def start(self) -> None:
"""Start processor operations"""
await self.health_monitor.start_monitoring()
"""
Start processor operations.
Raises:
ProcessorError: If startup fails
"""
try:
await self.health_monitor.start_monitoring()
logger.info("VideoProcessor started successfully")
except Exception as e:
error = f"Failed to start processor: {str(e)}"
logger.error(error, exc_info=True)
raise ProcessorError(error)
async def process_video(self, item) -> Tuple[bool, Optional[str]]:
"""Process a video from the queue"""
async def process_video(self, item: Any) -> Tuple[bool, Optional[str]]:
"""
Process a video from the queue.
Args:
item: Queue item to process
Returns:
Tuple of (success, error_message)
Raises:
ProcessorError: If processing fails
"""
op_id = self.operation_tracker.start_operation(
OperationType.VIDEO_PROCESSING,
{"item": str(item)}
@@ -207,13 +312,23 @@ class VideoProcessor:
self.operation_tracker.end_operation(op_id, success, error)
return result
except Exception as e:
self.operation_tracker.end_operation(op_id, False, str(e))
raise
error = f"Video processing failed: {str(e)}"
self.operation_tracker.end_operation(op_id, False, error)
logger.error(error, exc_info=True)
raise ProcessorError(error)
finally:
self.state = ProcessorState.READY
async def process_message(self, message: discord.Message) -> None:
"""Process a message for video content"""
"""
Process a message for video content.
Args:
message: Discord message to process
Raises:
ProcessorError: If processing fails
"""
op_id = self.operation_tracker.start_operation(
OperationType.MESSAGE_PROCESSING,
{"message_id": message.id}
@@ -223,11 +338,18 @@ class VideoProcessor:
await self.message_handler.process_message(message)
self.operation_tracker.end_operation(op_id, True)
except Exception as e:
self.operation_tracker.end_operation(op_id, False, str(e))
raise
error = f"Message processing failed: {str(e)}"
self.operation_tracker.end_operation(op_id, False, error)
logger.error(error, exc_info=True)
raise ProcessorError(error)
async def cleanup(self) -> None:
"""Clean up resources and stop processing"""
"""
Clean up resources and stop processing.
Raises:
ProcessorError: If cleanup fails
"""
op_id = self.operation_tracker.start_operation(
OperationType.CLEANUP,
{"type": "normal"}
@@ -239,12 +361,18 @@ class VideoProcessor:
await self.cleanup_manager.cleanup()
self.operation_tracker.end_operation(op_id, True)
except Exception as e:
self.operation_tracker.end_operation(op_id, False, str(e))
logger.error(f"Error during cleanup: {e}", exc_info=True)
raise
error = f"Cleanup failed: {str(e)}"
self.operation_tracker.end_operation(op_id, False, error)
logger.error(error, exc_info=True)
raise ProcessorError(error)
async def force_cleanup(self) -> None:
"""Force cleanup of resources"""
"""
Force cleanup of resources.
Raises:
ProcessorError: If force cleanup fails
"""
op_id = self.operation_tracker.start_operation(
OperationType.CLEANUP,
{"type": "force"}
@@ -256,11 +384,18 @@ class VideoProcessor:
await self.cleanup_manager.force_cleanup()
self.operation_tracker.end_operation(op_id, True)
except Exception as e:
self.operation_tracker.end_operation(op_id, False, str(e))
raise
error = f"Force cleanup failed: {str(e)}"
self.operation_tracker.end_operation(op_id, False, error)
logger.error(error, exc_info=True)
raise ProcessorError(error)
async def show_queue_details(self, ctx: commands.Context) -> None:
"""Display detailed queue status"""
"""
Display detailed queue status.
Args:
ctx: Command context
"""
try:
if not self.queue_manager:
await ctx.send("Queue manager is not initialized.")
@@ -280,25 +415,36 @@ class VideoProcessor:
await ctx.send(embed=embed)
except Exception as e:
logger.error(f"Error showing queue details: {e}", exc_info=True)
error = f"Failed to show queue details: {str(e)}"
logger.error(error, exc_info=True)
await ctx.send(f"Error getting queue details: {str(e)}")
def set_queue_task(self, task: asyncio.Task) -> None:
"""Set the queue processing task"""
"""
Set the queue processing task.
Args:
task: Queue processing task
"""
self._queue_task = task
self.cleanup_manager.set_queue_task(task)
def get_status(self) -> Dict[str, Any]:
"""Get processor status"""
return {
"state": self.state.value,
"health": self.health_monitor.is_healthy(),
"operations": self.operation_tracker.get_operation_stats(),
"active_operations": self.operation_tracker.get_active_operations(),
"last_health_check": (
def get_status(self) -> ProcessorStatus:
"""
Get processor status.
Returns:
Dictionary containing processor status information
"""
return ProcessorStatus(
state=self.state.value,
health=self.health_monitor.is_healthy(),
operations=self.operation_tracker.get_operation_stats(),
active_operations=self.operation_tracker.get_active_operations(),
last_health_check=(
self.health_monitor.last_check.isoformat()
if self.health_monitor.last_check
else None
),
"health_status": self.health_monitor.health_status
}
health_status=self.health_monitor.health_status
)

View File

@@ -2,52 +2,85 @@
import logging
import asyncio
from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
from enum import Enum, auto
from typing import Optional, Dict, Any, List, Tuple, Set, TypedDict, ClassVar
from datetime import datetime, timedelta
import discord
from discord.ext import commands
from .url_extractor import URLExtractor
from .message_validator import MessageValidator
from .queue_processor import QueueProcessor
from .url_extractor import URLExtractor, URLMetadata
from .message_validator import MessageValidator, ValidationError
from .queue_processor import QueueProcessor, QueuePriority
from .constants import REACTIONS
from ..queue.manager import EnhancedVideoQueueManager
from ..config_manager import ConfigManager
from ..utils.exceptions import MessageHandlerError
logger = logging.getLogger("VideoArchiver")
class MessageState(Enum):
"""Possible states of message processing"""
RECEIVED = "received"
VALIDATING = "validating"
EXTRACTING = "extracting"
PROCESSING = "processing"
COMPLETED = "completed"
FAILED = "failed"
IGNORED = "ignored"
RECEIVED = auto()
VALIDATING = auto()
EXTRACTING = auto()
PROCESSING = auto()
COMPLETED = auto()
FAILED = auto()
IGNORED = auto()
class ProcessingStage(Enum):
"""Message processing stages"""
VALIDATION = "validation"
EXTRACTION = "extraction"
QUEUEING = "queueing"
COMPLETION = "completion"
VALIDATION = auto()
EXTRACTION = auto()
QUEUEING = auto()
COMPLETION = auto()
class MessageCacheEntry(TypedDict):
"""Type definition for message cache entry"""
valid: bool
reason: Optional[str]
timestamp: str
class MessageStatus(TypedDict):
"""Type definition for message status"""
state: Optional[MessageState]
stage: Optional[ProcessingStage]
error: Optional[str]
start_time: Optional[datetime]
end_time: Optional[datetime]
duration: Optional[float]
class MessageCache:
"""Caches message validation results"""
def __init__(self, max_size: int = 1000):
def __init__(self, max_size: int = 1000) -> None:
self.max_size = max_size
self._cache: Dict[int, Dict[str, Any]] = {}
self._cache: Dict[int, MessageCacheEntry] = {}
self._access_times: Dict[int, datetime] = {}
def add(self, message_id: int, result: Dict[str, Any]) -> None:
"""Add a result to cache"""
def add(self, message_id: int, result: MessageCacheEntry) -> None:
"""
Add a result to cache.
Args:
message_id: Discord message ID
result: Validation result entry
"""
if len(self._cache) >= self.max_size:
self._cleanup_oldest()
self._cache[message_id] = result
self._access_times[message_id] = datetime.utcnow()
def get(self, message_id: int) -> Optional[Dict[str, Any]]:
"""Get a cached result"""
def get(self, message_id: int) -> Optional[MessageCacheEntry]:
"""
Get a cached result.
Args:
message_id: Discord message ID
Returns:
Cached validation entry or None if not found
"""
if message_id in self._cache:
self._access_times[message_id] = datetime.utcnow()
return self._cache[message_id]
@@ -64,7 +97,9 @@ class MessageCache:
class ProcessingTracker:
"""Tracks message processing state and progress"""
def __init__(self):
MAX_PROCESSING_TIME: ClassVar[int] = 300 # 5 minutes in seconds
def __init__(self) -> None:
self.states: Dict[int, MessageState] = {}
self.stages: Dict[int, ProcessingStage] = {}
self.errors: Dict[int, str] = {}
@@ -72,7 +107,12 @@ class ProcessingTracker:
self.end_times: Dict[int, datetime] = {}
def start_processing(self, message_id: int) -> None:
"""Start tracking a message"""
"""
Start tracking a message.
Args:
message_id: Discord message ID
"""
self.states[message_id] = MessageState.RECEIVED
self.start_times[message_id] = datetime.utcnow()
@@ -83,7 +123,15 @@ class ProcessingTracker:
stage: Optional[ProcessingStage] = None,
error: Optional[str] = None
) -> None:
"""Update message state"""
"""
Update message state.
Args:
message_id: Discord message ID
state: New message state
stage: Optional processing stage
error: Optional error message
"""
self.states[message_id] = state
if stage:
self.stages[message_id] = stage
@@ -92,25 +140,61 @@ class ProcessingTracker:
if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
self.end_times[message_id] = datetime.utcnow()
def get_status(self, message_id: int) -> Dict[str, Any]:
"""Get processing status for a message"""
return {
"state": self.states.get(message_id),
"stage": self.stages.get(message_id),
"error": self.errors.get(message_id),
"start_time": self.start_times.get(message_id),
"end_time": self.end_times.get(message_id),
"duration": (
(self.end_times[message_id] - self.start_times[message_id]).total_seconds()
if message_id in self.end_times and message_id in self.start_times
def get_status(self, message_id: int) -> MessageStatus:
"""
Get processing status for a message.
Args:
message_id: Discord message ID
Returns:
Dictionary containing message status information
"""
end_time = self.end_times.get(message_id)
start_time = self.start_times.get(message_id)
return MessageStatus(
state=self.states.get(message_id),
stage=self.stages.get(message_id),
error=self.errors.get(message_id),
start_time=start_time,
end_time=end_time,
duration=(
(end_time - start_time).total_seconds()
if end_time and start_time
else None
)
}
)
def is_message_stuck(self, message_id: int) -> bool:
"""
Check if a message is stuck in processing.
Args:
message_id: Discord message ID
Returns:
True if message is stuck, False otherwise
"""
if message_id not in self.states or message_id not in self.start_times:
return False
state = self.states[message_id]
if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
return False
processing_time = (datetime.utcnow() - self.start_times[message_id]).total_seconds()
return processing_time > self.MAX_PROCESSING_TIME
class MessageHandler:
"""Handles processing of messages for video content"""
def __init__(self, bot, config_manager, queue_manager):
def __init__(
self,
bot: discord.Client,
config_manager: ConfigManager,
queue_manager: EnhancedVideoQueueManager
) -> None:
self.bot = bot
self.config_manager = config_manager
self.url_extractor = URLExtractor()
@@ -123,7 +207,15 @@ class MessageHandler:
self._processing_lock = asyncio.Lock()
async def process_message(self, message: discord.Message) -> None:
"""Process a message for video content"""
"""
Process a message for video content.
Args:
message: Discord message to process
Raises:
MessageHandlerError: If there's an error during processing
"""
# Start tracking
self.tracker.start_processing(message.id)
@@ -139,11 +231,19 @@ class MessageHandler:
)
try:
await message.add_reaction(REACTIONS["error"])
except:
pass
except Exception as react_error:
logger.error(f"Failed to add error reaction: {react_error}")
async def _process_message_internal(self, message: discord.Message) -> None:
"""Internal message processing logic"""
"""
Internal message processing logic.
Args:
message: Discord message to process
Raises:
MessageHandlerError: If there's an error during processing
"""
try:
# Get guild settings
settings = await self.config_manager.get_guild_settings(message.guild.id)
@@ -164,15 +264,19 @@ class MessageHandler:
MessageState.VALIDATING,
ProcessingStage.VALIDATION
)
is_valid, reason = await self.message_validator.validate_message(
message,
settings
)
# Cache result
self.validation_cache.add(message.id, {
"valid": is_valid,
"reason": reason
})
try:
is_valid, reason = await self.message_validator.validate_message(
message,
settings
)
# Cache result
self.validation_cache.add(message.id, MessageCacheEntry(
valid=is_valid,
reason=reason,
timestamp=datetime.utcnow().isoformat()
))
except ValidationError as e:
raise MessageHandlerError(f"Validation failed: {str(e)}")
if not is_valid:
logger.debug(f"Message validation failed: {reason}")
@@ -189,14 +293,17 @@ class MessageHandler:
MessageState.EXTRACTING,
ProcessingStage.EXTRACTION
)
urls = await self.url_extractor.extract_urls(
message,
enabled_sites=settings.get("enabled_sites")
)
if not urls:
logger.debug("No valid URLs found in message")
self.tracker.update_state(message.id, MessageState.IGNORED)
return
try:
urls: List[URLMetadata] = await self.url_extractor.extract_urls(
message,
enabled_sites=settings.get("enabled_sites")
)
if not urls:
logger.debug("No valid URLs found in message")
self.tracker.update_state(message.id, MessageState.IGNORED)
return
except Exception as e:
raise MessageHandlerError(f"URL extraction failed: {str(e)}")
# Process URLs
self.tracker.update_state(
@@ -204,7 +311,14 @@ class MessageHandler:
MessageState.PROCESSING,
ProcessingStage.QUEUEING
)
await self.queue_processor.process_urls(message, urls)
try:
await self.queue_processor.process_urls(
message,
urls,
priority=QueuePriority.NORMAL
)
except Exception as e:
raise MessageHandlerError(f"Queue processing failed: {str(e)}")
# Mark completion
self.tracker.update_state(
@@ -213,13 +327,10 @@ class MessageHandler:
ProcessingStage.COMPLETION
)
except Exception as e:
self.tracker.update_state(
message.id,
MessageState.FAILED,
error=str(e)
)
except MessageHandlerError:
raise
except Exception as e:
raise MessageHandlerError(f"Unexpected error: {str(e)}")
async def format_archive_message(
self,
@@ -227,30 +338,49 @@ class MessageHandler:
channel: discord.TextChannel,
url: str
) -> str:
"""Format message for archive channel"""
"""
Format message for archive channel.
Args:
author: Optional message author
channel: Channel the message was posted in
url: URL being archived
Returns:
Formatted message string
"""
return await self.queue_processor.format_archive_message(
author,
channel,
url
)
def get_message_status(self, message_id: int) -> Dict[str, Any]:
"""Get processing status for a message"""
def get_message_status(self, message_id: int) -> MessageStatus:
"""
Get processing status for a message.
Args:
message_id: Discord message ID
Returns:
Dictionary containing message status information
"""
return self.tracker.get_status(message_id)
def is_healthy(self) -> bool:
"""Check if handler is healthy"""
# Check for any stuck messages
current_time = datetime.utcnow()
for message_id, start_time in self.tracker.start_times.items():
if (
message_id in self.tracker.states and
self.tracker.states[message_id] not in (
MessageState.COMPLETED,
MessageState.FAILED,
MessageState.IGNORED
) and
(current_time - start_time).total_seconds() > 300 # 5 minutes timeout
):
return False
return True
"""
Check if handler is healthy.
Returns:
True if handler is healthy, False otherwise
"""
try:
# Check for any stuck messages
for message_id in self.tracker.states:
if self.tracker.is_message_stuck(message_id):
logger.warning(f"Message {message_id} appears to be stuck in processing")
return False
return True
except Exception as e:
logger.error(f"Error checking health: {e}")
return False

View File

@@ -1,19 +1,36 @@
"""Message validation functionality for video processing"""
import logging
from enum import Enum
from dataclasses import dataclass
from typing import Dict, Optional, Tuple, List, Any, Callable, Set
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import Dict, Optional, Tuple, List, Any, Callable, Set, TypedDict, ClassVar
from datetime import datetime
import discord
from ..utils.exceptions import ValidationError
logger = logging.getLogger("VideoArchiver")
class ValidationResult(Enum):
"""Possible validation results"""
VALID = "valid"
INVALID = "invalid"
IGNORED = "ignored"
VALID = auto()
INVALID = auto()
IGNORED = auto()
class ValidationStats(TypedDict):
"""Type definition for validation statistics"""
total: int
valid: int
invalid: int
ignored: int
cached: int
class ValidationCacheEntry(TypedDict):
"""Type definition for validation cache entry"""
valid: bool
reason: Optional[str]
rule: Optional[str]
timestamp: str
@dataclass
class ValidationContext:
@@ -28,22 +45,43 @@ class ValidationContext:
attachment_count: int
is_bot: bool
timestamp: datetime
validation_time: str = field(default_factory=lambda: datetime.utcnow().isoformat())
@classmethod
def from_message(cls, message: discord.Message, settings: Dict[str, Any]) -> 'ValidationContext':
"""Create context from message"""
return cls(
message=message,
settings=settings,
guild_id=message.guild.id,
channel_id=message.channel.id,
author_id=message.author.id,
roles={role.id for role in message.author.roles},
content_length=len(message.content) if message.content else 0,
attachment_count=len(message.attachments),
is_bot=message.author.bot,
timestamp=message.created_at
)
"""
Create context from message.
Args:
message: Discord message to validate
settings: Guild settings dictionary
Returns:
ValidationContext instance
Raises:
ValidationError: If message or settings are invalid
"""
if not message.guild:
raise ValidationError("Message must be from a guild")
if not settings:
raise ValidationError("Settings dictionary cannot be empty")
try:
return cls(
message=message,
settings=settings,
guild_id=message.guild.id,
channel_id=message.channel.id,
author_id=message.author.id,
roles={role.id for role in message.author.roles},
content_length=len(message.content) if message.content else 0,
attachment_count=len(message.attachments),
is_bot=message.author.bot,
timestamp=message.created_at
)
except Exception as e:
raise ValidationError(f"Failed to create validation context: {str(e)}")
@dataclass
class ValidationRule:
@@ -53,24 +91,48 @@ class ValidationRule:
validate: Callable[[ValidationContext], Tuple[bool, Optional[str]]]
enabled: bool = True
priority: int = 0
error_count: int = field(default=0)
last_error: Optional[str] = field(default=None)
last_run: Optional[str] = field(default=None)
def __post_init__(self) -> None:
"""Validate rule after initialization"""
if not callable(self.validate):
raise ValueError("Validate must be a callable")
if self.priority < 0:
raise ValueError("Priority must be non-negative")
class ValidationCache:
"""Caches validation results"""
def __init__(self, max_size: int = 1000):
def __init__(self, max_size: int = 1000) -> None:
self.max_size = max_size
self._cache: Dict[int, Dict[str, Any]] = {}
self._cache: Dict[int, ValidationCacheEntry] = {}
self._access_times: Dict[int, datetime] = {}
def add(self, message_id: int, result: Dict[str, Any]) -> None:
"""Add validation result to cache"""
def add(self, message_id: int, result: ValidationCacheEntry) -> None:
"""
Add validation result to cache.
Args:
message_id: Discord message ID
result: Validation result entry
"""
if len(self._cache) >= self.max_size:
self._cleanup_oldest()
self._cache[message_id] = result
self._access_times[message_id] = datetime.utcnow()
def get(self, message_id: int) -> Optional[Dict[str, Any]]:
"""Get cached validation result"""
def get(self, message_id: int) -> Optional[ValidationCacheEntry]:
"""
Get cached validation result.
Args:
message_id: Discord message ID
Returns:
Cached validation entry or None if not found
"""
if message_id in self._cache:
self._access_times[message_id] = datetime.utcnow()
return self._cache[message_id]
@@ -87,33 +149,28 @@ class ValidationCache:
class ValidationRuleManager:
"""Manages validation rules"""
def __init__(self):
self.rules: List[ValidationRule] = [
ValidationRule(
name="content_check",
description="Check if message has content to process",
validate=self._validate_content,
priority=1
),
ValidationRule(
name="guild_enabled",
description="Check if archiving is enabled for guild",
validate=self._validate_guild_enabled,
priority=2
),
ValidationRule(
name="channel_enabled",
description="Check if channel is enabled for archiving",
validate=self._validate_channel,
priority=3
),
ValidationRule(
name="user_roles",
description="Check if user has required roles",
validate=self._validate_user_roles,
priority=4
)
]
DEFAULT_RULES: ClassVar[List[Tuple[str, str, int]]] = [
("content_check", "Check if message has content to process", 1),
("guild_enabled", "Check if archiving is enabled for guild", 2),
("channel_enabled", "Check if channel is enabled for archiving", 3),
("user_roles", "Check if user has required roles", 4)
]
def __init__(self) -> None:
self.rules: List[ValidationRule] = []
self._initialize_rules()
def _initialize_rules(self) -> None:
"""Initialize default validation rules"""
for name, description, priority in self.DEFAULT_RULES:
validate_method = getattr(self, f"_validate_{name}", None)
if validate_method:
self.rules.append(ValidationRule(
name=name,
description=description,
validate=validate_method,
priority=priority
))
self.rules.sort(key=lambda x: x.priority)
def _validate_content(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
@@ -145,10 +202,10 @@ class ValidationRuleManager:
class MessageValidator:
"""Handles validation of messages for video processing"""
def __init__(self):
def __init__(self) -> None:
self.rule_manager = ValidationRuleManager()
self.cache = ValidationCache()
self.validation_stats: Dict[str, int] = {
self.validation_stats: ValidationStats = {
"total": 0,
"valid": 0,
"invalid": 0,
@@ -159,50 +216,80 @@ class MessageValidator:
async def validate_message(
self,
message: discord.Message,
settings: Dict
settings: Dict[str, Any]
) -> Tuple[bool, Optional[str]]:
"""Validate if a message should be processed"""
self.validation_stats["total"] += 1
"""
Validate if a message should be processed.
Args:
message: Discord message to validate
settings: Guild settings dictionary
Returns:
Tuple of (is_valid, reason)
Raises:
ValidationError: If validation fails unexpectedly
"""
try:
self.validation_stats["total"] += 1
# Check cache
cached = self.cache.get(message.id)
if cached:
self.validation_stats["cached"] += 1
return cached["valid"], cached.get("reason")
# Check cache
cached = self.cache.get(message.id)
if cached:
self.validation_stats["cached"] += 1
return cached["valid"], cached.get("reason")
# Create validation context
ctx = ValidationContext.from_message(message, settings)
# Create validation context
ctx = ValidationContext.from_message(message, settings)
# Run validation rules
for rule in self.rule_manager.rules:
if not rule.enabled:
continue
# Run validation rules
for rule in self.rule_manager.rules:
if not rule.enabled:
continue
try:
valid, reason = rule.validate(ctx)
if not valid:
self.validation_stats["invalid"] += 1
# Cache result
self.cache.add(message.id, {
"valid": False,
"reason": reason,
"rule": rule.name
})
return False, reason
except Exception as e:
logger.error(f"Error in validation rule {rule.name}: {e}")
return False, f"Validation error: {str(e)}"
try:
rule.last_run = datetime.utcnow().isoformat()
valid, reason = rule.validate(ctx)
if not valid:
self.validation_stats["invalid"] += 1
# Cache result
self.cache.add(message.id, ValidationCacheEntry(
valid=False,
reason=reason,
rule=rule.name,
timestamp=datetime.utcnow().isoformat()
))
return False, reason
except Exception as e:
rule.error_count += 1
rule.last_error = str(e)
logger.error(f"Error in validation rule {rule.name}: {e}", exc_info=True)
raise ValidationError(f"Validation rule {rule.name} failed: {str(e)}")
# Message passed all rules
self.validation_stats["valid"] += 1
self.cache.add(message.id, {
"valid": True,
"reason": None
})
return True, None
# Message passed all rules
self.validation_stats["valid"] += 1
self.cache.add(message.id, ValidationCacheEntry(
valid=True,
reason=None,
rule=None,
timestamp=datetime.utcnow().isoformat()
))
return True, None
except ValidationError:
raise
except Exception as e:
logger.error(f"Unexpected error in message validation: {e}", exc_info=True)
raise ValidationError(f"Validation failed: {str(e)}")
def get_stats(self) -> Dict[str, Any]:
"""Get validation statistics"""
"""
Get validation statistics.
Returns:
Dictionary containing validation statistics and rule information
"""
return {
"validation_stats": self.validation_stats.copy(),
"rules": [
@@ -210,16 +297,27 @@ class MessageValidator:
"name": rule.name,
"description": rule.description,
"enabled": rule.enabled,
"priority": rule.priority
"priority": rule.priority,
"error_count": rule.error_count,
"last_error": rule.last_error,
"last_run": rule.last_run
}
for rule in self.rule_manager.rules
]
}
def clear_cache(self, message_id: Optional[int] = None) -> None:
"""Clear validation cache"""
if message_id:
self.cache._cache.pop(message_id, None)
self.cache._access_times.pop(message_id, None)
else:
self.cache = ValidationCache(self.cache.max_size)
"""
Clear validation cache.
Args:
message_id: Optional message ID to clear cache for. If None, clears all cache.
"""
try:
if message_id:
self.cache._cache.pop(message_id, None)
self.cache._access_times.pop(message_id, None)
else:
self.cache = ValidationCache(self.cache.max_size)
except Exception as e:
logger.error(f"Error clearing validation cache: {e}", exc_info=True)

View File

@@ -1,21 +1,55 @@
"""Queue processing and video handling operations"""
"""Queue handling functionality for video processing"""
import os
import logging
import asyncio
import discord
from typing import Dict, Optional, Tuple, Any
import os
from enum import Enum, auto
from typing import Optional, Dict, Any, List, Tuple, Set, TypedDict, ClassVar, Callable
from datetime import datetime
import discord
from ..utils.progress_tracker import ProgressTracker
from ..database.video_archive_db import VideoArchiveDB
from ..utils.download_manager import DownloadManager
from ..utils.message_manager import MessageManager
from ..utils.exceptions import QueueHandlerError
from ..queue.models import QueueItem
from ..config_manager import ConfigManager
from .constants import REACTIONS
from .progress_tracker import ProgressTracker
logger = logging.getLogger("VideoArchiver")
class QueueItemStatus(Enum):
"""Status of a queue item"""
PENDING = auto()
PROCESSING = auto()
COMPLETED = auto()
FAILED = auto()
CANCELLED = auto()
class QueueStats(TypedDict):
"""Type definition for queue statistics"""
active_downloads: int
processing_items: int
completed_items: int
failed_items: int
average_processing_time: float
last_processed: Optional[str]
is_healthy: bool
class QueueHandler:
"""Handles queue processing and video operations"""
def __init__(self, bot, config_manager, components, db=None):
DOWNLOAD_TIMEOUT: ClassVar[int] = 3600 # 1 hour in seconds
MAX_RETRIES: ClassVar[int] = 3
def __init__(
self,
bot: discord.Client,
config_manager: ConfigManager,
components: Dict[int, Dict[str, Any]],
db: Optional[VideoArchiveDB] = None
) -> None:
self.bot = bot
self.config_manager = config_manager
self.components = components
@@ -24,101 +58,240 @@ class QueueHandler:
self._active_downloads: Dict[str, asyncio.Task] = {}
self._active_downloads_lock = asyncio.Lock()
self.progress_tracker = ProgressTracker()
self._stats: QueueStats = {
"active_downloads": 0,
"processing_items": 0,
"completed_items": 0,
"failed_items": 0,
"average_processing_time": 0.0,
"last_processed": None,
"is_healthy": True
}
async def process_video(self, item) -> Tuple[bool, Optional[str]]:
"""Process a video from the queue"""
async def process_video(self, item: QueueItem) -> Tuple[bool, Optional[str]]:
"""
Process a video from the queue.
Args:
item: Queue item to process
Returns:
Tuple of (success, error_message)
Raises:
QueueHandlerError: If there's an error during processing
"""
if self._unloading:
return False, "Processor is unloading"
file_path = None
original_message = None
download_task = None
start_time = datetime.utcnow()
try:
# Start processing
self._stats["processing_items"] += 1
item.start_processing()
logger.info(f"Started processing video: {item.url}")
# Check if video is already archived
if self.db and self.db.is_url_archived(item.url):
logger.info(f"Video already archived: {item.url}")
if original_message := await self._get_original_message(item):
await original_message.add_reaction(REACTIONS["success"])
archived_info = self.db.get_archived_video(item.url)
if archived_info:
await original_message.reply(f"This video was already archived. You can find it here: {archived_info[0]}")
item.finish_processing(True)
if self.db and await self._check_archived_video(item):
self._update_stats(True, start_time)
return True, None
guild_id = item.guild_id
if guild_id not in self.components:
error = f"No components found for guild {guild_id}"
item.finish_processing(False, error)
return False, error
components = self.components[guild_id]
# Get components
components = await self._get_components(item.guild_id)
downloader = components.get("downloader")
message_manager = components.get("message_manager")
if not downloader or not message_manager:
error = f"Missing required components for guild {guild_id}"
item.finish_processing(False, error)
return False, error
raise QueueHandlerError(f"Missing required components for guild {item.guild_id}")
# Get original message and update reactions
original_message = await self._get_original_message(item)
if original_message:
await original_message.remove_reaction(REACTIONS["queued"], self.bot.user)
await original_message.add_reaction(REACTIONS["processing"])
logger.info(f"Started processing message {item.message_id}")
await self._update_message_reactions(original_message, QueueItemStatus.PROCESSING)
# Create progress callback
progress_callback = self._create_progress_callback(original_message, item.url)
# Download video
success, file_path, error = await self._download_video(
downloader, item.url, progress_callback
# Download and archive video
file_path = await self._process_video_file(
downloader, message_manager, item, original_message
)
if not success:
if original_message:
await original_message.add_reaction(REACTIONS["error"])
logger.error(f"Download failed for message {item.message_id}: {error}")
item.finish_processing(False, f"Failed to download video: {error}")
return False, f"Failed to download video: {error}"
# Archive video
success, error = await self._archive_video(
guild_id, original_message, message_manager, item.url, file_path
)
# Finish processing
item.finish_processing(success, error if not success else None)
return success, error
# Success
self._update_stats(True, start_time)
item.finish_processing(True)
if original_message:
await self._update_message_reactions(original_message, QueueItemStatus.COMPLETED)
return True, None
except QueueHandlerError as e:
logger.error(f"Queue handler error: {str(e)}")
self._handle_processing_error(item, original_message, str(e))
return False, str(e)
except Exception as e:
logger.error(f"Error processing video: {str(e)}", exc_info=True)
item.finish_processing(False, str(e))
self._handle_processing_error(item, original_message, str(e))
return False, str(e)
finally:
# Clean up downloaded file
if file_path and os.path.exists(file_path):
try:
os.unlink(file_path)
except Exception as e:
logger.error(f"Failed to clean up file {file_path}: {e}")
await self._cleanup_file(file_path)
async def _archive_video(self, guild_id: int, original_message: Optional[discord.Message],
message_manager, url: str, file_path: str) -> Tuple[bool, Optional[str]]:
"""Archive downloaded video"""
async def _check_archived_video(self, item: QueueItem) -> bool:
"""Check if video is already archived and handle accordingly"""
if not self.db:
return False
if self.db.is_url_archived(item.url):
logger.info(f"Video already archived: {item.url}")
if original_message := await self._get_original_message(item):
await self._update_message_reactions(original_message, QueueItemStatus.COMPLETED)
archived_info = self.db.get_archived_video(item.url)
if archived_info:
await original_message.reply(
f"This video was already archived. You can find it here: {archived_info[0]}"
)
item.finish_processing(True)
return True
return False
async def _get_components(
self,
guild_id: int
) -> Dict[str, Any]:
"""Get required components for processing"""
if guild_id not in self.components:
raise QueueHandlerError(f"No components found for guild {guild_id}")
return self.components[guild_id]
async def _process_video_file(
self,
downloader: DownloadManager,
message_manager: MessageManager,
item: QueueItem,
original_message: Optional[discord.Message]
) -> Optional[str]:
"""Download and process video file"""
# Create progress callback
progress_callback = self._create_progress_callback(original_message, item.url)
# Download video
success, file_path, error = await self._download_video(
downloader, item.url, progress_callback
)
if not success:
raise QueueHandlerError(f"Failed to download video: {error}")
# Archive video
success, error = await self._archive_video(
item.guild_id,
original_message,
message_manager,
item.url,
file_path
)
if not success:
raise QueueHandlerError(f"Failed to archive video: {error}")
return file_path
def _handle_processing_error(
self,
item: QueueItem,
message: Optional[discord.Message],
error: str
) -> None:
"""Handle processing error"""
self._update_stats(False, datetime.utcnow())
item.finish_processing(False, error)
if message:
asyncio.create_task(self._update_message_reactions(message, QueueItemStatus.FAILED))
def _update_stats(self, success: bool, start_time: datetime) -> None:
"""Update queue statistics"""
processing_time = (datetime.utcnow() - start_time).total_seconds()
self._stats["processing_items"] -= 1
if success:
self._stats["completed_items"] += 1
else:
self._stats["failed_items"] += 1
# Update average processing time
total_items = self._stats["completed_items"] + self._stats["failed_items"]
if total_items > 0:
current_total = self._stats["average_processing_time"] * (total_items - 1)
self._stats["average_processing_time"] = (current_total + processing_time) / total_items
self._stats["last_processed"] = datetime.utcnow().isoformat()
async def _update_message_reactions(
self,
message: discord.Message,
status: QueueItemStatus
) -> None:
"""Update message reactions based on status"""
try:
# Remove existing reactions
for reaction in [
REACTIONS["queued"],
REACTIONS["processing"],
REACTIONS["success"],
REACTIONS["error"]
]:
try:
await message.remove_reaction(reaction, self.bot.user)
except:
pass
# Add new reaction
if status == QueueItemStatus.PROCESSING:
await message.add_reaction(REACTIONS["processing"])
elif status == QueueItemStatus.COMPLETED:
await message.add_reaction(REACTIONS["success"])
elif status == QueueItemStatus.FAILED:
await message.add_reaction(REACTIONS["error"])
except Exception as e:
logger.error(f"Error updating message reactions: {e}")
async def _cleanup_file(self, file_path: Optional[str]) -> None:
"""Clean up downloaded file"""
if file_path and os.path.exists(file_path):
try:
os.unlink(file_path)
except Exception as e:
logger.error(f"Failed to clean up file {file_path}: {e}")
async def _archive_video(
self,
guild_id: int,
original_message: Optional[discord.Message],
message_manager: MessageManager,
url: str,
file_path: str
) -> Tuple[bool, Optional[str]]:
"""
Archive downloaded video.
Args:
guild_id: Discord guild ID
original_message: Original message containing the video
message_manager: Message manager instance
url: Video URL
file_path: Path to downloaded video file
Returns:
Tuple of (success, error_message)
Raises:
QueueHandlerError: If archiving fails
"""
try:
# Get archive channel
guild = self.bot.get_guild(guild_id)
if not guild:
return False, f"Guild {guild_id} not found"
raise QueueHandlerError(f"Guild {guild_id} not found")
archive_channel = await self.config_manager.get_channel(guild, "archive")
if not archive_channel:
return False, "Archive channel not configured"
raise QueueHandlerError("Archive channel not configured")
# Format message
try:
@@ -128,13 +301,16 @@ class QueueHandler:
author=author, channel=channel, url=url
)
except Exception as e:
return False, f"Failed to format message: {str(e)}"
raise QueueHandlerError(f"Failed to format message: {str(e)}")
# Upload to archive channel
if not os.path.exists(file_path):
return False, "Processed file not found"
raise QueueHandlerError("Processed file not found")
archive_message = await archive_channel.send(content=message, file=discord.File(file_path))
archive_message = await archive_channel.send(
content=message,
file=discord.File(file_path)
)
# Store in database if available
if self.db and archive_message.attachments:
@@ -148,26 +324,28 @@ class QueueHandler:
)
logger.info(f"Added video to archive database: {url} -> {discord_url}")
if original_message:
await original_message.remove_reaction(REACTIONS["processing"], self.bot.user)
await original_message.add_reaction(REACTIONS["success"])
logger.info(f"Successfully processed message {original_message.id}")
return True, None
except discord.HTTPException as e:
if original_message:
await original_message.add_reaction(REACTIONS["error"])
logger.error(f"Failed to upload to Discord: {str(e)}")
return False, f"Failed to upload to Discord: {str(e)}"
raise QueueHandlerError(f"Failed to upload to Discord: {str(e)}")
except Exception as e:
if original_message:
await original_message.add_reaction(REACTIONS["error"])
logger.error(f"Failed to archive video: {str(e)}")
return False, f"Failed to archive video: {str(e)}"
raise QueueHandlerError(f"Failed to archive video: {str(e)}")
async def _get_original_message(self, item) -> Optional[discord.Message]:
"""Retrieve the original message"""
async def _get_original_message(
self,
item: QueueItem
) -> Optional[discord.Message]:
"""
Retrieve the original message.
Args:
item: Queue item containing message details
Returns:
Original Discord message or None if not found
"""
try:
channel = self.bot.get_channel(item.channel_id)
if not channel:
@@ -179,8 +357,21 @@ class QueueHandler:
logger.error(f"Error fetching original message: {e}")
return None
def _create_progress_callback(self, message: Optional[discord.Message], url: str):
"""Create progress callback function for download tracking"""
def _create_progress_callback(
self,
message: Optional[discord.Message],
url: str
) -> Callable[[float], None]:
"""
Create progress callback function for download tracking.
Args:
message: Discord message to update with progress
url: URL being downloaded
Returns:
Callback function for progress updates
"""
def progress_callback(progress: float) -> None:
if message:
try:
@@ -204,22 +395,45 @@ class QueueHandler:
logger.error(f"Error in progress callback: {e}")
return progress_callback
async def _download_video(self, downloader, url: str, progress_callback) -> Tuple[bool, Optional[str], Optional[str]]:
"""Download video with progress tracking"""
async def _download_video(
self,
downloader: DownloadManager,
url: str,
progress_callback: Callable[[float], None]
) -> Tuple[bool, Optional[str], Optional[str]]:
"""
Download video with progress tracking.
Args:
downloader: Download manager instance
url: URL to download
progress_callback: Callback for progress updates
Returns:
Tuple of (success, file_path, error_message)
"""
download_task = asyncio.create_task(
downloader.download_video(url, progress_callback=progress_callback)
)
async with self._active_downloads_lock:
self._active_downloads[url] = download_task
self._stats["active_downloads"] += 1
try:
success, file_path, error = await download_task
success, file_path, error = await asyncio.wait_for(
download_task,
timeout=self.DOWNLOAD_TIMEOUT
)
if success:
self.progress_tracker.complete_download(url)
else:
self.progress_tracker.increment_download_retries(url)
return success, file_path, error
except asyncio.TimeoutError:
logger.error(f"Download timed out for {url}")
return False, None, "Download timed out"
except asyncio.CancelledError:
logger.info(f"Download cancelled for {url}")
return False, None, "Download cancelled"
@@ -229,9 +443,15 @@ class QueueHandler:
finally:
async with self._active_downloads_lock:
self._active_downloads.pop(url, None)
self._stats["active_downloads"] -= 1
async def cleanup(self):
"""Clean up resources and stop processing"""
async def cleanup(self) -> None:
"""
Clean up resources and stop processing.
Raises:
QueueHandlerError: If cleanup fails
"""
try:
logger.info("Starting QueueHandler cleanup...")
self._unloading = True
@@ -248,14 +468,15 @@ class QueueHandler:
except Exception as e:
logger.error(f"Error cancelling download task for {url}: {e}")
self._active_downloads.clear()
self._stats["active_downloads"] = 0
logger.info("QueueHandler cleanup completed successfully")
except Exception as e:
logger.error(f"Error during QueueHandler cleanup: {str(e)}", exc_info=True)
raise
raise QueueHandlerError(f"Cleanup failed: {str(e)}")
async def force_cleanup(self):
async def force_cleanup(self) -> None:
"""Force cleanup of resources when normal cleanup fails"""
try:
logger.info("Starting force cleanup of QueueHandler...")
@@ -266,13 +487,18 @@ class QueueHandler:
if not task.done():
task.cancel()
self._active_downloads.clear()
self._stats["active_downloads"] = 0
logger.info("QueueHandler force cleanup completed")
except Exception as e:
logger.error(f"Error during QueueHandler force cleanup: {str(e)}", exc_info=True)
async def _update_download_progress_reaction(self, message: discord.Message, progress: float):
async def _update_download_progress_reaction(
self,
message: discord.Message,
progress: float
) -> None:
"""Update download progress reaction on message"""
if not message:
return
@@ -307,12 +533,41 @@ class QueueHandler:
logger.error(f"Failed to update download progress reaction: {e}")
def is_healthy(self) -> bool:
"""Check if handler is healthy"""
# Check if any downloads are stuck
current_time = datetime.utcnow()
for url, task in self._active_downloads.items():
if not task.done() and task.get_coro().cr_frame.f_locals.get('start_time'):
start_time = task.get_coro().cr_frame.f_locals['start_time']
if (current_time - start_time).total_seconds() > 3600: # 1 hour timeout
"""
Check if handler is healthy.
Returns:
True if handler is healthy, False otherwise
"""
try:
# Check if any downloads are stuck
current_time = datetime.utcnow()
for url, task in self._active_downloads.items():
if not task.done() and task.get_coro().cr_frame.f_locals.get('start_time'):
start_time = task.get_coro().cr_frame.f_locals['start_time']
if (current_time - start_time).total_seconds() > self.DOWNLOAD_TIMEOUT:
self._stats["is_healthy"] = False
return False
# Check processing metrics
if self._stats["processing_items"] > 0:
if self._stats["average_processing_time"] > self.DOWNLOAD_TIMEOUT:
self._stats["is_healthy"] = False
return False
return True
self._stats["is_healthy"] = True
return True
except Exception as e:
logger.error(f"Error checking health: {e}")
self._stats["is_healthy"] = False
return False
def get_stats(self) -> QueueStats:
"""
Get queue handler statistics.
Returns:
Dictionary containing queue statistics
"""
return self._stats.copy()

View File

@@ -2,21 +2,24 @@
import logging
import asyncio
from enum import Enum
from typing import List, Optional, Dict, Any, Set
from enum import Enum, auto
from typing import List, Optional, Dict, Any, Set, Union, TypedDict, ClassVar
from datetime import datetime
import discord
from ..queue.models import QueueItem
from ..queue.manager import EnhancedVideoQueueManager
from .constants import REACTIONS
from .url_extractor import URLMetadata
from ..utils.exceptions import QueueProcessingError
logger = logging.getLogger("VideoArchiver")
class QueuePriority(Enum):
"""Queue item priorities"""
HIGH = 0
NORMAL = 1
LOW = 2
HIGH = auto()
NORMAL = auto()
LOW = auto()
class ProcessingStrategy(Enum):
"""Available processing strategies"""
@@ -24,10 +27,22 @@ class ProcessingStrategy(Enum):
PRIORITY = "priority" # Process by priority
SMART = "smart" # Smart processing based on various factors
class QueueStats(TypedDict):
"""Type definition for queue statistics"""
total_processed: int
successful: int
failed: int
success_rate: float
average_processing_time: float
error_counts: Dict[str, int]
last_processed: Optional[str]
class QueueMetrics:
"""Tracks queue processing metrics"""
def __init__(self):
MAX_PROCESSING_TIME: ClassVar[float] = 3600.0 # 1 hour in seconds
def __init__(self) -> None:
self.total_processed = 0
self.successful = 0
self.failed = 0
@@ -36,49 +51,67 @@ class QueueMetrics:
self.last_processed: Optional[datetime] = None
def record_success(self, processing_time: float) -> None:
"""Record successful processing"""
"""
Record successful processing.
Args:
processing_time: Time taken to process in seconds
"""
if processing_time > self.MAX_PROCESSING_TIME:
logger.warning(f"Unusually long processing time: {processing_time} seconds")
self.total_processed += 1
self.successful += 1
self.processing_times.append(processing_time)
self.last_processed = datetime.utcnow()
def record_failure(self, error: str) -> None:
"""Record processing failure"""
"""
Record processing failure.
Args:
error: Error message describing the failure
"""
self.total_processed += 1
self.failed += 1
self.errors[error] = self.errors.get(error, 0) + 1
self.last_processed = datetime.utcnow()
def get_stats(self) -> Dict[str, Any]:
"""Get queue metrics"""
def get_stats(self) -> QueueStats:
"""
Get queue metrics.
Returns:
Dictionary containing queue statistics
"""
avg_time = (
sum(self.processing_times) / len(self.processing_times)
if self.processing_times
else 0
)
return {
"total_processed": self.total_processed,
"successful": self.successful,
"failed": self.failed,
"success_rate": (
return QueueStats(
total_processed=self.total_processed,
successful=self.successful,
failed=self.failed,
success_rate=(
self.successful / self.total_processed
if self.total_processed > 0
else 0
),
"average_processing_time": avg_time,
"error_counts": self.errors.copy(),
"last_processed": self.last_processed
}
average_processing_time=avg_time,
error_counts=self.errors.copy(),
last_processed=self.last_processed.isoformat() if self.last_processed else None
)
class QueueProcessor:
"""Handles adding videos to the processing queue"""
def __init__(
self,
queue_manager,
queue_manager: EnhancedVideoQueueManager,
strategy: ProcessingStrategy = ProcessingStrategy.SMART,
max_retries: int = 3
):
) -> None:
self.queue_manager = queue_manager
self.strategy = strategy
self.max_retries = max_retries
@@ -89,16 +122,34 @@ class QueueProcessor:
async def process_urls(
self,
message: discord.Message,
urls: List[str],
urls: Union[List[str], Set[str], List[URLMetadata]],
priority: QueuePriority = QueuePriority.NORMAL
) -> None:
"""Process extracted URLs by adding them to the queue"""
for url in urls:
"""
Process extracted URLs by adding them to the queue.
Args:
message: Discord message containing the URLs
urls: List or set of URLs or URLMetadata objects to process
priority: Priority level for queue processing
Raises:
QueueProcessingError: If there's an error adding URLs to the queue
"""
processed_urls: Set[str] = set()
for url_data in urls:
url = url_data.url if isinstance(url_data, URLMetadata) else url_data
if url in processed_urls:
logger.debug(f"Skipping duplicate URL: {url}")
continue
try:
logger.info(f"Adding URL to queue: {url}")
await message.add_reaction(REACTIONS['queued'])
# Create queue item using the model from queue.models
# Create queue item
item = QueueItem(
url=url,
message_id=message.id,
@@ -111,15 +162,24 @@ class QueueProcessor:
# Add to queue with appropriate strategy
await self._add_to_queue(item)
processed_urls.add(url)
logger.info(f"Successfully added video to queue: {url}")
except Exception as e:
logger.error(f"Failed to add video to queue: {str(e)}")
logger.error(f"Failed to add video to queue: {str(e)}", exc_info=True)
await message.add_reaction(REACTIONS['error'])
continue
raise QueueProcessingError(f"Failed to add URL to queue: {str(e)}")
async def _add_to_queue(self, item: QueueItem) -> None:
"""Add item to queue using current strategy"""
"""
Add item to queue using current strategy.
Args:
item: Queue item to add
Raises:
QueueProcessingError: If there's an error adding the item
"""
async with self._processing_lock:
if item.url in self._processing:
logger.debug(f"URL already being processed: {item.url}")
@@ -136,6 +196,9 @@ class QueueProcessor:
else: # FIFO
await self._add_fifo(item)
except Exception as e:
logger.error(f"Error adding item to queue: {e}", exc_info=True)
raise QueueProcessingError(f"Failed to add item to queue: {str(e)}")
finally:
async with self._processing_lock:
self._processing.remove(item.url)
@@ -153,7 +216,6 @@ class QueueProcessor:
async def _add_with_smart_strategy(self, item: QueueItem) -> None:
"""Add item using smart processing strategy"""
# Calculate priority based on various factors
priority = await self._calculate_smart_priority(item)
await self.queue_manager.add_to_queue(
@@ -177,7 +239,15 @@ class QueueProcessor:
)
async def _calculate_smart_priority(self, item: QueueItem) -> int:
"""Calculate priority using smart strategy"""
"""
Calculate priority using smart strategy.
Args:
item: Queue item to calculate priority for
Returns:
Calculated priority value
"""
base_priority = item.priority
# Adjust based on queue metrics
@@ -203,7 +273,17 @@ class QueueProcessor:
channel: discord.TextChannel,
url: str
) -> str:
"""Format message for archive channel"""
"""
Format message for archive channel.
Args:
author: Optional message author
channel: Channel the message was posted in
url: URL being archived
Returns:
Formatted message string
"""
author_mention = author.mention if author else "Unknown User"
channel_mention = channel.mention if channel else "Unknown Channel"
@@ -213,7 +293,12 @@ class QueueProcessor:
)
def get_metrics(self) -> Dict[str, Any]:
"""Get queue processing metrics"""
"""
Get queue processing metrics.
Returns:
Dictionary containing queue metrics and status
"""
return {
"metrics": self.metrics.get_stats(),
"strategy": self.strategy.value,

View File

@@ -2,112 +2,184 @@
import logging
import asyncio
import re
from typing import List, Optional
import discord
from urllib.parse import urlparse
from .constants import REACTIONS
from .constants import REACTIONS, ReactionType, get_reaction, get_progress_emoji
from ..database.video_archive_db import VideoArchiveDB
logger = logging.getLogger("VideoArchiver")
async def handle_archived_reaction(message: discord.Message, user: discord.User, db) -> None:
"""Handle reaction to archived video message"""
async def handle_archived_reaction(
message: discord.Message,
user: discord.User,
db: VideoArchiveDB
) -> None:
"""
Handle reaction to archived video message.
Args:
message: The Discord message that was reacted to
user: The user who added the reaction
db: Database instance for checking archived videos
"""
try:
# Check if the reaction is from a user (not the bot) and is the archived reaction
if user.bot or str(message.reactions[0].emoji) != REACTIONS['archived']:
if user.bot or str(message.reactions[0].emoji) != get_reaction(ReactionType.ARCHIVED):
return
# Extract URLs from the message
urls = []
if message.content:
for word in message.content.split():
if any(s in word.lower() for s in ['http://', 'https://']):
urls.append(word)
# Extract URLs from the message using regex
url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')
urls = url_pattern.findall(message.content) if message.content else []
# Check each URL in the database
for url in urls:
# Ensure URL has proper scheme
if url.startswith('www.'):
url = 'http://' + url
# Validate URL
try:
result = urlparse(url)
if not all([result.scheme, result.netloc]):
continue
except Exception:
continue
result = db.get_archived_video(url)
if result:
discord_url = result[0]
await message.reply(f"This video was already archived. You can find it here: {discord_url}")
await message.reply(
f"This video was already archived. You can find it here: {discord_url}"
)
return
except Exception as e:
logger.error(f"Error handling archived reaction: {e}")
logger.error(f"Error handling archived reaction: {e}", exc_info=True)
async def update_queue_position_reaction(message: discord.Message, position: int, bot_user) -> None:
"""Update queue position reaction"""
async def update_queue_position_reaction(
message: discord.Message,
position: int,
bot_user: discord.ClientUser
) -> None:
"""
Update queue position reaction.
Args:
message: The Discord message to update reactions on
position: Queue position (0-based index)
bot_user: The bot's user instance for managing reactions
"""
try:
for reaction in REACTIONS["numbers"]:
numbers = get_reaction(ReactionType.NUMBERS)
if not isinstance(numbers, list):
logger.error("Numbers reaction is not a list")
return
# Remove old reactions
for reaction in numbers:
try:
await message.remove_reaction(reaction, bot_user)
except:
pass
except discord.HTTPException as e:
logger.warning(f"Failed to remove number reaction: {e}")
except Exception as e:
logger.error(f"Unexpected error removing number reaction: {e}")
# Add new reaction if position is valid
if 0 <= position < len(numbers):
try:
await message.add_reaction(numbers[position])
logger.info(
f"Updated queue position reaction to {position + 1} for message {message.id}"
)
except discord.HTTPException as e:
logger.error(f"Failed to add queue position reaction: {e}")
if 0 <= position < len(REACTIONS["numbers"]):
await message.add_reaction(REACTIONS["numbers"][position])
logger.info(
f"Updated queue position reaction to {position + 1} for message {message.id}"
)
except Exception as e:
logger.error(f"Failed to update queue position reaction: {e}")
logger.error(f"Failed to update queue position reaction: {e}", exc_info=True)
async def update_progress_reaction(message: discord.Message, progress: float, bot_user) -> None:
"""Update progress reaction based on FFmpeg progress"""
async def update_progress_reaction(
message: discord.Message,
progress: float,
bot_user: discord.ClientUser
) -> None:
"""
Update progress reaction based on FFmpeg progress.
Args:
message: The Discord message to update reactions on
progress: Progress value between 0 and 100
bot_user: The bot's user instance for managing reactions
"""
if not message:
return
try:
# Remove old reactions in the event loop
for reaction in REACTIONS["progress"]:
progress_emojis = get_reaction(ReactionType.PROGRESS)
if not isinstance(progress_emojis, list):
logger.error("Progress reaction is not a list")
return
# Remove old reactions
for reaction in progress_emojis:
try:
await message.remove_reaction(reaction, bot_user)
except discord.HTTPException as e:
logger.warning(f"Failed to remove progress reaction: {e}")
except Exception as e:
logger.error(f"Failed to remove progress reaction: {e}")
continue
logger.error(f"Unexpected error removing progress reaction: {e}")
# Add new reaction based on progress
try:
if progress < 33:
await message.add_reaction(REACTIONS["progress"][0])
elif progress < 66:
await message.add_reaction(REACTIONS["progress"][1])
else:
await message.add_reaction(REACTIONS["progress"][2])
normalized_progress = progress / 100 # Convert to 0-1 range
emoji = get_progress_emoji(normalized_progress, progress_emojis)
await message.add_reaction(emoji)
except Exception as e:
logger.error(f"Failed to add progress reaction: {e}")
except Exception as e:
logger.error(f"Failed to update progress reaction: {e}")
logger.error(f"Failed to update progress reaction: {e}", exc_info=True)
async def update_download_progress_reaction(message: discord.Message, progress: float, bot_user) -> None:
"""Update download progress reaction"""
async def update_download_progress_reaction(
message: discord.Message,
progress: float,
bot_user: discord.ClientUser
) -> None:
"""
Update download progress reaction.
Args:
message: The Discord message to update reactions on
progress: Progress value between 0 and 100
bot_user: The bot's user instance for managing reactions
"""
if not message:
return
try:
# Remove old reactions in the event loop
for reaction in REACTIONS["download"]:
download_emojis = get_reaction(ReactionType.DOWNLOAD)
if not isinstance(download_emojis, list):
logger.error("Download reaction is not a list")
return
# Remove old reactions
for reaction in download_emojis:
try:
await message.remove_reaction(reaction, bot_user)
except discord.HTTPException as e:
logger.warning(f"Failed to remove download reaction: {e}")
except Exception as e:
logger.error(f"Failed to remove download reaction: {e}")
continue
logger.error(f"Unexpected error removing download reaction: {e}")
# Add new reaction based on progress
try:
if progress <= 20:
await message.add_reaction(REACTIONS["download"][0])
elif progress <= 40:
await message.add_reaction(REACTIONS["download"][1])
elif progress <= 60:
await message.add_reaction(REACTIONS["download"][2])
elif progress <= 80:
await message.add_reaction(REACTIONS["download"][3])
elif progress < 100:
await message.add_reaction(REACTIONS["download"][4])
else:
await message.add_reaction(REACTIONS["download"][5])
normalized_progress = progress / 100 # Convert to 0-1 range
emoji = get_progress_emoji(normalized_progress, download_emojis)
await message.add_reaction(emoji)
except Exception as e:
logger.error(f"Failed to add download reaction: {e}")
except Exception as e:
logger.error(f"Failed to update download progress reaction: {e}")
logger.error(f"Failed to update download progress reaction: {e}", exc_info=True)

View File

@@ -1,23 +1,39 @@
"""Module for handling queue status display and formatting"""
import discord
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, Any, List, Optional
import logging
from enum import Enum, auto
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Any, List, Optional, Callable, TypeVar, Union, TypedDict, ClassVar
import discord
from ..utils.exceptions import DisplayError
logger = logging.getLogger("VideoArchiver")
class DisplayTheme:
"""Defines display themes"""
DEFAULT = {
"title_color": discord.Color.blue(),
"success_color": discord.Color.green(),
"warning_color": discord.Color.gold(),
"error_color": discord.Color.red(),
"info_color": discord.Color.blurple()
}
T = TypeVar('T')
class DisplayTheme(TypedDict):
"""Type definition for display theme"""
title_color: discord.Color
success_color: discord.Color
warning_color: discord.Color
error_color: discord.Color
info_color: discord.Color
class DisplaySection(Enum):
"""Available display sections"""
QUEUE_STATS = auto()
DOWNLOADS = auto()
COMPRESSIONS = auto()
ERRORS = auto()
HARDWARE = auto()
class DisplayCondition(Enum):
"""Display conditions for sections"""
HAS_ERRORS = "has_errors"
HAS_DOWNLOADS = "has_downloads"
HAS_COMPRESSIONS = "has_compressions"
@dataclass
class DisplayTemplate:
@@ -26,48 +42,116 @@ class DisplayTemplate:
format_string: str
inline: bool = False
order: int = 0
condition: Optional[str] = None
class DisplaySection(Enum):
"""Available display sections"""
QUEUE_STATS = "queue_stats"
DOWNLOADS = "downloads"
COMPRESSIONS = "compressions"
ERRORS = "errors"
HARDWARE = "hardware"
condition: Optional[DisplayCondition] = None
formatter: Optional[Callable[[Dict[str, Any]], str]] = None
max_items: int = field(default=5) # Maximum items to display in lists
class StatusFormatter:
"""Formats status information for display"""
BYTE_UNITS: ClassVar[List[str]] = ['B', 'KB', 'MB', 'GB', 'TB']
TIME_THRESHOLDS: ClassVar[List[Tuple[float, str]]] = [
(60, 's'),
(3600, 'm'),
(float('inf'), 'h')
]
@staticmethod
def format_bytes(bytes: int) -> str:
"""Format bytes into human readable format"""
for unit in ['B', 'KB', 'MB', 'GB']:
if bytes < 1024:
return f"{bytes:.1f}{unit}"
bytes /= 1024
return f"{bytes:.1f}TB"
def format_bytes(bytes_value: Union[int, float]) -> str:
"""
Format bytes into human readable format.
Args:
bytes_value: Number of bytes to format
Returns:
Formatted string with appropriate unit
Raises:
ValueError: If bytes_value is negative
"""
if bytes_value < 0:
raise ValueError("Bytes value cannot be negative")
bytes_num = float(bytes_value)
for unit in StatusFormatter.BYTE_UNITS:
if bytes_num < 1024:
return f"{bytes_num:.1f}{unit}"
bytes_num /= 1024
return f"{bytes_num:.1f}TB"
@staticmethod
def format_time(seconds: float) -> str:
"""Format time duration"""
if seconds < 60:
return f"{seconds:.1f}s"
minutes = seconds / 60
if minutes < 60:
return f"{minutes:.1f}m"
hours = minutes / 60
return f"{hours:.1f}h"
"""
Format time duration.
Args:
seconds: Number of seconds to format
Returns:
Formatted time string
Raises:
ValueError: If seconds is negative
"""
if seconds < 0:
raise ValueError("Time value cannot be negative")
for threshold, unit in StatusFormatter.TIME_THRESHOLDS:
if seconds < threshold:
return f"{seconds:.1f}{unit}"
seconds /= 60
return f"{seconds:.1f}h"
@staticmethod
def format_percentage(value: float) -> str:
"""Format percentage value"""
"""
Format percentage value.
Args:
value: Percentage value to format (0-100)
Returns:
Formatted percentage string
Raises:
ValueError: If value is outside valid range
"""
if not 0 <= value <= 100:
raise ValueError("Percentage must be between 0 and 100")
return f"{value:.1f}%"
@staticmethod
def truncate_url(url: str, max_length: int = 50) -> str:
"""
Truncate URL to specified length.
Args:
url: URL to truncate
max_length: Maximum length for URL
Returns:
Truncated URL string
Raises:
ValueError: If max_length is less than 4
"""
if max_length < 4: # Need room for "..."
raise ValueError("max_length must be at least 4")
return f"{url[:max_length]}..." if len(url) > max_length else url
class DisplayManager:
"""Manages status display configuration"""
def __init__(self):
DEFAULT_THEME: ClassVar[DisplayTheme] = DisplayTheme(
title_color=discord.Color.blue(),
success_color=discord.Color.green(),
warning_color=discord.Color.gold(),
error_color=discord.Color.red(),
info_color=discord.Color.blurple()
)
def __init__(self) -> None:
self.templates: Dict[DisplaySection, DisplayTemplate] = {
DisplaySection.QUEUE_STATS: DisplayTemplate(
name="Queue Statistics",
@@ -96,7 +180,8 @@ class DisplayManager:
"Retries: {retries}\n"
"```"
),
order=2
order=2,
condition=DisplayCondition.HAS_DOWNLOADS
),
DisplaySection.COMPRESSIONS: DisplayTemplate(
name="Active Compressions",
@@ -112,12 +197,13 @@ class DisplayManager:
"Hardware Accel: {hardware_accel}\n"
"```"
),
order=3
order=3,
condition=DisplayCondition.HAS_COMPRESSIONS
),
DisplaySection.ERRORS: DisplayTemplate(
name="Error Statistics",
format_string="```\n{error_stats}```",
condition="has_errors",
condition=DisplayCondition.HAS_ERRORS,
order=4
),
DisplaySection.HARDWARE: DisplayTemplate(
@@ -132,63 +218,99 @@ class DisplayManager:
order=5
)
}
self.theme = DisplayTheme.DEFAULT
self.theme = self.DEFAULT_THEME.copy()
class StatusDisplay:
"""Handles formatting and display of queue status information"""
def __init__(self):
def __init__(self) -> None:
self.display_manager = DisplayManager()
self.formatter = StatusFormatter()
@classmethod
async def create_queue_status_embed(
self,
cls,
queue_status: Dict[str, Any],
active_ops: Dict[str, Any]
) -> discord.Embed:
"""Create an embed displaying queue status and active operations"""
embed = discord.Embed(
title="Queue Status Details",
color=self.display_manager.theme["title_color"],
timestamp=datetime.utcnow()
)
"""
Create an embed displaying queue status and active operations.
Args:
queue_status: Dictionary containing queue status information
active_ops: Dictionary containing active operations information
Returns:
Discord embed containing formatted status information
Raises:
DisplayError: If there's an error creating the embed
"""
try:
display = cls()
embed = discord.Embed(
title="Queue Status Details",
color=display.display_manager.theme["title_color"],
timestamp=datetime.utcnow()
)
# Add sections in order
sections = sorted(
self.display_manager.templates.items(),
key=lambda x: x[1].order
)
# Add sections in order
sections = sorted(
display.display_manager.templates.items(),
key=lambda x: x[1].order
)
for section, template in sections:
# Check condition if exists
if template.condition:
if not self._check_condition(template.condition, queue_status, active_ops):
continue
for section, template in sections:
try:
# Check condition if exists
if template.condition:
if not display._check_condition(
template.condition,
queue_status,
active_ops
):
continue
# Add section based on type
if section == DisplaySection.QUEUE_STATS:
self._add_queue_statistics(embed, queue_status, template)
elif section == DisplaySection.DOWNLOADS:
self._add_active_downloads(embed, active_ops.get('downloads', {}), template)
elif section == DisplaySection.COMPRESSIONS:
self._add_active_compressions(embed, active_ops.get('compressions', {}), template)
elif section == DisplaySection.ERRORS:
self._add_error_statistics(embed, queue_status, template)
elif section == DisplaySection.HARDWARE:
self._add_hardware_statistics(embed, queue_status, template)
# Add section based on type
if section == DisplaySection.QUEUE_STATS:
display._add_queue_statistics(embed, queue_status, template)
elif section == DisplaySection.DOWNLOADS:
display._add_active_downloads(embed, active_ops.get('downloads', {}), template)
elif section == DisplaySection.COMPRESSIONS:
display._add_active_compressions(embed, active_ops.get('compressions', {}), template)
elif section == DisplaySection.ERRORS:
display._add_error_statistics(embed, queue_status, template)
elif section == DisplaySection.HARDWARE:
display._add_hardware_statistics(embed, queue_status, template)
except Exception as e:
logger.error(f"Error adding section {section.value}: {e}")
# Continue with other sections
return embed
return embed
except Exception as e:
error = f"Error creating status embed: {str(e)}"
logger.error(error, exc_info=True)
raise DisplayError(error)
def _check_condition(
self,
condition: str,
condition: DisplayCondition,
queue_status: Dict[str, Any],
active_ops: Dict[str, Any]
) -> bool:
"""Check if condition for displaying section is met"""
if condition == "has_errors":
return bool(queue_status["metrics"]["errors_by_type"])
return True
try:
if condition == DisplayCondition.HAS_ERRORS:
return bool(queue_status.get("metrics", {}).get("errors_by_type"))
elif condition == DisplayCondition.HAS_DOWNLOADS:
return bool(active_ops.get("downloads"))
elif condition == DisplayCondition.HAS_COMPRESSIONS:
return bool(active_ops.get("compressions"))
return True
except Exception as e:
logger.error(f"Error checking condition {condition}: {e}")
return False
def _add_queue_statistics(
self,
@@ -197,22 +319,31 @@ class StatusDisplay:
template: DisplayTemplate
) -> None:
"""Add queue statistics to the embed"""
embed.add_field(
name=template.name,
value=template.format_string.format(
pending=queue_status['pending'],
processing=queue_status['processing'],
completed=queue_status['completed'],
failed=queue_status['failed'],
success_rate=self.formatter.format_percentage(
queue_status['metrics']['success_rate'] * 100
try:
metrics = queue_status.get('metrics', {})
embed.add_field(
name=template.name,
value=template.format_string.format(
pending=queue_status.get('pending', 0),
processing=queue_status.get('processing', 0),
completed=queue_status.get('completed', 0),
failed=queue_status.get('failed', 0),
success_rate=self.formatter.format_percentage(
metrics.get('success_rate', 0) * 100
),
avg_processing_time=self.formatter.format_time(
metrics.get('avg_processing_time', 0)
)
),
avg_processing_time=self.formatter.format_time(
queue_status['metrics']['avg_processing_time']
)
),
inline=template.inline
)
inline=template.inline
)
except Exception as e:
logger.error(f"Error adding queue statistics: {e}")
embed.add_field(
name=template.name,
value="```\nError displaying queue statistics```",
inline=template.inline
)
def _add_active_downloads(
self,
@@ -221,28 +352,44 @@ class StatusDisplay:
template: DisplayTemplate
) -> None:
"""Add active downloads information to the embed"""
if downloads:
content = []
for url, progress in downloads.items():
content.append(template.format_string.format(
url=url[:50] + "..." if len(url) > 50 else url,
percent=self.formatter.format_percentage(progress.get('percent', 0)),
speed=progress.get('speed', 'N/A'),
eta=progress.get('eta', 'N/A'),
size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/"
f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}",
start_time=progress.get('start_time', 'N/A'),
retries=progress.get('retries', 0)
))
try:
if downloads:
content = []
for url, progress in list(downloads.items())[:template.max_items]:
try:
content.append(template.format_string.format(
url=self.formatter.truncate_url(url),
percent=self.formatter.format_percentage(progress.get('percent', 0)),
speed=progress.get('speed', 'N/A'),
eta=progress.get('eta', 'N/A'),
size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/"
f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}",
start_time=progress.get('start_time', 'N/A'),
retries=progress.get('retries', 0)
))
except Exception as e:
logger.error(f"Error formatting download {url}: {e}")
continue
if len(downloads) > template.max_items:
content.append(f"\n... and {len(downloads) - template.max_items} more")
embed.add_field(
name=template.name,
value="".join(content) if content else "```\nNo active downloads```",
inline=template.inline
)
else:
embed.add_field(
name=template.name,
value="```\nNo active downloads```",
inline=template.inline
)
except Exception as e:
logger.error(f"Error adding active downloads: {e}")
embed.add_field(
name=template.name,
value="".join(content),
inline=template.inline
)
else:
embed.add_field(
name=template.name,
value="```\nNo active downloads```",
value="```\nError displaying downloads```",
inline=template.inline
)
@@ -253,28 +400,44 @@ class StatusDisplay:
template: DisplayTemplate
) -> None:
"""Add active compressions information to the embed"""
if compressions:
content = []
for file_id, progress in compressions.items():
content.append(template.format_string.format(
filename=progress.get('filename', 'Unknown'),
percent=self.formatter.format_percentage(progress.get('percent', 0)),
elapsed_time=progress.get('elapsed_time', 'N/A'),
input_size=self.formatter.format_bytes(progress.get('input_size', 0)),
current_size=self.formatter.format_bytes(progress.get('current_size', 0)),
target_size=self.formatter.format_bytes(progress.get('target_size', 0)),
codec=progress.get('codec', 'Unknown'),
hardware_accel=progress.get('hardware_accel', False)
))
try:
if compressions:
content = []
for file_id, progress in list(compressions.items())[:template.max_items]:
try:
content.append(template.format_string.format(
filename=progress.get('filename', 'Unknown'),
percent=self.formatter.format_percentage(progress.get('percent', 0)),
elapsed_time=progress.get('elapsed_time', 'N/A'),
input_size=self.formatter.format_bytes(progress.get('input_size', 0)),
current_size=self.formatter.format_bytes(progress.get('current_size', 0)),
target_size=self.formatter.format_bytes(progress.get('target_size', 0)),
codec=progress.get('codec', 'Unknown'),
hardware_accel=progress.get('hardware_accel', False)
))
except Exception as e:
logger.error(f"Error formatting compression {file_id}: {e}")
continue
if len(compressions) > template.max_items:
content.append(f"\n... and {len(compressions) - template.max_items} more")
embed.add_field(
name=template.name,
value="".join(content) if content else "```\nNo active compressions```",
inline=template.inline
)
else:
embed.add_field(
name=template.name,
value="```\nNo active compressions```",
inline=template.inline
)
except Exception as e:
logger.error(f"Error adding active compressions: {e}")
embed.add_field(
name=template.name,
value="".join(content),
inline=template.inline
)
else:
embed.add_field(
name=template.name,
value="```\nNo active compressions```",
value="```\nError displaying compressions```",
inline=template.inline
)
@@ -285,14 +448,26 @@ class StatusDisplay:
template: DisplayTemplate
) -> None:
"""Add error statistics to the embed"""
if queue_status["metrics"]["errors_by_type"]:
error_stats = "\n".join(
f"{error_type}: {count}"
for error_type, count in queue_status["metrics"]["errors_by_type"].items()
)
try:
metrics = queue_status.get('metrics', {})
errors_by_type = metrics.get('errors_by_type', {})
if errors_by_type:
error_stats = "\n".join(
f"{error_type}: {count}"
for error_type, count in list(errors_by_type.items())[:template.max_items]
)
if len(errors_by_type) > template.max_items:
error_stats += f"\n... and {len(errors_by_type) - template.max_items} more"
embed.add_field(
name=template.name,
value=template.format_string.format(error_stats=error_stats),
inline=template.inline
)
except Exception as e:
logger.error(f"Error adding error statistics: {e}")
embed.add_field(
name=template.name,
value=template.format_string.format(error_stats=error_stats),
value="```\nError displaying error statistics```",
inline=template.inline
)
@@ -303,14 +478,23 @@ class StatusDisplay:
template: DisplayTemplate
) -> None:
"""Add hardware statistics to the embed"""
embed.add_field(
name=template.name,
value=template.format_string.format(
hw_failures=queue_status['metrics']['hardware_accel_failures'],
comp_failures=queue_status['metrics']['compression_failures'],
memory_usage=self.formatter.format_bytes(
queue_status['metrics']['peak_memory_usage'] * 1024 * 1024 # Convert MB to bytes
)
),
inline=template.inline
)
try:
metrics = queue_status.get('metrics', {})
embed.add_field(
name=template.name,
value=template.format_string.format(
hw_failures=metrics.get('hardware_accel_failures', 0),
comp_failures=metrics.get('compression_failures', 0),
memory_usage=self.formatter.format_bytes(
metrics.get('peak_memory_usage', 0) * 1024 * 1024 # Convert MB to bytes
)
),
inline=template.inline
)
except Exception as e:
logger.error(f"Error adding hardware statistics: {e}")
embed.add_field(
name=template.name,
value="```\nError displaying hardware statistics```",
inline=template.inline
)

View File

@@ -3,10 +3,11 @@
import logging
import re
from enum import Enum
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Pattern
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Set, Pattern, ClassVar
from datetime import datetime
import discord
from urllib.parse import urlparse, parse_qs
from urllib.parse import urlparse, parse_qs, ParseResult
logger = logging.getLogger("VideoArchiver")
@@ -19,6 +20,11 @@ class URLPattern:
supports_timestamp: bool = False
supports_playlist: bool = False
def __post_init__(self) -> None:
"""Validate pattern after initialization"""
if not isinstance(self.pattern, Pattern):
raise ValueError("Pattern must be a compiled regular expression")
@dataclass
class URLMetadata:
"""Metadata about an extracted URL"""
@@ -28,6 +34,7 @@ class URLMetadata:
playlist_id: Optional[str] = None
video_id: Optional[str] = None
quality: Optional[str] = None
extraction_time: str = field(default_factory=lambda: datetime.utcnow().isoformat())
class URLType(Enum):
"""Types of video URLs"""
@@ -38,84 +45,137 @@ class URLType(Enum):
class URLPatternManager:
"""Manages URL patterns for different video sites"""
def __init__(self):
YOUTUBE_PATTERN: ClassVar[Pattern] = re.compile(
r'(?:https?://)?(?:www\.)?'
r'(?:youtube\.com/watch\?v=|youtu\.be/)'
r'([a-zA-Z0-9_-]{11})'
)
VIMEO_PATTERN: ClassVar[Pattern] = re.compile(
r'(?:https?://)?(?:www\.)?'
r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)'
r'(\d+)(?:|/\w+)*'
)
TWITTER_PATTERN: ClassVar[Pattern] = re.compile(
r'(?:https?://)?(?:www\.)?'
r'(?:twitter\.com|x\.com)/\w+/status/(\d+)'
)
def __init__(self) -> None:
self.patterns: Dict[str, URLPattern] = {
"youtube": URLPattern(
site="youtube",
pattern=re.compile(
r'(?:https?://)?(?:www\.)?'
r'(?:youtube\.com/watch\?v=|youtu\.be/)'
r'([a-zA-Z0-9_-]{11})'
),
pattern=self.YOUTUBE_PATTERN,
supports_timestamp=True,
supports_playlist=True
),
"vimeo": URLPattern(
site="vimeo",
pattern=re.compile(
r'(?:https?://)?(?:www\.)?'
r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)'
r'(\d+)(?:|/\w+)*'
),
pattern=self.VIMEO_PATTERN,
supports_timestamp=True
),
"twitter": URLPattern(
site="twitter",
pattern=re.compile(
r'(?:https?://)?(?:www\.)?'
r'(?:twitter\.com|x\.com)/\w+/status/(\d+)'
),
pattern=self.TWITTER_PATTERN,
requires_api=True
),
# Add more patterns as needed
)
}
self.direct_extensions = {'.mp4', '.mov', '.avi', '.webm', '.mkv'}
self.direct_extensions: Set[str] = {'.mp4', '.mov', '.avi', '.webm', '.mkv'}
def get_pattern(self, site: str) -> Optional[URLPattern]:
"""Get pattern for a site"""
"""
Get pattern for a site.
Args:
site: Site identifier
Returns:
URLPattern for the site or None if not found
"""
return self.patterns.get(site.lower())
def is_supported_site(self, url: str, enabled_sites: Optional[List[str]]) -> bool:
"""Check if URL is from a supported site"""
"""
Check if URL is from a supported site.
Args:
url: URL to check
enabled_sites: List of enabled site identifiers
Returns:
True if site is supported, False otherwise
"""
if not enabled_sites:
return True
parsed = urlparse(url.lower())
domain = parsed.netloc.replace('www.', '')
return any(site.lower() in domain for site in enabled_sites)
try:
parsed = urlparse(url.lower())
domain = parsed.netloc.replace('www.', '')
return any(site.lower() in domain for site in enabled_sites)
except Exception as e:
logger.error(f"Error checking site support for {url}: {e}")
return False
class URLValidator:
"""Validates extracted URLs"""
def __init__(self, pattern_manager: URLPatternManager):
def __init__(self, pattern_manager: URLPatternManager) -> None:
self.pattern_manager = pattern_manager
def get_url_type(self, url: str) -> URLType:
"""Determine URL type"""
parsed = urlparse(url)
if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
return URLType.DIRECT
if any(pattern.pattern.match(url) for pattern in self.pattern_manager.patterns.values()):
return URLType.PLATFORM
return URLType.UNKNOWN
"""
Determine URL type.
Args:
url: URL to check
Returns:
URLType indicating the type of URL
"""
try:
parsed = urlparse(url)
if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
return URLType.DIRECT
if any(pattern.pattern.match(url) for pattern in self.pattern_manager.patterns.values()):
return URLType.PLATFORM
return URLType.UNKNOWN
except Exception as e:
logger.error(f"Error determining URL type for {url}: {e}")
return URLType.UNKNOWN
def is_valid_url(self, url: str) -> bool:
"""Validate URL format"""
"""
Validate URL format.
Args:
url: URL to validate
Returns:
True if URL is valid, False otherwise
"""
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except Exception:
except Exception as e:
logger.error(f"Error validating URL {url}: {e}")
return False
class URLMetadataExtractor:
"""Extracts metadata from URLs"""
def __init__(self, pattern_manager: URLPatternManager):
def __init__(self, pattern_manager: URLPatternManager) -> None:
self.pattern_manager = pattern_manager
def extract_metadata(self, url: str) -> Optional[URLMetadata]:
"""Extract metadata from URL"""
"""
Extract metadata from URL.
Args:
url: URL to extract metadata from
Returns:
URLMetadata object or None if extraction fails
"""
try:
parsed = urlparse(url)
@@ -143,33 +203,41 @@ class URLMetadataExtractor:
return None
except Exception as e:
logger.error(f"Error extracting metadata from URL {url}: {e}")
logger.error(f"Error extracting metadata from URL {url}: {e}", exc_info=True)
return None
def _extract_timestamp(self, parsed_url: urlparse) -> Optional[int]:
def _extract_timestamp(self, parsed_url: ParseResult) -> Optional[int]:
"""Extract timestamp from URL"""
try:
params = parse_qs(parsed_url.query)
if 't' in params:
return int(params['t'][0])
return None
except Exception:
except (ValueError, IndexError) as e:
logger.debug(f"Error extracting timestamp: {e}")
return None
except Exception as e:
logger.error(f"Unexpected error extracting timestamp: {e}")
return None
def _extract_playlist_id(self, parsed_url: urlparse) -> Optional[str]:
def _extract_playlist_id(self, parsed_url: ParseResult) -> Optional[str]:
"""Extract playlist ID from URL"""
try:
params = parse_qs(parsed_url.query)
if 'list' in params:
return params['list'][0]
return None
except Exception:
except (KeyError, IndexError) as e:
logger.debug(f"Error extracting playlist ID: {e}")
return None
except Exception as e:
logger.error(f"Unexpected error extracting playlist ID: {e}")
return None
class URLExtractor:
"""Handles extraction of video URLs from messages"""
def __init__(self):
def __init__(self) -> None:
self.pattern_manager = URLPatternManager()
self.validator = URLValidator(self.pattern_manager)
self.metadata_extractor = URLMetadataExtractor(self.pattern_manager)
@@ -180,85 +248,113 @@ class URLExtractor:
message: discord.Message,
enabled_sites: Optional[List[str]] = None
) -> List[URLMetadata]:
"""Extract video URLs from message content and attachments"""
urls = []
"""
Extract video URLs from message content and attachments.
# Check cache
cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}"
if cache_key in self._url_cache:
return [
self.metadata_extractor.extract_metadata(url)
for url in self._url_cache[cache_key]
if url # Filter out None values
]
Args:
message: Discord message to extract URLs from
enabled_sites: Optional list of enabled site identifiers
Returns:
List of URLMetadata objects for extracted URLs
"""
urls: List[URLMetadata] = []
try:
# Check cache
cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}"
if cache_key in self._url_cache:
return [
metadata for url in self._url_cache[cache_key]
if (metadata := self.metadata_extractor.extract_metadata(url))
]
# Extract URLs
content_urls = await self._extract_from_content(message.content, enabled_sites)
attachment_urls = await self._extract_from_attachments(message.attachments)
# Process all URLs
all_urls = content_urls + attachment_urls
valid_urls = []
for url in all_urls:
if not self.validator.is_valid_url(url):
logger.debug(f"Invalid URL format: {url}")
continue
if not self.pattern_manager.is_supported_site(url, enabled_sites):
logger.debug(f"URL {url} doesn't match any enabled sites")
continue
metadata = self.metadata_extractor.extract_metadata(url)
if metadata:
urls.append(metadata)
valid_urls.append(url)
else:
logger.debug(f"Could not extract metadata from URL: {url}")
# Extract URLs
content_urls = await self._extract_from_content(message.content, enabled_sites)
attachment_urls = await self._extract_from_attachments(message.attachments)
# Process all URLs
all_urls = content_urls + attachment_urls
valid_urls: Set[str] = set()
for url in all_urls:
if not self.validator.is_valid_url(url):
logger.debug(f"Invalid URL format: {url}")
continue
if not self.pattern_manager.is_supported_site(url, enabled_sites):
logger.debug(f"URL {url} doesn't match any enabled sites")
continue
metadata = self.metadata_extractor.extract_metadata(url)
if metadata:
urls.append(metadata)
valid_urls.add(url)
else:
logger.debug(f"Could not extract metadata from URL: {url}")
# Update cache
self._url_cache[cache_key] = set(valid_urls)
return urls
# Update cache
self._url_cache[cache_key] = valid_urls
return urls
except Exception as e:
logger.error(f"Error extracting URLs from message {message.id}: {e}", exc_info=True)
return []
async def _extract_from_content(
self,
content: str,
content: Optional[str],
enabled_sites: Optional[List[str]]
) -> List[str]:
"""Extract video URLs from message content"""
if not content:
return []
urls = []
for word in content.split():
if self.validator.get_url_type(word) != URLType.UNKNOWN:
urls.append(word)
return urls
try:
urls = []
for word in content.split():
if self.validator.get_url_type(word) != URLType.UNKNOWN:
urls.append(word)
return urls
except Exception as e:
logger.error(f"Error extracting URLs from content: {e}", exc_info=True)
return []
async def _extract_from_attachments(
self,
attachments: List[discord.Attachment]
) -> List[str]:
"""Extract video URLs from message attachments"""
return [
attachment.url
for attachment in attachments
if any(
attachment.filename.lower().endswith(ext)
for ext in self.pattern_manager.direct_extensions
)
]
try:
return [
attachment.url
for attachment in attachments
if any(
attachment.filename.lower().endswith(ext)
for ext in self.pattern_manager.direct_extensions
)
]
except Exception as e:
logger.error(f"Error extracting URLs from attachments: {e}", exc_info=True)
return []
def clear_cache(self, message_id: Optional[int] = None) -> None:
"""Clear URL cache"""
if message_id:
keys_to_remove = [
key for key in self._url_cache
if key.startswith(f"{message_id}_")
]
for key in keys_to_remove:
self._url_cache.pop(key, None)
else:
self._url_cache.clear()
"""
Clear URL cache.
Args:
message_id: Optional message ID to clear cache for. If None, clears all cache.
"""
try:
if message_id:
keys_to_remove = [
key for key in self._url_cache
if key.startswith(f"{message_id}_")
]
for key in keys_to_remove:
self._url_cache.pop(key, None)
else:
self._url_cache.clear()
except Exception as e:
logger.error(f"Error clearing URL cache: {e}", exc_info=True)