Core Systems:

Component-based architecture with lifecycle management
Enhanced error handling and recovery mechanisms
Comprehensive state management and tracking
Event-driven architecture with monitoring
Queue Management:

Multiple processing strategies for different scenarios
Advanced state management with recovery
Comprehensive metrics and health monitoring
Sophisticated cleanup system with multiple strategies
Processing Pipeline:

Enhanced message handling with validation
Improved URL extraction and processing
Better queue management and monitoring
Advanced cleanup mechanisms
Overall Benefits:

Better code organization and maintainability
Improved error handling and recovery
Enhanced monitoring and reporting
More robust and reliable system
This commit is contained in:
pacnpal
2024-11-16 05:01:29 +00:00
parent 537a325807
commit a4ca6e8ea6
47 changed files with 11085 additions and 2110 deletions

View File

@@ -0,0 +1,252 @@
"""Module for managing cleanup operations in the video processor"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, Set
from datetime import datetime
logger = logging.getLogger("VideoArchiver")
class CleanupStage(Enum):
    """Phases a cleanup run is divided into.

    Each member's value is the lowercase label that appears in log messages
    and is used as the key in per-stage success statistics.
    """

    QUEUE = "queue"
    FFMPEG = "ffmpeg"
    TASKS = "tasks"
    RESOURCES = "resources"
class CleanupStrategy(Enum):
    """How a cleanup run should react when one of its stages fails.

    ``CleanupManager.cleanup`` keeps going after a failed stage only when
    the strategy is ``GRACEFUL``; with any other member the failure is
    re-raised.
    """

    NORMAL = "normal"
    FORCE = "force"
    GRACEFUL = "graceful"
@dataclass
class CleanupResult:
    """Outcome of running a single cleanup stage."""

    # True when the stage ran to completion without raising.
    success: bool
    # The stage this outcome belongs to.
    stage: CleanupStage
    # Exception text for a failed stage; None when nothing went wrong.
    error: Optional[str] = None
    # Seconds spent in the stage.
    duration: float = 0.0
class CleanupTracker:
    """Bookkeeping for cleanup runs: which are in flight and how finished ones went."""

    def __init__(self):
        # Finished runs, oldest first; each entry is the dict built in end_cleanup().
        self.cleanup_history: List[Dict[str, Any]] = []
        # Ids of runs that were started but not yet ended.
        self.active_cleanups: Set[str] = set()
        # Start timestamp (naive UTC) per active run.
        self.start_times: Dict[str, datetime] = {}
        # Stage outcomes collected so far per active run.
        self.stage_results: Dict[str, List[CleanupResult]] = {}

    def start_cleanup(self, cleanup_id: str) -> None:
        """Register ``cleanup_id`` as running and stamp its start time."""
        self.stage_results[cleanup_id] = []
        self.start_times[cleanup_id] = datetime.utcnow()
        self.active_cleanups.add(cleanup_id)

    def record_stage_result(
        self,
        cleanup_id: str,
        result: CleanupResult
    ) -> None:
        """Attach a stage outcome to a run; unknown ids are ignored."""
        collected = self.stage_results.get(cleanup_id)
        if collected is not None:
            collected.append(result)

    def end_cleanup(self, cleanup_id: str) -> None:
        """Move a running cleanup into the history; no-op for inactive ids."""
        if cleanup_id not in self.active_cleanups:
            return

        finished_at = datetime.utcnow()
        started_at = self.start_times[cleanup_id]
        outcomes = self.stage_results[cleanup_id]
        self.cleanup_history.append({
            "id": cleanup_id,
            "start_time": started_at,
            "end_time": finished_at,
            "duration": (finished_at - started_at).total_seconds(),
            "results": outcomes
        })

        # Drop every piece of in-flight state for this run.
        self.active_cleanups.remove(cleanup_id)
        del self.start_times[cleanup_id]
        del self.stage_results[cleanup_id]

    def get_cleanup_stats(self) -> Dict[str, Any]:
        """Return aggregate counters and rates over all finished runs."""
        stats: Dict[str, Any] = {}
        stats["total_cleanups"] = len(self.cleanup_history)
        stats["active_cleanups"] = len(self.active_cleanups)
        stats["success_rate"] = self._calculate_success_rate()
        stats["average_duration"] = self._calculate_average_duration()
        stats["stage_success_rates"] = self._calculate_stage_success_rates()
        return stats

    def _calculate_success_rate(self) -> float:
        """Fraction of finished runs in which every stage succeeded (1.0 if none ran)."""
        history = self.cleanup_history
        if not history:
            return 1.0
        clean_runs = 0
        for entry in history:
            if all(outcome.success for outcome in entry["results"]):
                clean_runs += 1
        return clean_runs / len(history)

    def _calculate_average_duration(self) -> float:
        """Mean run duration in seconds (0.0 if no run has finished)."""
        history = self.cleanup_history
        if not history:
            return 0.0
        return sum(entry["duration"] for entry in history) / len(history)

    def _calculate_stage_success_rates(self) -> Dict[str, float]:
        """Success fraction per stage label, over every recorded stage attempt."""
        # stage label -> [attempts, successes]
        tallies: Dict[str, List[int]] = {}
        for entry in self.cleanup_history:
            for outcome in entry["results"]:
                tally = tallies.setdefault(outcome.stage.value, [0, 0])
                tally[0] += 1
                if outcome.success:
                    tally[1] += 1
        return {label: wins / tries for label, (tries, wins) in tallies.items()}
class CleanupManager:
    """Runs the video processor's cleanup stages and records how each one went.

    Two entry points exist: ``cleanup`` (may raise, depending on the
    configured strategy) and ``force_cleanup`` (best effort, never raises
    for a failing stage). Both record per-stage outcomes in ``self.tracker``.
    """

    def __init__(
        self,
        queue_handler,
        ffmpeg_mgr: Optional[object] = None,
        strategy: CleanupStrategy = CleanupStrategy.NORMAL
    ):
        """Store collaborators.

        Args:
            queue_handler: Object exposing awaitable ``cleanup()`` and
                ``force_cleanup()``.
            ffmpeg_mgr: Optional object exposing ``kill_all_processes()``.
            strategy: Failure policy for ``cleanup``; only ``GRACEFUL``
                lets later stages run after an earlier one failed.
        """
        self.queue_handler = queue_handler
        self.ffmpeg_mgr = ffmpeg_mgr
        self.strategy = strategy
        self._queue_task: Optional[asyncio.Task] = None
        self.tracker = CleanupTracker()

    async def cleanup(self) -> None:
        """Perform normal cleanup of resources.

        Raises:
            Exception: Whatever a stage raised, unless the strategy is
                ``GRACEFUL`` (then failures are recorded and logged only).
        """
        cleanup_id = f"cleanup_{datetime.utcnow().timestamp()}"
        stages = [
            (CleanupStage.QUEUE, self._cleanup_queue),
            (CleanupStage.FFMPEG, self._cleanup_ffmpeg),
            (CleanupStage.TASKS, self._cleanup_tasks)
        ]
        self.tracker.start_cleanup(cleanup_id)
        try:
            logger.info("Starting normal cleanup...")
            failed = await self._run_stages(
                cleanup_id,
                stages,
                label="",
                stop_on_error=self.strategy != CleanupStrategy.GRACEFUL,
            )
            if failed:
                # Only reachable with GRACEFUL; don't claim success.
                logger.warning(
                    "Normal cleanup completed with failed stages: %s",
                    ", ".join(failed),
                )
            else:
                logger.info("Normal cleanup completed successfully")
        except Exception as e:
            logger.error(f"Error during normal cleanup: {str(e)}", exc_info=True)
            raise
        finally:
            self.tracker.end_cleanup(cleanup_id)

    async def force_cleanup(self) -> None:
        """Force cleanup of resources when normal cleanup fails.

        Every stage is attempted; failures are logged and recorded but not
        propagated.
        """
        cleanup_id = f"force_cleanup_{datetime.utcnow().timestamp()}"
        stages = [
            (CleanupStage.QUEUE, self._force_cleanup_queue),
            (CleanupStage.FFMPEG, self._force_cleanup_ffmpeg),
            (CleanupStage.TASKS, self._force_cleanup_tasks)
        ]
        self.tracker.start_cleanup(cleanup_id)
        try:
            logger.info("Starting force cleanup...")
            await self._run_stages(
                cleanup_id, stages, label="force ", stop_on_error=False
            )
            logger.info("Force cleanup completed")
        except Exception as e:
            logger.error(f"Error during force cleanup: {str(e)}", exc_info=True)
        finally:
            self.tracker.end_cleanup(cleanup_id)

    async def _run_stages(
        self,
        cleanup_id: str,
        stages: list,
        *,
        label: str,
        stop_on_error: bool
    ) -> List[str]:
        """Run ``(stage, coroutine function)`` pairs in order and record each outcome.

        Returns the labels of the stages that failed. When ``stop_on_error``
        is true the first failure is re-raised after being recorded.
        """
        failed: List[str] = []
        for stage, cleanup_func in stages:
            started = datetime.utcnow()
            try:
                await cleanup_func()
            except Exception as e:
                # Record the time spent even when the stage failed.
                elapsed = (datetime.utcnow() - started).total_seconds()
                logger.error(f"Error in {label}{stage.value} cleanup: {e}")
                self.tracker.record_stage_result(
                    cleanup_id,
                    CleanupResult(False, stage, str(e), elapsed)
                )
                failed.append(stage.value)
                if stop_on_error:
                    raise
            else:
                elapsed = (datetime.utcnow() - started).total_seconds()
                self.tracker.record_stage_result(
                    cleanup_id,
                    CleanupResult(True, stage, duration=elapsed)
                )
        return failed

    async def _cleanup_queue(self) -> None:
        """Clean up queue handler"""
        await self.queue_handler.cleanup()

    async def _cleanup_ffmpeg(self) -> None:
        """Clean up FFmpeg manager (no-op when none was supplied)"""
        if self.ffmpeg_mgr:
            self.ffmpeg_mgr.kill_all_processes()

    async def _cleanup_tasks(self) -> None:
        """Cancel the queue task, if still running, and wait for it to finish"""
        if self._queue_task and not self._queue_task.done():
            self._queue_task.cancel()
            try:
                await self._queue_task
            except asyncio.CancelledError:
                pass

    async def _force_cleanup_queue(self) -> None:
        """Force clean up queue handler"""
        await self.queue_handler.force_cleanup()

    async def _force_cleanup_ffmpeg(self) -> None:
        """Force clean up FFmpeg manager (no-op when none was supplied)"""
        if self.ffmpeg_mgr:
            self.ffmpeg_mgr.kill_all_processes()

    async def _force_cleanup_tasks(self) -> None:
        """Cancel the queue task without waiting for it to finish"""
        if self._queue_task and not self._queue_task.done():
            self._queue_task.cancel()

    def set_queue_task(self, task: asyncio.Task) -> None:
        """Set the queue processing task for cleanup purposes"""
        self._queue_task = task

    def get_cleanup_stats(self) -> Dict[str, Any]:
        """Return tracker statistics together with the configured strategy"""
        return {
            "stats": self.tracker.get_cleanup_stats(),
            "strategy": self.strategy.value,
            "active_cleanups": len(self.tracker.active_cleanups)
        }

View File

@@ -2,19 +2,151 @@
import logging
import asyncio
from enum import Enum
from typing import Optional, Tuple, Dict, Any
from datetime import datetime
import discord
from discord.ext import commands
from discord import app_commands
from datetime import datetime
from typing import Dict, Any, Optional, Tuple
from .message_handler import MessageHandler
from .queue_handler import QueueHandler
from .progress_tracker import ProgressTracker
from .status_display import StatusDisplay
from .cleanup_manager import CleanupManager
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class ProcessorState(Enum):
    """Lifecycle states a video processor can report.

    The member value is the string exposed under ``"state"`` in the
    processor's status dictionary.
    """

    INITIALIZING = "initializing"
    READY = "ready"
    PROCESSING = "processing"
    PAUSED = "paused"
    ERROR = "error"
    SHUTDOWN = "shutdown"
class OperationType(Enum):
    """Categories of work the operation tracker distinguishes.

    The member value becomes the prefix of generated operation ids and the
    ``"type"`` field of each tracked operation.
    """

    MESSAGE_PROCESSING = "message_processing"
    VIDEO_PROCESSING = "video_processing"
    QUEUE_MANAGEMENT = "queue_management"
    CLEANUP = "cleanup"
class OperationTracker:
    """Tracks running and finished processor operations.

    Running operations live in ``operations`` keyed by an opaque id;
    finished ones are appended to ``operation_history`` (bounded by
    ``max_history``) and counted in ``success_count`` / ``error_count``.
    """

    def __init__(self, max_history: Optional[int] = 1000):
        """Create an empty tracker.

        Args:
            max_history: Maximum number of finished operations kept in
                ``operation_history``; older entries are discarded. ``None``
                keeps everything. Counters are unaffected by trimming.
        """
        self.operations: Dict[str, Dict[str, Any]] = {}
        # Finished operation records, oldest first.
        self.operation_history: list = []
        self.error_count = 0
        self.success_count = 0
        self.max_history = max_history
        # Monotonic suffix so two operations started within the same clock
        # tick can never share (and overwrite) an id.
        self._next_seq = 0

    def start_operation(
        self,
        op_type: OperationType,
        details: Dict[str, Any]
    ) -> str:
        """Start tracking an operation and return its id.

        The id is opaque to callers; pass it back to ``end_operation``.
        """
        now = datetime.utcnow()
        self._next_seq += 1
        op_id = f"{op_type.value}_{now.timestamp()}_{self._next_seq}"
        self.operations[op_id] = {
            "type": op_type.value,
            "start_time": now,
            "status": "running",
            "details": details
        }
        return op_id

    def end_operation(
        self,
        op_id: str,
        success: bool,
        error: Optional[str] = None
    ) -> None:
        """Finish an operation; ids that are not currently running are ignored."""
        record = self.operations.pop(op_id, None)
        if record is None:
            return

        record.update({
            "end_time": datetime.utcnow(),
            "status": "success" if success else "error",
            "error": error
        })
        self.operation_history.append(record)

        if success:
            self.success_count += 1
        else:
            self.error_count += 1

        # Keep memory bounded in long-running processes.
        if self.max_history is not None:
            overflow = len(self.operation_history) - self.max_history
            if overflow > 0:
                del self.operation_history[:overflow]

    def get_active_operations(self) -> Dict[str, Dict[str, Any]]:
        """Return a shallow copy of the currently running operations."""
        return self.operations.copy()

    def get_operation_stats(self) -> Dict[str, Any]:
        """Return counters and the success rate over finished operations.

        The success rate is 1.0 while nothing has finished yet, so an idle
        tracker is not reported as failing.
        """
        finished = self.success_count + self.error_count
        active = len(self.operations)
        return {
            # Derived from counters so trimming the history does not skew it.
            "total_operations": finished + active,
            "active_operations": active,
            "success_count": self.success_count,
            "error_count": self.error_count,
            "success_rate": self.success_count / finished if finished else 1.0
        }
class HealthMonitor:
    """Periodically polls the processor's components and records their health.

    ``health_status`` maps a component name to the boolean it last
    reported; ``last_check`` is the naive-UTC time the latest poll began.
    """

    def __init__(
        self,
        processor: 'VideoProcessor',
        check_interval: float = 60.0,
        error_interval: float = 30.0,
        min_success_rate: float = 0.9,
    ):
        """Create a monitor for ``processor``.

        Args:
            processor: Object exposing ``queue_handler``, ``message_handler``
                and ``progress_tracker`` (each with ``is_healthy()``) and
                ``operation_tracker`` (with ``get_operation_stats()``).
            check_interval: Seconds to sleep after a successful poll.
            error_interval: Seconds to sleep after a poll that raised.
            min_success_rate: Operation success rate below which the
                ``"operations"`` entry is marked unhealthy.
        """
        self.processor = processor
        self.check_interval = check_interval
        self.error_interval = error_interval
        self.min_success_rate = min_success_rate
        self.last_check: Optional[datetime] = None
        self.health_status: Dict[str, bool] = {}
        self._monitor_task: Optional[asyncio.Task] = None

    async def start_monitoring(self) -> None:
        """Start the background polling task (no-op if one is already running)."""
        running = self._monitor_task
        if running is not None and not running.done():
            # A second task would poll in parallel and never be cancelled.
            return
        self._monitor_task = asyncio.create_task(self._monitor_health())

    async def stop_monitoring(self) -> None:
        """Cancel the polling task, wait for it to finish and forget it."""
        task, self._monitor_task = self._monitor_task, None
        if task is None:
            return
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def _monitor_health(self) -> None:
        """Poll forever until cancelled, refreshing ``health_status`` each round."""
        while True:
            try:
                self.last_check = datetime.utcnow()

                # Check component health
                self.health_status.update({
                    "queue_handler": self.processor.queue_handler.is_healthy(),
                    "message_handler": self.processor.message_handler.is_healthy(),
                    "progress_tracker": self.processor.progress_tracker.is_healthy()
                })

                # Check operation health
                op_stats = self.processor.operation_tracker.get_operation_stats()
                self.health_status["operations"] = (
                    op_stats["success_rate"] >= self.min_success_rate
                )

                await asyncio.sleep(self.check_interval)
            except Exception as e:
                # CancelledError is not an Exception subclass, so cancellation
                # still terminates the loop.
                logger.error(f"Health monitoring error: {e}")
                await asyncio.sleep(self.error_interval)

    def is_healthy(self) -> bool:
        """Return True when every recorded check passed (also before the first poll)."""
        return all(self.health_status.values())
class VideoProcessor:
"""Handles video processing operations"""
@@ -34,91 +166,101 @@ class VideoProcessor:
self.db = db
self.queue_manager = queue_manager
# Initialize state
self.state = ProcessorState.INITIALIZING
self.operation_tracker = OperationTracker()
self.health_monitor = HealthMonitor(self)
# Initialize handlers
self.queue_handler = QueueHandler(bot, config_manager, components)
self.message_handler = MessageHandler(bot, config_manager, queue_manager)
self.progress_tracker = ProgressTracker()
self.cleanup_manager = CleanupManager(self.queue_handler, ffmpeg_mgr)
# Pass db to queue handler if it exists
if self.db:
self.queue_handler.db = self.db
# Store queue task reference but don't start processing here
# Queue processing is managed by VideoArchiver class
# Store queue task reference
self._queue_task = None
# Mark as ready
self.state = ProcessorState.READY
logger.info("VideoProcessor initialized successfully")
async def start(self) -> None:
    """Begin background work for the processor.

    This launches the health monitor's periodic check and nothing else.
    """
    monitor = self.health_monitor
    await monitor.start_monitoring()
async def process_video(self, item) -> Tuple[bool, Optional[str]]:
    """Process a video from the queue and record the attempt.

    The work itself is delegated to ``self.queue_handler.process_video``;
    this wrapper tracks the operation and toggles the processor state.

    Args:
        item: Queue item to process; its ``str()`` is stored in the
            operation details.

    Returns:
        The queue handler's result: element 0 is the success flag,
        element 1 is used as the error text when it failed.

    Raises:
        Exception: Anything the queue handler raises, after the operation
            has been recorded as failed.
    """
    op_id = self.operation_tracker.start_operation(
        OperationType.VIDEO_PROCESSING,
        {"item": str(item)}
    )

    try:
        self.state = ProcessorState.PROCESSING
        result = await self.queue_handler.process_video(item)
        success = result[0]
        error = None if success else result[1]
        self.operation_tracker.end_operation(op_id, success, error)
        return result
    except Exception as e:
        self.operation_tracker.end_operation(op_id, False, str(e))
        raise
    finally:
        # Only hand the state back if nobody changed it meanwhile; a
        # concurrent cleanup may have moved the processor to SHUTDOWN.
        if self.state is ProcessorState.PROCESSING:
            self.state = ProcessorState.READY
async def process_message(self, message: discord.Message) -> None:
    """Process a message for video content and record the attempt.

    Delegates to ``self.message_handler.process_message`` exactly once and
    tracks the call as a ``MESSAGE_PROCESSING`` operation.

    Raises:
        Exception: Anything the message handler raises, after the
            operation has been recorded as failed.
    """
    op_id = self.operation_tracker.start_operation(
        OperationType.MESSAGE_PROCESSING,
        {"message_id": message.id}
    )

    try:
        await self.message_handler.process_message(message)
        self.operation_tracker.end_operation(op_id, True)
    except Exception as e:
        self.operation_tracker.end_operation(op_id, False, str(e))
        raise
async def cleanup(self) -> None:
    """Clean up resources and stop processing.

    Marks the processor as shut down, stops the health monitor and then
    runs the cleanup manager's normal cleanup. The whole call is tracked
    as a ``CLEANUP`` operation with details ``{"type": "normal"}``.

    Raises:
        Exception: Anything raised while stopping the monitor or cleaning
            up, after it has been recorded and logged.
    """
    op_id = self.operation_tracker.start_operation(
        OperationType.CLEANUP,
        {"type": "normal"}
    )

    try:
        self.state = ProcessorState.SHUTDOWN
        await self.health_monitor.stop_monitoring()
        await self.cleanup_manager.cleanup()
        self.operation_tracker.end_operation(op_id, True)
    except Exception as e:
        self.operation_tracker.end_operation(op_id, False, str(e))
        logger.error(f"Error during cleanup: {e}", exc_info=True)
        raise
async def force_cleanup(self) -> None:
    """Force cleanup of resources.

    Same sequence as ``cleanup`` but using the cleanup manager's force
    path; tracked as a ``CLEANUP`` operation with details
    ``{"type": "force"}``.

    Raises:
        Exception: Anything raised while stopping the monitor or cleaning
            up, after it has been recorded and logged.
    """
    op_id = self.operation_tracker.start_operation(
        OperationType.CLEANUP,
        {"type": "force"}
    )

    try:
        self.state = ProcessorState.SHUTDOWN
        await self.health_monitor.stop_monitoring()
        await self.cleanup_manager.force_cleanup()
        self.operation_tracker.end_operation(op_id, True)
    except Exception as e:
        self.operation_tracker.end_operation(op_id, False, str(e))
        # Log like the normal cleanup path does, so failures are visible.
        logger.error(f"Error during force cleanup: {e}", exc_info=True)
        raise
async def show_queue_details(self, ctx: commands.Context) -> None:
"""Display detailed queue status"""
try:
if not self.queue_manager:
await ctx.send("Queue manager is not initialized.")
@@ -126,111 +268,37 @@ class VideoProcessor:
# Get queue status
queue_status = self.queue_manager.get_queue_status(ctx.guild.id)
# Get active operations
active_ops = self.operation_tracker.get_active_operations()
# Create embed for queue overview
embed = discord.Embed(
title="Queue Status Details",
color=discord.Color.blue(),
timestamp=datetime.utcnow(),
# Create and send status embed
embed = await StatusDisplay.create_queue_status_embed(
queue_status,
active_ops
)
# Queue statistics
embed.add_field(
name="Queue Statistics",
value=f"```\n"
f"Pending: {queue_status['pending']}\n"
f"Processing: {queue_status['processing']}\n"
f"Completed: {queue_status['completed']}\n"
f"Failed: {queue_status['failed']}\n"
f"Success Rate: {queue_status['metrics']['success_rate']:.1%}\n"
f"Avg Processing Time: {queue_status['metrics']['avg_processing_time']:.1f}s\n"
f"```",
inline=False,
)
# Active operations
active_ops = self.progress_tracker.get_active_operations()
# Active downloads
downloads = active_ops['downloads']
if downloads:
active_downloads = ""
for url, progress in downloads.items():
active_downloads += (
f"URL: {url[:50]}...\n"
f"Progress: {progress.get('percent', 0):.1f}%\n"
f"Speed: {progress.get('speed', 'N/A')}\n"
f"ETA: {progress.get('eta', 'N/A')}\n"
f"Size: {progress.get('downloaded_bytes', 0)}/{progress.get('total_bytes', 0)} bytes\n"
f"Started: {progress.get('start_time', 'N/A')}\n"
f"Retries: {progress.get('retries', 0)}\n"
f"-------------------\n"
)
embed.add_field(
name="Active Downloads",
value=f"```\n{active_downloads}```",
inline=False,
)
else:
embed.add_field(
name="Active Downloads",
value="```\nNo active downloads```",
inline=False,
)
# Active compressions
compressions = active_ops['compressions']
if compressions:
active_compressions = ""
for file_id, progress in compressions.items():
active_compressions += (
f"File: {progress.get('filename', 'Unknown')}\n"
f"Progress: {progress.get('percent', 0):.1f}%\n"
f"Time Elapsed: {progress.get('elapsed_time', 'N/A')}\n"
f"Input Size: {progress.get('input_size', 0)} bytes\n"
f"Current Size: {progress.get('current_size', 0)} bytes\n"
f"Target Size: {progress.get('target_size', 0)} bytes\n"
f"Codec: {progress.get('codec', 'Unknown')}\n"
f"Hardware Accel: {progress.get('hardware_accel', False)}\n"
f"-------------------\n"
)
embed.add_field(
name="Active Compressions",
value=f"```\n{active_compressions}```",
inline=False,
)
else:
embed.add_field(
name="Active Compressions",
value="```\nNo active compressions```",
inline=False,
)
# Error statistics
if queue_status["metrics"]["errors_by_type"]:
error_stats = "\n".join(
f"{error_type}: {count}"
for error_type, count in queue_status["metrics"]["errors_by_type"].items()
)
embed.add_field(
name="Error Statistics",
value=f"```\n{error_stats}```",
inline=False,
)
# Hardware acceleration statistics
embed.add_field(
name="Hardware Statistics",
value=f"```\n"
f"Hardware Accel Failures: {queue_status['metrics']['hardware_accel_failures']}\n"
f"Compression Failures: {queue_status['metrics']['compression_failures']}\n"
f"Peak Memory Usage: {queue_status['metrics']['peak_memory_usage']:.1f}MB\n"
f"```",
inline=False,
)
await ctx.send(embed=embed)
except Exception as e:
logger.error(f"Error showing queue details: {str(e)}", exc_info=True)
logger.error(f"Error showing queue details: {e}", exc_info=True)
await ctx.send(f"Error getting queue details: {str(e)}")
def set_queue_task(self, task: asyncio.Task) -> None:
    """Remember the queue processing task and share it with the cleanup manager.

    The cleanup manager needs the same reference so it can cancel the task
    during cleanup.
    """
    self._queue_task = task
    manager = self.cleanup_manager
    manager.set_queue_task(task)
def get_status(self) -> Dict[str, Any]:
    """Return a snapshot of the processor's state, health and operations.

    ``last_health_check`` is the ISO-8601 string of the monitor's latest
    poll, or ``None`` when no poll has run yet.
    """
    monitor = self.health_monitor
    tracker = self.operation_tracker

    checked_at = monitor.last_check
    last_check_text = checked_at.isoformat() if checked_at else None

    return {
        "state": self.state.value,
        "health": monitor.is_healthy(),
        "operations": tracker.get_operation_stats(),
        "active_operations": tracker.get_active_operations(),
        "last_health_check": last_check_text,
        "health_status": monitor.health_status
    }

View File

@@ -1,130 +1,256 @@
"""Message processing and URL extraction for VideoProcessor"""
import logging
import asyncio
from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
import discord
from typing import List, Tuple, Optional
from videoarchiver.utils.video_downloader import is_video_url_pattern
from .url_extractor import URLExtractor
from .message_validator import MessageValidator
from .queue_processor import QueueProcessor
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class MessageState(Enum):
    """States a message moves through while it is being handled.

    ``COMPLETED``, ``FAILED`` and ``IGNORED`` are the terminal states: the
    processing tracker stamps an end time when one of them is set.
    """

    RECEIVED = "received"
    VALIDATING = "validating"
    EXTRACTING = "extracting"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    IGNORED = "ignored"
class ProcessingStage(Enum):
    """Named steps of the message pipeline, recorded alongside the state."""

    VALIDATION = "validation"
    EXTRACTION = "extraction"
    QUEUEING = "queueing"
    COMPLETION = "completion"
class MessageCache:
    """Small least-recently-used cache of per-message validation results.

    ``_cache`` is kept ordered from least to most recently used (a plain
    dict preserves insertion order), so eviction never needs to scan.
    ``_access_times`` records when each entry was last touched.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache holding at most ``max_size`` entries."""
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Store ``result`` for ``message_id`` as the most recently used entry.

        Overwriting an existing id never evicts another entry; a new id
        evicts the least recently used one when the cache is full.
        """
        if message_id in self._cache:
            # Remove first so the re-insert below moves it to the newest end.
            del self._cache[message_id]
        elif len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._access_times[message_id] = datetime.utcnow()

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Return the cached result, refreshing its recency, or None if absent."""
        try:
            result = self._cache.pop(message_id)
        except KeyError:
            return None
        self._cache[message_id] = result  # re-insert at the newest end
        self._access_times[message_id] = datetime.utcnow()
        return result

    def _cleanup_oldest(self) -> None:
        """Evict the least recently used entry, if there is one."""
        if not self._cache:
            return
        oldest = next(iter(self._cache))
        del self._cache[oldest]
        self._access_times.pop(oldest, None)
class ProcessingTracker:
    """Tracks message processing state and progress.

    Every dict is keyed by message id. Records of finished messages are
    discarded, oldest first, once more than ``max_tracked`` messages are
    held; messages still in flight are never discarded.
    """

    def __init__(self, max_tracked: int = 10000):
        """Create an empty tracker that keeps roughly ``max_tracked`` messages."""
        self.max_tracked = max_tracked
        self.states: Dict[int, MessageState] = {}
        self.stages: Dict[int, ProcessingStage] = {}
        self.errors: Dict[int, str] = {}
        self.start_times: Dict[int, datetime] = {}
        self.end_times: Dict[int, datetime] = {}

    def start_processing(self, message_id: int) -> None:
        """Start (or restart) tracking a message in the RECEIVED state."""
        # A re-processed message must not inherit the stage, error or end
        # time of its previous run.
        self._forget(message_id)
        self._prune()
        self.states[message_id] = MessageState.RECEIVED
        self.start_times[message_id] = datetime.utcnow()

    def update_state(
        self,
        message_id: int,
        state: MessageState,
        stage: Optional[ProcessingStage] = None,
        error: Optional[str] = None
    ) -> None:
        """Set the message's state and, when given, its stage and error text.

        Reaching COMPLETED, FAILED or IGNORED also stamps the end time.
        """
        self.states[message_id] = state
        if stage:
            self.stages[message_id] = stage
        if error:
            self.errors[message_id] = error
        if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
            self.end_times[message_id] = datetime.utcnow()

    def get_status(self, message_id: int) -> Dict[str, Any]:
        """Return everything known about a message; unknown fields are None.

        ``duration`` is in seconds and only present once the message has
        both a start and an end time.
        """
        started = self.start_times.get(message_id)
        ended = self.end_times.get(message_id)
        return {
            "state": self.states.get(message_id),
            "stage": self.stages.get(message_id),
            "error": self.errors.get(message_id),
            "start_time": started,
            "end_time": ended,
            "duration": (
                (ended - started).total_seconds()
                if message_id in self.end_times and message_id in self.start_times
                else None
            )
        }

    def _forget(self, message_id: int) -> None:
        """Drop every record held for ``message_id``."""
        for table in (
            self.states, self.stages, self.errors, self.start_times, self.end_times
        ):
            table.pop(message_id, None)

    def _prune(self) -> None:
        """Make room for one more message by dropping the oldest finished ones."""
        excess = len(self.start_times) - self.max_tracked + 1
        if excess <= 0:
            return
        stale = []
        # start_times iterates in start order, so the oldest come first.
        for message_id in self.start_times:
            if message_id in self.end_times:
                stale.append(message_id)
                if len(stale) >= excess:
                    break
        for message_id in stale:
            self._forget(message_id)
class MessageHandler:
    """Handles processing of messages for video content.

    A message is validated (with cached results), its URLs are extracted
    and handed to the queue processor; each step is recorded in
    ``self.tracker``.
    """

    # Seconds after which a message that has not reached a terminal state
    # is considered stuck by is_healthy().
    STUCK_TIMEOUT_SECONDS = 300

    def __init__(self, bot, config_manager, queue_manager):
        """Store collaborators and create the helper components."""
        self.bot = bot
        self.config_manager = config_manager
        self.queue_manager = queue_manager
        self.url_extractor = URLExtractor()
        self.message_validator = MessageValidator()
        self.queue_processor = QueueProcessor(queue_manager)

        # Initialize tracking and caching
        self.tracker = ProcessingTracker()
        self.validation_cache = MessageCache()
        # Messages are processed one at a time.
        self._processing_lock = asyncio.Lock()

    async def process_message(self, message: discord.Message) -> None:
        """Process a message for video content.

        Failures never propagate: they are logged, recorded as FAILED and
        signalled on the message with the error reaction.
        """
        # Start tracking
        self.tracker.start_processing(message.id)

        try:
            async with self._processing_lock:
                await self._process_message_internal(message)
        except Exception as e:
            logger.error(f"Error processing message: {str(e)}", exc_info=True)
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            try:
                await message.add_reaction(REACTIONS["error"])
            except Exception as reaction_error:
                # Best effort only; the message may be gone or reactions
                # may not be permitted.
                logger.debug(f"Could not add error reaction: {reaction_error}")

    async def _process_message_internal(self, message: discord.Message) -> None:
        """Validate the message, extract its URLs and queue them.

        Raises:
            Exception: Anything raised by a collaborator, after the
                message has been recorded as FAILED.
        """
        try:
            # Direct messages carry no guild, hence no guild settings.
            if message.guild is None:
                logger.debug(f"Ignoring message {message.id} outside of a guild")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Get guild settings
            settings = await self.config_manager.get_guild_settings(message.guild.id)
            if not settings:
                logger.warning(f"No settings found for guild {message.guild.id}")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Check cache for validation
            cached_validation = self.validation_cache.get(message.id)
            if cached_validation is not None:
                is_valid = cached_validation["valid"]
                reason = cached_validation["reason"]
            else:
                # Validate message
                self.tracker.update_state(
                    message.id,
                    MessageState.VALIDATING,
                    ProcessingStage.VALIDATION
                )
                is_valid, reason = await self.message_validator.validate_message(
                    message,
                    settings
                )
                # Cache result
                self.validation_cache.add(message.id, {
                    "valid": is_valid,
                    "reason": reason
                })

            if not is_valid:
                logger.debug(f"Message validation failed: {reason}")
                self.tracker.update_state(
                    message.id,
                    MessageState.IGNORED,
                    error=reason
                )
                return

            # Extract URLs
            self.tracker.update_state(
                message.id,
                MessageState.EXTRACTING,
                ProcessingStage.EXTRACTION
            )
            urls = await self.url_extractor.extract_urls(
                message,
                enabled_sites=settings.get("enabled_sites")
            )
            if not urls:
                logger.debug("No valid URLs found in message")
                self.tracker.update_state(message.id, MessageState.IGNORED)
                return

            # Process URLs
            self.tracker.update_state(
                message.id,
                MessageState.PROCESSING,
                ProcessingStage.QUEUEING
            )
            await self.queue_processor.process_urls(message, urls)

            # Mark completion
            self.tracker.update_state(
                message.id,
                MessageState.COMPLETED,
                ProcessingStage.COMPLETION
            )

        except Exception as e:
            self.tracker.update_state(
                message.id,
                MessageState.FAILED,
                error=str(e)
            )
            raise

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel (delegates to the queue processor)"""
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )

    def get_message_status(self, message_id: int) -> Dict[str, Any]:
        """Get processing status for a message"""
        return self.tracker.get_status(message_id)

    def is_healthy(self) -> bool:
        """Return False when any tracked message has been in flight too long."""
        terminal_states = (
            MessageState.COMPLETED,
            MessageState.FAILED,
            MessageState.IGNORED
        )
        current_time = datetime.utcnow()
        states = self.tracker.states
        for message_id, start_time in self.tracker.start_times.items():
            if message_id not in states or states[message_id] in terminal_states:
                continue
            elapsed = (current_time - start_time).total_seconds()
            if elapsed > self.STUCK_TIMEOUT_SECONDS:
                return False
        return True
View File

@@ -0,0 +1,225 @@
"""Message validation functionality for video processing"""
import logging
from enum import Enum
from dataclasses import dataclass
from typing import Dict, Optional, Tuple, List, Any, Callable, Set
from datetime import datetime
import discord
logger = logging.getLogger("VideoArchiver")
class ValidationResult(Enum):
    """Verdicts a validation step can reach for a message."""

    VALID = "valid"
    INVALID = "invalid"
    IGNORED = "ignored"
@dataclass
class ValidationContext:
    """Snapshot of the message facts that validation rules look at."""

    message: discord.Message
    settings: Dict[str, Any]
    guild_id: int
    channel_id: int
    author_id: int
    # Ids of the author's roles; empty when the author carries no roles.
    roles: Set[int]
    # Length of the text content; 0 when the message has none.
    content_length: int
    attachment_count: int
    is_bot: bool
    # The message's created_at value.
    timestamp: datetime

    @classmethod
    def from_message(cls, message: discord.Message, settings: Dict[str, Any]) -> 'ValidationContext':
        """Build a context from a message and the guild settings.

        NOTE(review): ``message.guild`` is dereferenced unconditionally, so
        callers are expected to pass guild messages only — confirm.
        """
        author = message.author
        # Not every author object exposes ``roles`` (e.g. a plain user
        # rather than a guild member); treat that as "no roles".
        author_roles = getattr(author, "roles", None) or ()
        return cls(
            message=message,
            settings=settings,
            guild_id=message.guild.id,
            channel_id=message.channel.id,
            author_id=author.id,
            roles={role.id for role in author_roles},
            content_length=len(message.content) if message.content else 0,
            attachment_count=len(message.attachments),
            is_bot=author.bot,
            timestamp=message.created_at
        )
@dataclass
class ValidationRule:
    """A single named check that can be applied to a validation context."""

    name: str
    description: str
    # Callable taking the context and returning (passed, failure_reason).
    validate: Callable[[ValidationContext], Tuple[bool, Optional[str]]]
    # Rules default to enabled with priority 0.
    enabled: bool = True
    priority: int = 0
class ValidationCache:
    """Bounded in-memory cache of validation results keyed by message ID.

    Every read or write stamps the entry with the current time. When a new
    entry would push the cache past ``max_size``, the entry with the oldest
    stamp is evicted first.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty cache holding at most ``max_size`` entries."""
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Store ``result`` for ``message_id`` and refresh its access time.

        Eviction only happens when a *new* key is inserted into a full cache;
        overwriting an existing key does not grow the cache, so nothing else
        needs to be dropped.
        """
        is_new_key = message_id not in self._cache
        if is_new_key and len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._access_times[message_id] = datetime.utcnow()

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Return the cached result for ``message_id``, or None if absent.

        A hit refreshes the entry's access time.
        """
        if message_id in self._cache:
            self._access_times[message_id] = datetime.utcnow()
            return self._cache[message_id]
        return None

    def _cleanup_oldest(self) -> None:
        """Evict the entry with the oldest access time, if any."""
        if not self._access_times:
            return
        oldest = min(self._access_times, key=self._access_times.get)
        # pop() tolerates the two dicts having drifted apart.
        self._cache.pop(oldest, None)
        del self._access_times[oldest]
class ValidationRuleManager:
    """Owns the built-in validation rules, kept sorted by priority."""

    def __init__(self):
        """Register the four built-in rules in ascending priority order."""
        rule_specs = (
            ("content_check", "Check if message has content to process",
             self._validate_content, 1),
            ("guild_enabled", "Check if archiving is enabled for guild",
             self._validate_guild_enabled, 2),
            ("channel_enabled", "Check if channel is enabled for archiving",
             self._validate_channel, 3),
            ("user_roles", "Check if user has required roles",
             self._validate_user_roles, 4),
        )
        self.rules: List[ValidationRule] = sorted(
            (
                ValidationRule(
                    name=rule_name,
                    description=rule_description,
                    validate=check,
                    priority=rule_priority,
                )
                for rule_name, rule_description, check, rule_priority in rule_specs
            ),
            key=lambda rule: rule.priority,
        )

    def _validate_content(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Pass when the message has text content or at least one attachment."""
        if ctx.content_length or ctx.attachment_count:
            return True, None
        return False, "No content or attachments"

    def _validate_guild_enabled(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Pass when the settings' ``enabled`` flag is truthy (default False)."""
        if ctx.settings.get("enabled", False):
            return True, None
        return False, "Video archiving disabled for guild"

    def _validate_channel(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Pass when no channel list is configured or the channel is listed."""
        enabled_channels = ctx.settings.get("enabled_channels", [])
        if not enabled_channels or ctx.channel_id in enabled_channels:
            return True, None
        return False, "Channel not enabled for archiving"

    def _validate_user_roles(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Pass when no role list is configured or the user holds a listed role."""
        allowed_roles = ctx.settings.get("allowed_roles", [])
        if not allowed_roles:
            return True, None
        if ctx.roles.isdisjoint(allowed_roles):
            return False, "User does not have required roles"
        return True, None
class MessageValidator:
    """Runs the validation rules against messages and caches the outcome."""

    def __init__(self):
        """Create the rule manager, the result cache and zeroed counters."""
        self.rule_manager = ValidationRuleManager()
        self.cache = ValidationCache()
        # NOTE: nothing in this class increments "ignored"; it is kept so the
        # shape of the stats dictionary stays stable for consumers.
        self.validation_stats: Dict[str, int] = {
            "total": 0,
            "valid": 0,
            "invalid": 0,
            "ignored": 0,
            "cached": 0
        }

    async def validate_message(
        self,
        message: discord.Message,
        settings: Dict[str, Any]
    ) -> Tuple[bool, Optional[str]]:
        """Decide whether a message should be processed.

        Enabled rules run in the rule manager's order; the first failing rule
        decides the result. Pass/fail outcomes are cached per message ID, so
        a repeated call returns the cached verdict without re-running rules.

        Args:
            message: The message to validate.
            settings: Settings dictionary handed to the rules.

        Returns:
            ``(True, None)`` when every enabled rule passes, otherwise
            ``(False, reason)``. If a rule raises, the error is logged and
            ``(False, "Validation error: ...")`` is returned without caching.
        """
        self.validation_stats["total"] += 1

        # Check cache
        cached = self.cache.get(message.id)
        if cached:
            self.validation_stats["cached"] += 1
            return cached["valid"], cached.get("reason")

        # Create validation context
        ctx = ValidationContext.from_message(message, settings)

        # Run validation rules
        for rule in self.rule_manager.rules:
            if not rule.enabled:
                continue
            try:
                valid, reason = rule.validate(ctx)
                if not valid:
                    self.validation_stats["invalid"] += 1
                    # Cache result
                    self.cache.add(message.id, {
                        "valid": False,
                        "reason": reason,
                        "rule": rule.name
                    })
                    return False, reason
            except Exception as e:
                logger.error(f"Error in validation rule {rule.name}: {e}")
                return False, f"Validation error: {str(e)}"

        # Message passed all rules
        self.validation_stats["valid"] += 1
        self.cache.add(message.id, {
            "valid": True,
            "reason": None
        })
        return True, None

    def get_stats(self) -> Dict[str, Any]:
        """Return a copy of the counters plus a description of every rule."""
        return {
            "validation_stats": self.validation_stats.copy(),
            "rules": [
                {
                    "name": rule.name,
                    "description": rule.description,
                    "enabled": rule.enabled,
                    "priority": rule.priority
                }
                for rule in self.rule_manager.rules
            ]
        }

    def clear_cache(self, message_id: Optional[int] = None) -> None:
        """Drop one cached result, or the whole cache.

        Args:
            message_id: ID whose cached result should be removed. When
                omitted (None) the entire cache is replaced by an empty one
                of the same maximum size.
        """
        # Compare against None explicitly so a falsy ID such as 0 removes
        # just that entry instead of wiping the whole cache.
        if message_id is not None:
            self.cache._cache.pop(message_id, None)
            self.cache._access_times.pop(message_id, None)
        else:
            self.cache = ValidationCache(self.cache.max_size)

View File

@@ -0,0 +1,237 @@
"""Queue processing functionality for video processing"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import List, Optional, Dict, Any, Set
from datetime import datetime
import discord
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class QueuePriority(Enum):
    """Priority levels for queue items; HIGH has the smallest value."""

    HIGH = 0
    NORMAL = 1
    LOW = 2
@dataclass
class QueueItem:
    """One URL waiting to be processed, plus where it came from."""

    # Required fields: the URL and the Discord IDs it originated from.
    url: str
    message_id: int
    channel_id: int
    guild_id: int
    author_id: int
    priority: QueuePriority
    added_at: datetime

    # Optional bookkeeping; all start out empty / zero.
    metadata: Optional[Dict[str, Any]] = None
    attempts: int = 0
    last_attempt: Optional[datetime] = None
    error: Optional[str] = None
class ProcessingStrategy(Enum):
    """Strategies for choosing the priority an item is queued with."""

    # First in, first out
    FIFO = "fifo"
    # Process by priority
    PRIORITY = "priority"
    # Smart processing based on various factors
    SMART = "smart"
class QueueMetrics:
    """Accumulates success/failure counts and timings for queue processing."""

    def __init__(self):
        """Start with all counters at zero and no recorded history."""
        self.total_processed = 0
        self.successful = 0
        self.failed = 0
        self.processing_times: List[float] = []
        self.errors: Dict[str, int] = {}
        self.last_processed: Optional[datetime] = None

    def record_success(self, processing_time: float) -> None:
        """Count one successful item and remember how long it took."""
        self.processing_times.append(processing_time)
        self.successful += 1
        self.total_processed += 1
        self.last_processed = datetime.utcnow()

    def record_failure(self, error: str) -> None:
        """Count one failed item and tally it under its error string."""
        self.errors[error] = 1 + self.errors.get(error, 0)
        self.failed += 1
        self.total_processed += 1
        self.last_processed = datetime.utcnow()

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot dictionary of the collected metrics.

        The success rate and average time are 0 when nothing has been
        recorded yet; ``error_counts`` is a copy of the internal tally.
        """
        if self.processing_times:
            avg_time = sum(self.processing_times) / len(self.processing_times)
        else:
            avg_time = 0

        if self.total_processed > 0:
            success_rate = self.successful / self.total_processed
        else:
            success_rate = 0

        return {
            "total_processed": self.total_processed,
            "successful": self.successful,
            "failed": self.failed,
            "success_rate": success_rate,
            "average_processing_time": avg_time,
            "error_counts": self.errors.copy(),
            "last_processed": self.last_processed
        }
class QueueProcessor:
    """Handles adding videos to the processing queue."""

    def __init__(
        self,
        queue_manager,
        strategy: ProcessingStrategy = ProcessingStrategy.SMART,
        max_retries: int = 3
    ):
        """Store the collaborators and create empty bookkeeping state.

        Args:
            queue_manager: Object exposing an awaitable ``add_to_queue``.
            strategy: How the priority passed to the queue is chosen.
            max_retries: Stored and reported by ``get_metrics``; not
                otherwise used in this class.
        """
        self.queue_manager = queue_manager
        self.strategy = strategy
        self.max_retries = max_retries
        self.metrics = QueueMetrics()
        # URLs currently inside _add_to_queue, guarded by the lock below.
        self._processing: Set[str] = set()
        self._processing_lock = asyncio.Lock()

    async def process_urls(
        self,
        message: discord.Message,
        urls: List[str],
        priority: QueuePriority = QueuePriority.NORMAL
    ) -> None:
        """Queue every URL, reacting on the message to signal the outcome.

        A failure for one URL is logged and flagged with the error reaction;
        the remaining URLs are still attempted.

        Args:
            message: The message the URLs were extracted from.
            urls: URLs to queue.
            priority: Priority recorded on each created queue item.
        """
        for url in urls:
            try:
                logger.info(f"Adding URL to queue: {url}")
                await message.add_reaction(REACTIONS['queued'])

                # Create queue item
                item = QueueItem(
                    url=url,
                    message_id=message.id,
                    channel_id=message.channel.id,
                    guild_id=message.guild.id,
                    author_id=message.author.id,
                    priority=priority,
                    added_at=datetime.utcnow()
                )

                # Add to queue with appropriate strategy
                await self._add_to_queue(item)
                logger.info(f"Successfully added video to queue: {url}")
            except Exception as e:
                logger.error(f"Failed to add video to queue: {str(e)}")
                await self._react_with_error(message)

    async def _react_with_error(self, message: discord.Message) -> None:
        """Best-effort error reaction that never raises.

        Adding a reaction can fail for the same reason the original
        operation failed (missing permissions, deleted message, ...). An
        exception escaping here would abort the loop in ``process_urls`` and
        silently skip the remaining URLs.
        """
        try:
            await message.add_reaction(REACTIONS['error'])
        except Exception as reaction_error:
            logger.warning(f"Could not add error reaction: {reaction_error}")

    async def _add_to_queue(self, item: QueueItem) -> None:
        """Add item to the queue using the current strategy.

        A URL that is already inside this method is skipped, so concurrent
        calls do not enqueue the same URL twice.
        """
        async with self._processing_lock:
            if item.url in self._processing:
                logger.debug(f"URL already being processed: {item.url}")
                return
            self._processing.add(item.url)

        try:
            # Apply processing strategy
            if self.strategy == ProcessingStrategy.PRIORITY:
                await self._add_with_priority(item)
            elif self.strategy == ProcessingStrategy.SMART:
                await self._add_with_smart_strategy(item)
            else:  # FIFO
                await self._add_fifo(item)
        finally:
            async with self._processing_lock:
                # discard() cannot raise and mask the original exception.
                self._processing.discard(item.url)

    async def _enqueue(self, item: QueueItem, priority: int) -> None:
        """Hand the item's fields to the queue manager with ``priority``."""
        await self.queue_manager.add_to_queue(
            url=item.url,
            message_id=item.message_id,
            channel_id=item.channel_id,
            guild_id=item.guild_id,
            author_id=item.author_id,
            priority=priority
        )

    async def _add_with_priority(self, item: QueueItem) -> None:
        """Add item using the priority recorded on the item itself."""
        await self._enqueue(item, item.priority.value)

    async def _add_with_smart_strategy(self, item: QueueItem) -> None:
        """Add item using a priority computed from metrics and retries."""
        priority = await self._calculate_smart_priority(item)
        await self._enqueue(item, priority)

    async def _add_fifo(self, item: QueueItem) -> None:
        """Add item with NORMAL priority regardless of the item's own."""
        await self._enqueue(item, QueuePriority.NORMAL.value)

    async def _calculate_smart_priority(self, item: QueueItem) -> int:
        """Compute the numeric priority for the SMART strategy.

        Lower numbers mean higher priority (``QueuePriority.HIGH`` is 0).
        The result is clamped to the range of ``QueuePriority`` values.
        """
        base_priority = item.priority.value

        # Adjust based on queue metrics
        stats = self.metrics.get_stats()
        if stats["total_processed"] > 0:
            # Boost priority if queue is processing efficiently
            if stats["success_rate"] > 0.9:  # 90% success rate
                base_priority -= 1
            # Lower priority if having issues
            elif stats["success_rate"] < 0.5:  # 50% success rate
                base_priority += 1

        # Each earlier attempt pushes the item further back.
        if item.attempts > 0:
            base_priority += item.attempts

        # Ensure priority stays in valid range
        return max(0, min(base_priority, len(QueuePriority) - 1))

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: Optional[discord.TextChannel],
        url: str
    ) -> str:
        """Format the text posted to the archive channel.

        Args:
            author: Original author, or None if unknown.
            channel: Original channel, or None if unknown.
            url: The original video URL.

        Returns:
            A two-line message mentioning author and channel (or the
            "Unknown ..." placeholders) followed by the original URL.
        """
        author_mention = author.mention if author else "Unknown User"
        channel_mention = channel.mention if channel else "Unknown Channel"
        return (
            f"Video archived from {author_mention} in {channel_mention}\n"
            f"Original URL: {url}"
        )

    def get_metrics(self) -> Dict[str, Any]:
        """Return processing metrics together with the current configuration."""
        return {
            "metrics": self.metrics.get_stats(),
            "strategy": self.strategy.value,
            "active_processing": len(self._processing),
            "max_retries": self.max_retries
        }

View File

@@ -0,0 +1,316 @@
"""Module for handling queue status display and formatting"""
import discord
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, Any, List, Optional
import logging
logger = logging.getLogger("VideoArchiver")
class DisplayTheme:
    """Namespace for colour themes used by the status display."""

    # Default theme: maps a colour role to a discord.Color instance.
    DEFAULT = dict(
        title_color=discord.Color.blue(),
        success_color=discord.Color.green(),
        warning_color=discord.Color.gold(),
        error_color=discord.Color.red(),
        info_color=discord.Color.blurple(),
    )
@dataclass
class DisplayTemplate:
    """Describes how one section of the status embed is rendered."""

    # Field title and the str.format template for the field value.
    name: str
    format_string: str
    # Passed through as the embed field's ``inline`` flag.
    inline: bool = False
    # Sections are added to the embed in ascending ``order``.
    order: int = 0
    # Optional condition name that must hold for the section to be shown.
    condition: Optional[str] = None
class DisplaySection(Enum):
    """Identifiers for the sections a status embed can contain."""

    QUEUE_STATS = "queue_stats"
    DOWNLOADS = "downloads"
    COMPRESSIONS = "compressions"
    ERRORS = "errors"
    HARDWARE = "hardware"
class StatusFormatter:
    """Static helpers that turn raw numbers into short display strings."""

    @staticmethod
    def format_bytes(bytes: int) -> str:
        """Render a byte count with one decimal and a B/KB/MB/GB/TB suffix.

        The value is divided by 1024 until it drops below 1024; anything
        still larger after the GB step is reported in TB.
        """
        size = bytes
        units = ('B', 'KB', 'MB', 'GB')
        index = 0
        while index < len(units):
            if size < 1024:
                return f"{size:.1f}{units[index]}"
            size /= 1024
            index += 1
        return f"{size:.1f}TB"

    @staticmethod
    def format_time(seconds: float) -> str:
        """Render a duration in seconds as seconds, minutes or hours."""
        if seconds < 60:
            return f"{seconds:.1f}s"
        minutes = seconds / 60
        if minutes >= 60:
            hours = minutes / 60
            return f"{hours:.1f}h"
        return f"{minutes:.1f}m"

    @staticmethod
    def format_percentage(value: float) -> str:
        """Render a number with one decimal followed by a percent sign."""
        return f"{value:.1f}%"
class DisplayManager:
    """Holds the per-section templates and the active colour theme."""

    def __init__(self):
        """Create the five built-in section templates and pick the default theme."""
        queue_stats = DisplayTemplate(
            name="Queue Statistics",
            format_string=(
                "```\n"
                "Pending: {pending}\n"
                "Processing: {processing}\n"
                "Completed: {completed}\n"
                "Failed: {failed}\n"
                "Success Rate: {success_rate}\n"
                "Avg Processing Time: {avg_processing_time}\n"
                "```"
            ),
            order=1,
        )
        downloads = DisplayTemplate(
            name="Active Downloads",
            format_string=(
                "```\n"
                "URL: {url}\n"
                "Progress: {percent}\n"
                "Speed: {speed}\n"
                "ETA: {eta}\n"
                "Size: {size}\n"
                "Started: {start_time}\n"
                "Retries: {retries}\n"
                "```"
            ),
            order=2,
        )
        compressions = DisplayTemplate(
            name="Active Compressions",
            format_string=(
                "```\n"
                "File: {filename}\n"
                "Progress: {percent}\n"
                "Time Elapsed: {elapsed_time}\n"
                "Input Size: {input_size}\n"
                "Current Size: {current_size}\n"
                "Target Size: {target_size}\n"
                "Codec: {codec}\n"
                "Hardware Accel: {hardware_accel}\n"
                "```"
            ),
            order=3,
        )
        # Only shown when the "has_errors" condition holds.
        errors = DisplayTemplate(
            name="Error Statistics",
            format_string="```\n{error_stats}```",
            condition="has_errors",
            order=4,
        )
        hardware = DisplayTemplate(
            name="Hardware Statistics",
            format_string=(
                "```\n"
                "Hardware Accel Failures: {hw_failures}\n"
                "Compression Failures: {comp_failures}\n"
                "Peak Memory Usage: {memory_usage}\n"
                "```"
            ),
            order=5,
        )

        self.templates: Dict[DisplaySection, DisplayTemplate] = dict([
            (DisplaySection.QUEUE_STATS, queue_stats),
            (DisplaySection.DOWNLOADS, downloads),
            (DisplaySection.COMPRESSIONS, compressions),
            (DisplaySection.ERRORS, errors),
            (DisplaySection.HARDWARE, hardware),
        ])
        self.theme = DisplayTheme.DEFAULT
class StatusDisplay:
    """Handles formatting and display of queue status information."""

    # Discord's documented maximum length of a single embed field value.
    FIELD_VALUE_LIMIT = 1024

    def __init__(self):
        """Create the template manager and the value formatter."""
        self.display_manager = DisplayManager()
        self.formatter = StatusFormatter()

    async def create_queue_status_embed(
        self,
        queue_status: Dict[str, Any],
        active_ops: Dict[str, Any]
    ) -> discord.Embed:
        """Create an embed displaying queue status and active operations.

        Sections are added in ascending template ``order``; a section whose
        template has a condition is skipped when that condition is not met.

        Args:
            queue_status: Queue counters plus a nested ``"metrics"`` mapping.
            active_ops: Mapping that may contain ``"downloads"`` and
                ``"compressions"`` progress dictionaries.

        Returns:
            The populated embed.
        """
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        embed = discord.Embed(
            title="Queue Status Details",
            color=self.display_manager.theme["title_color"],
            # Timezone-aware on purpose: discord.py 2.x interprets naive
            # datetimes as local time, which would shift a utcnow() value.
            timestamp=datetime.now(timezone.utc)
        )

        # Add sections in order
        sections = sorted(
            self.display_manager.templates.items(),
            key=lambda x: x[1].order
        )

        for section, template in sections:
            # Check condition if exists
            if template.condition:
                if not self._check_condition(template.condition, queue_status, active_ops):
                    continue

            # Add section based on type
            if section == DisplaySection.QUEUE_STATS:
                self._add_queue_statistics(embed, queue_status, template)
            elif section == DisplaySection.DOWNLOADS:
                self._add_active_downloads(embed, active_ops.get('downloads', {}), template)
            elif section == DisplaySection.COMPRESSIONS:
                self._add_active_compressions(embed, active_ops.get('compressions', {}), template)
            elif section == DisplaySection.ERRORS:
                self._add_error_statistics(embed, queue_status, template)
            elif section == DisplaySection.HARDWARE:
                self._add_hardware_statistics(embed, queue_status, template)

        return embed

    @staticmethod
    def _join_within_limit(blocks: List[str], limit: int = 1024) -> str:
        """Concatenate ``blocks`` without exceeding ``limit`` characters.

        Whole blocks are kept from the front for as long as they fit. When
        some have to be dropped, a short code-block notice stating how many
        were omitted is appended; room for that notice is reserved while
        selecting blocks.
        """
        joined = "".join(blocks)
        if len(joined) <= limit:
            return joined

        kept: List[str] = []
        used = 0
        total = len(blocks)
        for index, block in enumerate(blocks):
            omitted_after = total - index - 1
            reserve = (
                len(f"```\n... and {omitted_after} more\n```")
                if omitted_after
                else 0
            )
            if used + len(block) + reserve > limit:
                omitted = total - index
                return "".join(kept) + f"```\n... and {omitted} more\n```"
            kept.append(block)
            used += len(block)
        return "".join(kept)

    def _check_condition(
        self,
        condition: str,
        queue_status: Dict[str, Any],
        active_ops: Dict[str, Any]
    ) -> bool:
        """Check if the condition for displaying a section is met.

        Only ``"has_errors"`` is evaluated; any other name counts as met.
        """
        if condition == "has_errors":
            return bool(queue_status["metrics"]["errors_by_type"])
        return True

    def _add_queue_statistics(
        self,
        embed: discord.Embed,
        queue_status: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add queue statistics to the embed."""
        embed.add_field(
            name=template.name,
            value=template.format_string.format(
                pending=queue_status['pending'],
                processing=queue_status['processing'],
                completed=queue_status['completed'],
                failed=queue_status['failed'],
                # The rate is multiplied by 100 here to display a percentage.
                success_rate=self.formatter.format_percentage(
                    queue_status['metrics']['success_rate'] * 100
                ),
                avg_processing_time=self.formatter.format_time(
                    queue_status['metrics']['avg_processing_time']
                )
            ),
            inline=template.inline
        )

    def _add_active_downloads(
        self,
        embed: discord.Embed,
        downloads: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add active downloads information to the embed.

        One code block is rendered per download; the combined text is capped
        at ``FIELD_VALUE_LIMIT`` characters.
        """
        if downloads:
            content = []
            for url, progress in downloads.items():
                content.append(template.format_string.format(
                    # Long URLs are shortened to 50 characters plus an ellipsis.
                    url=url[:50] + "..." if len(url) > 50 else url,
                    percent=self.formatter.format_percentage(progress.get('percent', 0)),
                    speed=progress.get('speed', 'N/A'),
                    eta=progress.get('eta', 'N/A'),
                    size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/"
                         f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}",
                    start_time=progress.get('start_time', 'N/A'),
                    retries=progress.get('retries', 0)
                ))
            embed.add_field(
                name=template.name,
                value=self._join_within_limit(content, self.FIELD_VALUE_LIMIT),
                inline=template.inline
            )
        else:
            embed.add_field(
                name=template.name,
                value="```\nNo active downloads```",
                inline=template.inline
            )

    def _add_active_compressions(
        self,
        embed: discord.Embed,
        compressions: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add active compressions information to the embed.

        One code block is rendered per compression; the combined text is
        capped at ``FIELD_VALUE_LIMIT`` characters.
        """
        if compressions:
            content = []
            for file_id, progress in compressions.items():
                content.append(template.format_string.format(
                    filename=progress.get('filename', 'Unknown'),
                    percent=self.formatter.format_percentage(progress.get('percent', 0)),
                    elapsed_time=progress.get('elapsed_time', 'N/A'),
                    input_size=self.formatter.format_bytes(progress.get('input_size', 0)),
                    current_size=self.formatter.format_bytes(progress.get('current_size', 0)),
                    target_size=self.formatter.format_bytes(progress.get('target_size', 0)),
                    codec=progress.get('codec', 'Unknown'),
                    hardware_accel=progress.get('hardware_accel', False)
                ))
            embed.add_field(
                name=template.name,
                value=self._join_within_limit(content, self.FIELD_VALUE_LIMIT),
                inline=template.inline
            )
        else:
            embed.add_field(
                name=template.name,
                value="```\nNo active compressions```",
                inline=template.inline
            )

    def _add_error_statistics(
        self,
        embed: discord.Embed,
        queue_status: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add error statistics to the embed (nothing is added without errors)."""
        errors_by_type = queue_status["metrics"]["errors_by_type"]
        if not errors_by_type:
            return

        error_stats = "\n".join(
            f"{error_type}: {count}"
            for error_type, count in errors_by_type.items()
        )
        # Trim the listing so the rendered field value stays within the limit.
        overhead = len(template.format_string.format(error_stats=""))
        budget = self.FIELD_VALUE_LIMIT - overhead
        if len(error_stats) > budget:
            marker = "\n..."
            error_stats = error_stats[:max(budget - len(marker), 0)] + marker

        embed.add_field(
            name=template.name,
            value=template.format_string.format(error_stats=error_stats),
            inline=template.inline
        )

    def _add_hardware_statistics(
        self,
        embed: discord.Embed,
        queue_status: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add hardware statistics to the embed."""
        embed.add_field(
            name=template.name,
            value=template.format_string.format(
                hw_failures=queue_status['metrics']['hardware_accel_failures'],
                comp_failures=queue_status['metrics']['compression_failures'],
                memory_usage=self.formatter.format_bytes(
                    queue_status['metrics']['peak_memory_usage'] * 1024 * 1024  # Convert MB to bytes
                )
            ),
            inline=template.inline
        )

View File

@@ -0,0 +1,264 @@
"""URL extraction functionality for video processing"""
import logging
import re
from enum import Enum
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Pattern
import discord
from urllib.parse import urlparse, parse_qs
logger = logging.getLogger("VideoArchiver")
@dataclass
class URLPattern:
    """Compiled URL pattern for one video site plus its capability flags."""

    site: str
    pattern: Pattern
    # Capability flags; every one defaults to False.
    requires_api: bool = False
    supports_timestamp: bool = False
    supports_playlist: bool = False
@dataclass
class URLMetadata:
    """Details extracted from a single video URL."""

    # Always present: the URL itself and the site key it matched.
    url: str
    site: str
    # Optional details; None when not available.
    timestamp: Optional[int] = None
    playlist_id: Optional[str] = None
    video_id: Optional[str] = None
    quality: Optional[str] = None
class URLType(Enum):
    """Classification assigned to a candidate video URL."""

    DIRECT = "direct"
    PLATFORM = "platform"
    UNKNOWN = "unknown"
class URLPatternManager:
    """Registry of per-site URL patterns and direct-video file extensions."""

    def __init__(self):
        """Register the built-in site patterns and direct file extensions."""
        youtube = URLPattern(
            site="youtube",
            pattern=re.compile(
                r'(?:https?://)?(?:www\.)?'
                r'(?:youtube\.com/watch\?v=|youtu\.be/)'
                r'([a-zA-Z0-9_-]{11})'
            ),
            supports_timestamp=True,
            supports_playlist=True,
        )
        vimeo = URLPattern(
            site="vimeo",
            pattern=re.compile(
                r'(?:https?://)?(?:www\.)?'
                r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)'
                r'(\d+)(?:|/\w+)*'
            ),
            supports_timestamp=True,
        )
        twitter = URLPattern(
            site="twitter",
            pattern=re.compile(
                r'(?:https?://)?(?:www\.)?'
                r'(?:twitter\.com|x\.com)/\w+/status/(\d+)'
            ),
            requires_api=True,
        )
        # Add more patterns as needed
        self.patterns: Dict[str, URLPattern] = {
            entry.site: entry for entry in (youtube, vimeo, twitter)
        }
        self.direct_extensions = {'.mp4', '.mov', '.avi', '.webm', '.mkv'}

    def get_pattern(self, site: str) -> Optional[URLPattern]:
        """Look up a site's pattern by its lower-cased name; None if unknown."""
        key = site.lower()
        return self.patterns.get(key)

    def is_supported_site(self, url: str, enabled_sites: Optional[List[str]]) -> bool:
        """Check whether the URL's domain matches one of the enabled sites.

        An empty or missing ``enabled_sites`` accepts every URL. Otherwise a
        site matches when its lower-cased name occurs anywhere in the URL's
        network location (with ``www.`` removed).
        """
        if not enabled_sites:
            return True
        domain = urlparse(url.lower()).netloc.replace('www.', '')
        for site in enabled_sites:
            if site.lower() in domain:
                return True
        return False
class URLValidator:
    """Classifies candidate URLs and checks their basic shape."""

    def __init__(self, pattern_manager: URLPatternManager):
        """Remember the pattern manager that supplies patterns and extensions."""
        self.pattern_manager = pattern_manager

    def get_url_type(self, url: str) -> URLType:
        """Classify a URL as DIRECT, PLATFORM or UNKNOWN.

        DIRECT wins when the URL path ends with a known video extension;
        otherwise PLATFORM is returned if any registered site pattern matches
        at the start of the URL.
        """
        path = urlparse(url).path.lower()
        if path.endswith(tuple(self.pattern_manager.direct_extensions)):
            return URLType.DIRECT
        for candidate in self.pattern_manager.patterns.values():
            if candidate.pattern.match(url):
                return URLType.PLATFORM
        return URLType.UNKNOWN

    def is_valid_url(self, url: str) -> bool:
        """Return True when the URL parses with both a scheme and a host.

        Any error raised while parsing counts as invalid.
        """
        try:
            parts = urlparse(url)
            return bool(parts.scheme and parts.netloc)
        except Exception:
            return False
class URLMetadataExtractor:
    """Extracts metadata from URLs."""

    # Accepts offsets written with unit suffixes, e.g. "90s", "1m30s",
    # "1h2m3s" or "1m30"; every component is optional.
    _TIMESTAMP_RE = re.compile(r'^(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?$')

    def __init__(self, pattern_manager: URLPatternManager):
        """Remember the pattern manager that supplies patterns and extensions."""
        self.pattern_manager = pattern_manager

    def extract_metadata(self, url: str) -> Optional[URLMetadata]:
        """Extract metadata from a URL.

        Direct video links (path ending in a known extension) yield metadata
        with site ``"direct"``. Otherwise the first registered site pattern
        matching at the start of the URL supplies the site and video ID,
        plus timestamp / playlist ID when that site supports them.

        Returns:
            The metadata, or None when nothing matches or an error occurs
            (errors are logged).
        """
        try:
            parsed = urlparse(url)

            # Handle direct video URLs
            if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
                return URLMetadata(url=url, site="direct")

            # Handle platform URLs
            for site, pattern in self.pattern_manager.patterns.items():
                if match := pattern.pattern.match(url):
                    metadata = URLMetadata(
                        url=url,
                        site=site,
                        video_id=match.group(1)
                    )

                    # Extract additional metadata
                    if pattern.supports_timestamp:
                        metadata.timestamp = self._extract_timestamp(parsed)
                    if pattern.supports_playlist:
                        metadata.playlist_id = self._extract_playlist_id(parsed)

                    return metadata

            return None

        except Exception as e:
            logger.error(f"Error extracting metadata from URL {url}: {e}")
            return None

    def _extract_timestamp(self, parsed_url: urlparse) -> Optional[int]:
        """Extract the start offset in seconds from the ``t`` query parameter.

        Plain integers (``t=90``) are returned as-is. Values with unit
        suffixes (``t=90s``, ``t=1m30s``, ``t=1h2m3s``) are converted to
        seconds instead of being discarded.

        Args:
            parsed_url: Result of ``urlparse`` for the URL.

        Returns:
            The offset in seconds, or None when absent or unparseable.
        """
        try:
            values = parse_qs(parsed_url.query).get('t')
            if not values:
                return None
            raw = values[0]

            # Keep accepting everything int() accepted before.
            try:
                return int(raw)
            except ValueError:
                pass

            match = self._TIMESTAMP_RE.match(raw.strip().lower())
            if match is None or not any(match.groups()):
                return None
            hours, minutes, seconds = (
                int(part) if part else 0 for part in match.groups()
            )
            return hours * 3600 + minutes * 60 + seconds
        except Exception:
            # Best effort: a malformed timestamp must not fail extraction.
            return None

    def _extract_playlist_id(self, parsed_url: urlparse) -> Optional[str]:
        """Extract the playlist ID from the ``list`` query parameter.

        Returns:
            The first ``list`` value, or None when absent or on any error.
        """
        try:
            params = parse_qs(parsed_url.query)
            if 'list' in params:
                return params['list'][0]
            return None
        except Exception:
            return None
class URLExtractor:
    """Handles extraction of video URLs from messages."""

    def __init__(self):
        """Create the pattern manager, validator, metadata extractor and cache."""
        self.pattern_manager = URLPatternManager()
        self.validator = URLValidator(self.pattern_manager)
        self.metadata_extractor = URLMetadataExtractor(self.pattern_manager)
        # Maps "<message id>_<enabled sites>" to the accepted URLs, kept in
        # message order so cached calls reproduce the first result.
        self._url_cache: Dict[str, List[str]] = {}

    async def extract_urls(
        self,
        message: discord.Message,
        enabled_sites: Optional[List[str]] = None
    ) -> List[URLMetadata]:
        """Extract video URLs from message content and attachments.

        Candidates from the text come first, then attachments. Each distinct
        URL is considered once, in order of first appearance, and is kept
        only if it is well-formed, belongs to an enabled site and yields
        metadata. Accepted URLs are cached per message and site selection so
        a repeated call returns the same list in the same order.

        Args:
            message: The message to scan.
            enabled_sites: Site names to accept; None or empty accepts all.

        Returns:
            Metadata for every accepted URL.
        """
        # Check cache
        cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}"
        cached_urls = self._url_cache.get(cache_key)
        if cached_urls is not None:
            extracted = (
                self.metadata_extractor.extract_metadata(url)
                for url in cached_urls
            )
            # Drop entries for which metadata can no longer be produced.
            return [metadata for metadata in extracted if metadata]

        # Extract URLs
        content_urls = await self._extract_from_content(message.content, enabled_sites)
        attachment_urls = await self._extract_from_attachments(message.attachments)

        # Process each distinct URL once, preserving first-seen order.
        urls: List[URLMetadata] = []
        valid_urls: List[str] = []
        for url in dict.fromkeys(content_urls + attachment_urls):
            if not self.validator.is_valid_url(url):
                logger.debug(f"Invalid URL format: {url}")
                continue

            if not self.pattern_manager.is_supported_site(url, enabled_sites):
                logger.debug(f"URL {url} doesn't match any enabled sites")
                continue

            metadata = self.metadata_extractor.extract_metadata(url)
            if metadata:
                urls.append(metadata)
                valid_urls.append(url)
            else:
                logger.debug(f"Could not extract metadata from URL: {url}")

        # Update cache
        self._url_cache[cache_key] = valid_urls

        return urls

    async def _extract_from_content(
        self,
        content: str,
        enabled_sites: Optional[List[str]]
    ) -> List[str]:
        """Extract candidate video URLs from message text.

        The text is split on whitespace and every word the validator does
        not classify as UNKNOWN is returned. ``enabled_sites`` is accepted
        but not consulted here; site filtering happens in ``extract_urls``.
        """
        if not content:
            return []

        return [
            word
            for word in content.split()
            if self.validator.get_url_type(word) != URLType.UNKNOWN
        ]

    async def _extract_from_attachments(
        self,
        attachments: List[discord.Attachment]
    ) -> List[str]:
        """Return the URLs of attachments whose filename has a video extension."""
        return [
            attachment.url
            for attachment in attachments
            if any(
                attachment.filename.lower().endswith(ext)
                for ext in self.pattern_manager.direct_extensions
            )
        ]

    def clear_cache(self, message_id: Optional[int] = None) -> None:
        """Clear cached URLs for one message, or the whole cache.

        Args:
            message_id: Message whose cache entries (for every site
                selection) are removed. When omitted (None) the entire cache
                is cleared.
        """
        # Compare against None explicitly so a falsy ID such as 0 does not
        # wipe every message's entries.
        if message_id is not None:
            prefix = f"{message_id}_"
            keys_to_remove = [
                key for key in self._url_cache
                if key.startswith(prefix)
            ]
            for key in keys_to_remove:
                self._url_cache.pop(key, None)
        else:
            self._url_cache.clear()