Mirror of https://github.com/pacnpal/Pac-cogs.git (synced 2025-12-20 10:51:05 -05:00)
Core Systems:
- Component-based architecture with lifecycle management
- Enhanced error handling and recovery mechanisms
- Comprehensive state management and tracking
- Event-driven architecture with monitoring

Queue Management:
- Multiple processing strategies for different scenarios
- Advanced state management with recovery
- Comprehensive metrics and health monitoring
- Sophisticated cleanup system with multiple strategies

Processing Pipeline:
- Enhanced message handling with validation
- Improved URL extraction and processing
- Better queue management and monitoring
- Advanced cleanup mechanisms

Overall Benefits:
- Better code organization and maintainability
- Improved error handling and recovery
- Enhanced monitoring and reporting
- More robust and reliable system
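The processing pipeline described above reduces to a staged walk per message. A minimal, self-contained illustration of that shape (the stage names mirror the ProcessingStage enum introduced in the diff below; run_pipeline and its handlers are hypothetical, not the cog's API):

    STAGES = ("validation", "extraction", "queueing")

    def run_pipeline(message, handlers):
        # Walk the stages in order; the first failure stops the walk.
        for stage in STAGES:
            ok, detail = handlers[stage](message)
            if not ok:
                return {"state": "ignored", "stage": stage, "reason": detail}
        return {"state": "completed", "stage": "completion", "reason": None}

    print(run_pipeline(
        "https://example.com/clip.mp4",
        {
            "validation": lambda m: (True, None),
            "extraction": lambda m: (True, None),
            "queueing": lambda m: (True, None),
        },
    ))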
videoarchiver/processor/cleanup_manager.py (new file, 252 lines)
@@ -0,0 +1,252 @@
"""Module for managing cleanup operations in the video processor"""

import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, Set
from datetime import datetime

logger = logging.getLogger("VideoArchiver")


class CleanupStage(Enum):
    """Cleanup stages"""
    QUEUE = "queue"
    FFMPEG = "ffmpeg"
    TASKS = "tasks"
    RESOURCES = "resources"


class CleanupStrategy(Enum):
    """Cleanup strategies"""
    NORMAL = "normal"
    FORCE = "force"
    GRACEFUL = "graceful"


@dataclass
class CleanupResult:
    """Result of a cleanup operation"""
    success: bool
    stage: CleanupStage
    error: Optional[str] = None
    duration: float = 0.0


class CleanupTracker:
    """Tracks cleanup operations"""

    def __init__(self):
        self.cleanup_history: List[Dict[str, Any]] = []
        self.active_cleanups: Set[str] = set()
        self.start_times: Dict[str, datetime] = {}
        self.stage_results: Dict[str, List[CleanupResult]] = {}

    def start_cleanup(self, cleanup_id: str) -> None:
        """Start tracking a cleanup operation"""
        self.active_cleanups.add(cleanup_id)
        self.start_times[cleanup_id] = datetime.utcnow()
        self.stage_results[cleanup_id] = []

    def record_stage_result(
        self,
        cleanup_id: str,
        result: CleanupResult
    ) -> None:
        """Record result of a cleanup stage"""
        if cleanup_id in self.stage_results:
            self.stage_results[cleanup_id].append(result)

    def end_cleanup(self, cleanup_id: str) -> None:
        """End tracking a cleanup operation"""
        if cleanup_id in self.active_cleanups:
            end_time = datetime.utcnow()
            self.cleanup_history.append({
                "id": cleanup_id,
                "start_time": self.start_times[cleanup_id],
                "end_time": end_time,
                "duration": (end_time - self.start_times[cleanup_id]).total_seconds(),
                "results": self.stage_results[cleanup_id]
            })
            self.active_cleanups.remove(cleanup_id)
            self.start_times.pop(cleanup_id)
            self.stage_results.pop(cleanup_id)

    def get_cleanup_stats(self) -> Dict[str, Any]:
        """Get cleanup statistics"""
        return {
            "total_cleanups": len(self.cleanup_history),
            "active_cleanups": len(self.active_cleanups),
            "success_rate": self._calculate_success_rate(),
            "average_duration": self._calculate_average_duration(),
            "stage_success_rates": self._calculate_stage_success_rates()
        }

    def _calculate_success_rate(self) -> float:
        """Calculate overall cleanup success rate"""
        if not self.cleanup_history:
            return 1.0
        successful = sum(
            1 for cleanup in self.cleanup_history
            if all(result.success for result in cleanup["results"])
        )
        return successful / len(self.cleanup_history)

    def _calculate_average_duration(self) -> float:
        """Calculate average cleanup duration"""
        if not self.cleanup_history:
            return 0.0
        total_duration = sum(cleanup["duration"] for cleanup in self.cleanup_history)
        return total_duration / len(self.cleanup_history)

    def _calculate_stage_success_rates(self) -> Dict[str, float]:
        """Calculate success rates by stage"""
        stage_attempts: Dict[str, int] = {}
        stage_successes: Dict[str, int] = {}

        for cleanup in self.cleanup_history:
            for result in cleanup["results"]:
                stage = result.stage.value
                stage_attempts[stage] = stage_attempts.get(stage, 0) + 1
                if result.success:
                    stage_successes[stage] = stage_successes.get(stage, 0) + 1

        return {
            stage: stage_successes.get(stage, 0) / attempts
            for stage, attempts in stage_attempts.items()
        }


class CleanupManager:
    """Manages cleanup operations for the video processor"""

    def __init__(
        self,
        queue_handler,
        ffmpeg_mgr: Optional[object] = None,
        strategy: CleanupStrategy = CleanupStrategy.NORMAL
    ):
        self.queue_handler = queue_handler
        self.ffmpeg_mgr = ffmpeg_mgr
        self.strategy = strategy
        self._queue_task: Optional[asyncio.Task] = None
        self.tracker = CleanupTracker()

    async def cleanup(self) -> None:
        """Perform normal cleanup of resources"""
        cleanup_id = f"cleanup_{datetime.utcnow().timestamp()}"
        self.tracker.start_cleanup(cleanup_id)

        try:
            logger.info("Starting normal cleanup...")

            # Clean up in stages
            stages = [
                (CleanupStage.QUEUE, self._cleanup_queue),
                (CleanupStage.FFMPEG, self._cleanup_ffmpeg),
                (CleanupStage.TASKS, self._cleanup_tasks)
            ]

            for stage, cleanup_func in stages:
                try:
                    start_time = datetime.utcnow()
                    await cleanup_func()
                    duration = (datetime.utcnow() - start_time).total_seconds()
                    self.tracker.record_stage_result(
                        cleanup_id,
                        CleanupResult(True, stage, duration=duration)
                    )
                except Exception as e:
                    logger.error(f"Error in {stage.value} cleanup: {e}")
                    self.tracker.record_stage_result(
                        cleanup_id,
                        CleanupResult(False, stage, str(e))
                    )
                    if self.strategy != CleanupStrategy.GRACEFUL:
                        raise

            logger.info("Normal cleanup completed successfully")

        except Exception as e:
            logger.error(f"Error during normal cleanup: {str(e)}", exc_info=True)
            raise
        finally:
            self.tracker.end_cleanup(cleanup_id)

    async def force_cleanup(self) -> None:
        """Force cleanup of resources when normal cleanup fails"""
        cleanup_id = f"force_cleanup_{datetime.utcnow().timestamp()}"
        self.tracker.start_cleanup(cleanup_id)

        try:
            logger.info("Starting force cleanup...")

            # Force cleanup in stages
            stages = [
                (CleanupStage.QUEUE, self._force_cleanup_queue),
                (CleanupStage.FFMPEG, self._force_cleanup_ffmpeg),
                (CleanupStage.TASKS, self._force_cleanup_tasks)
            ]

            for stage, cleanup_func in stages:
                try:
                    start_time = datetime.utcnow()
                    await cleanup_func()
                    duration = (datetime.utcnow() - start_time).total_seconds()
                    self.tracker.record_stage_result(
                        cleanup_id,
                        CleanupResult(True, stage, duration=duration)
                    )
                except Exception as e:
                    logger.error(f"Error in force {stage.value} cleanup: {e}")
                    self.tracker.record_stage_result(
                        cleanup_id,
                        CleanupResult(False, stage, str(e))
                    )

            logger.info("Force cleanup completed")

        except Exception as e:
            logger.error(f"Error during force cleanup: {str(e)}", exc_info=True)
        finally:
            self.tracker.end_cleanup(cleanup_id)

    async def _cleanup_queue(self) -> None:
        """Clean up queue handler"""
        await self.queue_handler.cleanup()

    async def _cleanup_ffmpeg(self) -> None:
        """Clean up FFmpeg manager"""
        if self.ffmpeg_mgr:
            self.ffmpeg_mgr.kill_all_processes()

    async def _cleanup_tasks(self) -> None:
        """Clean up tasks"""
        if self._queue_task and not self._queue_task.done():
            self._queue_task.cancel()
            try:
                await self._queue_task
            except asyncio.CancelledError:
                pass

    async def _force_cleanup_queue(self) -> None:
        """Force clean up queue handler"""
        await self.queue_handler.force_cleanup()

    async def _force_cleanup_ffmpeg(self) -> None:
        """Force clean up FFmpeg manager"""
        if self.ffmpeg_mgr:
            self.ffmpeg_mgr.kill_all_processes()

    async def _force_cleanup_tasks(self) -> None:
        """Force clean up tasks"""
        if self._queue_task and not self._queue_task.done():
            self._queue_task.cancel()

    def set_queue_task(self, task: asyncio.Task) -> None:
        """Set the queue processing task for cleanup purposes"""
        self._queue_task = task

    def get_cleanup_stats(self) -> Dict[str, Any]:
        """Get cleanup statistics"""
        return {
            "stats": self.tracker.get_cleanup_stats(),
            "strategy": self.strategy.value,
            "active_cleanups": len(self.tracker.active_cleanups)
        }
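A minimal usage sketch for the manager above (assumptions: the cog package is importable as videoarchiver, and DummyQueueHandler is a hypothetical stand-in for the real queue handler, not part of the cog):

    import asyncio

    from videoarchiver.processor.cleanup_manager import CleanupManager, CleanupStrategy

    class DummyQueueHandler:
        """Hypothetical stand-in for the real queue handler."""
        async def cleanup(self):
            pass
        async def force_cleanup(self):
            pass

    async def main():
        # GRACEFUL records failed stages and keeps going; NORMAL (the default)
        # re-raises on the first failed stage.
        manager = CleanupManager(DummyQueueHandler(), strategy=CleanupStrategy.GRACEFUL)
        await manager.cleanup()
        print(manager.get_cleanup_stats())

    asyncio.run(main())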
@@ -2,19 +2,151 @@
 
 import logging
 import asyncio
+from enum import Enum
+from typing import Optional, Tuple, Dict, Any, List
+from datetime import datetime
 import discord
 from discord.ext import commands
 from discord import app_commands
-from datetime import datetime
-from typing import Dict, Any, Optional, Tuple
 
 from .message_handler import MessageHandler
 from .queue_handler import QueueHandler
 from .progress_tracker import ProgressTracker
+from .status_display import StatusDisplay
+from .cleanup_manager import CleanupManager
 from .reactions import REACTIONS
 
 logger = logging.getLogger("VideoArchiver")
 
+
+class ProcessorState(Enum):
+    """Possible states of the video processor"""
+    INITIALIZING = "initializing"
+    READY = "ready"
+    PROCESSING = "processing"
+    PAUSED = "paused"
+    ERROR = "error"
+    SHUTDOWN = "shutdown"
+
+
+class OperationType(Enum):
+    """Types of processor operations"""
+    MESSAGE_PROCESSING = "message_processing"
+    VIDEO_PROCESSING = "video_processing"
+    QUEUE_MANAGEMENT = "queue_management"
+    CLEANUP = "cleanup"
+
+
+class OperationTracker:
+    """Tracks processor operations"""
+
+    def __init__(self):
+        self.operations: Dict[str, Dict[str, Any]] = {}
+        self.operation_history: List[Dict[str, Any]] = []
+        self.error_count = 0
+        self.success_count = 0
+
+    def start_operation(
+        self,
+        op_type: OperationType,
+        details: Dict[str, Any]
+    ) -> str:
+        """Start tracking an operation"""
+        op_id = f"{op_type.value}_{datetime.utcnow().timestamp()}"
+        self.operations[op_id] = {
+            "type": op_type.value,
+            "start_time": datetime.utcnow(),
+            "status": "running",
+            "details": details
+        }
+        return op_id
+
+    def end_operation(
+        self,
+        op_id: str,
+        success: bool,
+        error: Optional[str] = None
+    ) -> None:
+        """End tracking an operation"""
+        if op_id in self.operations:
+            self.operations[op_id].update({
+                "end_time": datetime.utcnow(),
+                "status": "success" if success else "error",
+                "error": error
+            })
+            # Move to history
+            self.operation_history.append(self.operations.pop(op_id))
+            # Update counts
+            if success:
+                self.success_count += 1
+            else:
+                self.error_count += 1
+
+    def get_active_operations(self) -> Dict[str, Dict[str, Any]]:
+        """Get currently active operations"""
+        return self.operations.copy()
+
+    def get_operation_stats(self) -> Dict[str, Any]:
+        """Get operation statistics"""
+        return {
+            "total_operations": len(self.operation_history) + len(self.operations),
+            "active_operations": len(self.operations),
+            "success_count": self.success_count,
+            "error_count": self.error_count,
+            "success_rate": (
+                self.success_count / (self.success_count + self.error_count)
+                if (self.success_count + self.error_count) > 0
+                else 0
+            )
+        }
+
+
+class HealthMonitor:
+    """Monitors processor health"""
+
+    def __init__(self, processor: 'VideoProcessor'):
+        self.processor = processor
+        self.last_check: Optional[datetime] = None
+        self.health_status: Dict[str, bool] = {}
+        self._monitor_task: Optional[asyncio.Task] = None
+
+    async def start_monitoring(self) -> None:
+        """Start health monitoring"""
+        self._monitor_task = asyncio.create_task(self._monitor_health())
+
+    async def stop_monitoring(self) -> None:
+        """Stop health monitoring"""
+        if self._monitor_task:
+            self._monitor_task.cancel()
+            try:
+                await self._monitor_task
+            except asyncio.CancelledError:
+                pass
+
+    async def _monitor_health(self) -> None:
+        """Monitor processor health"""
+        while True:
+            try:
+                self.last_check = datetime.utcnow()
+
+                # Check component health
+                self.health_status.update({
+                    "queue_handler": self.processor.queue_handler.is_healthy(),
+                    "message_handler": self.processor.message_handler.is_healthy(),
+                    "progress_tracker": self.processor.progress_tracker.is_healthy()
+                })
+
+                # Check operation health
+                op_stats = self.processor.operation_tracker.get_operation_stats()
+                self.health_status["operations"] = (
+                    op_stats["success_rate"] >= 0.9  # 90% success rate threshold
+                )
+
+                await asyncio.sleep(60)  # Check every minute
+
+            except Exception as e:
+                logger.error(f"Health monitoring error: {e}")
+                await asyncio.sleep(30)  # Shorter interval on error
+
+    def is_healthy(self) -> bool:
+        """Check if processor is healthy"""
+        return all(self.health_status.values())
+
 
 class VideoProcessor:
     """Handles video processing operations"""
 
@@ -34,91 +166,101 @@ class VideoProcessor:
         self.db = db
         self.queue_manager = queue_manager
 
+        # Initialize state
+        self.state = ProcessorState.INITIALIZING
+        self.operation_tracker = OperationTracker()
+        self.health_monitor = HealthMonitor(self)
+
         # Initialize handlers
         self.queue_handler = QueueHandler(bot, config_manager, components)
         self.message_handler = MessageHandler(bot, config_manager, queue_manager)
         self.progress_tracker = ProgressTracker()
+        self.cleanup_manager = CleanupManager(self.queue_handler, ffmpeg_mgr)
 
         # Pass db to queue handler if it exists
         if self.db:
             self.queue_handler.db = self.db
 
-        # Store queue task reference but don't start processing here
-        # Queue processing is managed by VideoArchiver class
+        # Store queue task reference
        self._queue_task = None
 
         # Mark as ready
+        self.state = ProcessorState.READY
         logger.info("VideoProcessor initialized successfully")
 
+    async def start(self) -> None:
+        """Start processor operations"""
+        await self.health_monitor.start_monitoring()
+
     async def process_video(self, item) -> Tuple[bool, Optional[str]]:
-        """Process a video from the queue by delegating to queue handler"""
-        return await self.queue_handler.process_video(item)
+        """Process a video from the queue"""
+        op_id = self.operation_tracker.start_operation(
+            OperationType.VIDEO_PROCESSING,
+            {"item": str(item)}
+        )
+
+        try:
+            self.state = ProcessorState.PROCESSING
+            result = await self.queue_handler.process_video(item)
+            success = result[0]
+            error = None if success else result[1]
+            self.operation_tracker.end_operation(op_id, success, error)
+            return result
+        except Exception as e:
+            self.operation_tracker.end_operation(op_id, False, str(e))
+            raise
+        finally:
+            self.state = ProcessorState.READY
 
     async def process_message(self, message: discord.Message) -> None:
         """Process a message for video content"""
-        await self.message_handler.process_message(message)
-
-    async def cleanup(self):
-        """Clean up resources and stop processing"""
-        try:
-            logger.info("Starting VideoProcessor cleanup...")
-
-            # Clean up queue handler
-            try:
-                await self.queue_handler.cleanup()
-            except Exception as e:
-                logger.error(f"Error cleaning up queue handler: {e}")
-
-            # Clean up FFmpeg manager
-            if self.ffmpeg_mgr:
-                try:
-                    self.ffmpeg_mgr.kill_all_processes()
-                except Exception as e:
-                    logger.error(f"Error cleaning up FFmpeg manager: {e}")
-
-            # Cancel queue processing task if we have one
-            if self._queue_task and not self._queue_task.done():
-                self._queue_task.cancel()
-                try:
-                    await self._queue_task
-                except asyncio.CancelledError:
-                    pass
-                except Exception as e:
-                    logger.error(f"Error cancelling queue task: {e}")
-
-            logger.info("VideoProcessor cleanup completed successfully")
-
-        except Exception as e:
-            logger.error(f"Error during VideoProcessor cleanup: {str(e)}", exc_info=True)
-            raise
-
-    async def force_cleanup(self):
-        """Force cleanup of resources when normal cleanup fails"""
-        try:
-            logger.info("Starting force cleanup of VideoProcessor...")
-
-            # Force cleanup queue handler
-            try:
-                await self.queue_handler.force_cleanup()
-            except Exception as e:
-                logger.error(f"Error force cleaning queue handler: {e}")
-
-            # Force cleanup FFmpeg
-            if self.ffmpeg_mgr:
-                try:
-                    self.ffmpeg_mgr.kill_all_processes()
-                except Exception as e:
-                    logger.error(f"Error force cleaning FFmpeg manager: {e}")
-
-            # Force cancel queue task
-            if self._queue_task and not self._queue_task.done():
-                self._queue_task.cancel()
-
-            logger.info("VideoProcessor force cleanup completed")
-
-        except Exception as e:
-            logger.error(f"Error during VideoProcessor force cleanup: {str(e)}", exc_info=True)
+        op_id = self.operation_tracker.start_operation(
+            OperationType.MESSAGE_PROCESSING,
+            {"message_id": message.id}
+        )
+
+        try:
+            await self.message_handler.process_message(message)
+            self.operation_tracker.end_operation(op_id, True)
+        except Exception as e:
+            self.operation_tracker.end_operation(op_id, False, str(e))
+            raise
+
+    async def cleanup(self) -> None:
+        """Clean up resources and stop processing"""
+        op_id = self.operation_tracker.start_operation(
+            OperationType.CLEANUP,
+            {"type": "normal"}
+        )
+
+        try:
+            self.state = ProcessorState.SHUTDOWN
+            await self.health_monitor.stop_monitoring()
+            await self.cleanup_manager.cleanup()
+            self.operation_tracker.end_operation(op_id, True)
+        except Exception as e:
+            self.operation_tracker.end_operation(op_id, False, str(e))
+            logger.error(f"Error during cleanup: {e}", exc_info=True)
+            raise
+
+    async def force_cleanup(self) -> None:
+        """Force cleanup of resources"""
+        op_id = self.operation_tracker.start_operation(
+            OperationType.CLEANUP,
+            {"type": "force"}
+        )
+
+        try:
+            self.state = ProcessorState.SHUTDOWN
+            await self.health_monitor.stop_monitoring()
+            await self.cleanup_manager.force_cleanup()
+            self.operation_tracker.end_operation(op_id, True)
+        except Exception as e:
+            self.operation_tracker.end_operation(op_id, False, str(e))
+            raise
 
-    async def show_queue_details(self, ctx: commands.Context):
-        """Display detailed queue status and progress information"""
+    async def show_queue_details(self, ctx: commands.Context) -> None:
+        """Display detailed queue status"""
         try:
             if not self.queue_manager:
                 await ctx.send("Queue manager is not initialized.")
@@ -126,111 +268,37 @@ class VideoProcessor:
                 return
 
             # Get queue status
             queue_status = self.queue_manager.get_queue_status(ctx.guild.id)
 
-            # Create embed for queue overview
-            embed = discord.Embed(
-                title="Queue Status Details",
-                color=discord.Color.blue(),
-                timestamp=datetime.utcnow(),
-            )
-
-            # Queue statistics
-            embed.add_field(
-                name="Queue Statistics",
-                value=f"```\n"
-                f"Pending: {queue_status['pending']}\n"
-                f"Processing: {queue_status['processing']}\n"
-                f"Completed: {queue_status['completed']}\n"
-                f"Failed: {queue_status['failed']}\n"
-                f"Success Rate: {queue_status['metrics']['success_rate']:.1%}\n"
-                f"Avg Processing Time: {queue_status['metrics']['avg_processing_time']:.1f}s\n"
-                f"```",
-                inline=False,
-            )
-
-            # Active operations
-            active_ops = self.progress_tracker.get_active_operations()
-
-            # Active downloads
-            downloads = active_ops['downloads']
-            if downloads:
-                active_downloads = ""
-                for url, progress in downloads.items():
-                    active_downloads += (
-                        f"URL: {url[:50]}...\n"
-                        f"Progress: {progress.get('percent', 0):.1f}%\n"
-                        f"Speed: {progress.get('speed', 'N/A')}\n"
-                        f"ETA: {progress.get('eta', 'N/A')}\n"
-                        f"Size: {progress.get('downloaded_bytes', 0)}/{progress.get('total_bytes', 0)} bytes\n"
-                        f"Started: {progress.get('start_time', 'N/A')}\n"
-                        f"Retries: {progress.get('retries', 0)}\n"
-                        f"-------------------\n"
-                    )
-                embed.add_field(
-                    name="Active Downloads",
-                    value=f"```\n{active_downloads}```",
-                    inline=False,
-                )
-            else:
-                embed.add_field(
-                    name="Active Downloads",
-                    value="```\nNo active downloads```",
-                    inline=False,
-                )
-
-            # Active compressions
-            compressions = active_ops['compressions']
-            if compressions:
-                active_compressions = ""
-                for file_id, progress in compressions.items():
-                    active_compressions += (
-                        f"File: {progress.get('filename', 'Unknown')}\n"
-                        f"Progress: {progress.get('percent', 0):.1f}%\n"
-                        f"Time Elapsed: {progress.get('elapsed_time', 'N/A')}\n"
-                        f"Input Size: {progress.get('input_size', 0)} bytes\n"
-                        f"Current Size: {progress.get('current_size', 0)} bytes\n"
-                        f"Target Size: {progress.get('target_size', 0)} bytes\n"
-                        f"Codec: {progress.get('codec', 'Unknown')}\n"
-                        f"Hardware Accel: {progress.get('hardware_accel', False)}\n"
-                        f"-------------------\n"
-                    )
-                embed.add_field(
-                    name="Active Compressions",
-                    value=f"```\n{active_compressions}```",
-                    inline=False,
-                )
-            else:
-                embed.add_field(
-                    name="Active Compressions",
-                    value="```\nNo active compressions```",
-                    inline=False,
-                )
-
-            # Error statistics
-            if queue_status["metrics"]["errors_by_type"]:
-                error_stats = "\n".join(
-                    f"{error_type}: {count}"
-                    for error_type, count in queue_status["metrics"]["errors_by_type"].items()
-                )
-                embed.add_field(
-                    name="Error Statistics",
-                    value=f"```\n{error_stats}```",
-                    inline=False,
-                )
-
-            # Hardware acceleration statistics
-            embed.add_field(
-                name="Hardware Statistics",
-                value=f"```\n"
-                f"Hardware Accel Failures: {queue_status['metrics']['hardware_accel_failures']}\n"
-                f"Compression Failures: {queue_status['metrics']['compression_failures']}\n"
-                f"Peak Memory Usage: {queue_status['metrics']['peak_memory_usage']:.1f}MB\n"
-                f"```",
-                inline=False,
-            )
+            # Get active operations
+            active_ops = self.operation_tracker.get_active_operations()
+
+            # Create and send status embed
+            embed = await StatusDisplay.create_queue_status_embed(
+                queue_status,
+                active_ops
+            )
 
             await ctx.send(embed=embed)
 
         except Exception as e:
-            logger.error(f"Error showing queue details: {str(e)}", exc_info=True)
+            logger.error(f"Error showing queue details: {e}", exc_info=True)
             await ctx.send(f"Error getting queue details: {str(e)}")
 
     def set_queue_task(self, task: asyncio.Task) -> None:
         """Set the queue processing task"""
         self._queue_task = task
+        self.cleanup_manager.set_queue_task(task)
+
+    def get_status(self) -> Dict[str, Any]:
+        """Get processor status"""
+        return {
+            "state": self.state.value,
+            "health": self.health_monitor.is_healthy(),
+            "operations": self.operation_tracker.get_operation_stats(),
+            "active_operations": self.operation_tracker.get_active_operations(),
+            "last_health_check": (
+                self.health_monitor.last_check.isoformat()
+                if self.health_monitor.last_check
+                else None
+            ),
+            "health_status": self.health_monitor.health_status
+        }
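The HealthMonitor above polls component health on a fixed interval and drops to a shorter interval after an error. A self-contained sketch of that polling pattern (the components dict, intervals, and iteration count here are illustrative, not the cog's API):

    import asyncio

    # Illustrative component checks; the real monitor calls is_healthy() on
    # the queue handler, message handler, and progress tracker.
    components = {
        "queue_handler": lambda: True,
        "message_handler": lambda: True,
    }

    async def monitor(components, checks=3):
        health = {}
        for _ in range(checks):
            try:
                health.update({name: check() for name, check in components.items()})
                print("healthy:", all(health.values()), health)
                await asyncio.sleep(1)    # the cog sleeps 60s here
            except Exception as e:
                print("monitor error:", e)
                await asyncio.sleep(0.5)  # and drops to 30s after an error

    asyncio.run(monitor(components))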
@@ -1,130 +1,256 @@
 """Message processing and URL extraction for VideoProcessor"""
 
 import logging
 import asyncio
+from enum import Enum
+from typing import Optional, Dict, Any, List, Tuple
+from datetime import datetime
 import discord
-from typing import List, Tuple, Optional
-from videoarchiver.utils.video_downloader import is_video_url_pattern
 
+from .url_extractor import URLExtractor
+from .message_validator import MessageValidator
+from .queue_processor import QueueProcessor
 from .reactions import REACTIONS
 
 logger = logging.getLogger("VideoArchiver")
 
+
+class MessageState(Enum):
+    """Possible states of message processing"""
+    RECEIVED = "received"
+    VALIDATING = "validating"
+    EXTRACTING = "extracting"
+    PROCESSING = "processing"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    IGNORED = "ignored"
+
+
+class ProcessingStage(Enum):
+    """Message processing stages"""
+    VALIDATION = "validation"
+    EXTRACTION = "extraction"
+    QUEUEING = "queueing"
+    COMPLETION = "completion"
+
+
+class MessageCache:
+    """Caches message validation results"""
+
+    def __init__(self, max_size: int = 1000):
+        self.max_size = max_size
+        self._cache: Dict[int, Dict[str, Any]] = {}
+        self._access_times: Dict[int, datetime] = {}
+
+    def add(self, message_id: int, result: Dict[str, Any]) -> None:
+        """Add a result to cache"""
+        if len(self._cache) >= self.max_size:
+            self._cleanup_oldest()
+        self._cache[message_id] = result
+        self._access_times[message_id] = datetime.utcnow()
+
+    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
+        """Get a cached result"""
+        if message_id in self._cache:
+            self._access_times[message_id] = datetime.utcnow()
+            return self._cache[message_id]
+        return None
+
+    def _cleanup_oldest(self) -> None:
+        """Remove oldest cache entries"""
+        if not self._access_times:
+            return
+        oldest = min(self._access_times.items(), key=lambda x: x[1])[0]
+        del self._cache[oldest]
+        del self._access_times[oldest]
+
+
+class ProcessingTracker:
+    """Tracks message processing state and progress"""
+
+    def __init__(self):
+        self.states: Dict[int, MessageState] = {}
+        self.stages: Dict[int, ProcessingStage] = {}
+        self.errors: Dict[int, str] = {}
+        self.start_times: Dict[int, datetime] = {}
+        self.end_times: Dict[int, datetime] = {}
+
+    def start_processing(self, message_id: int) -> None:
+        """Start tracking a message"""
+        self.states[message_id] = MessageState.RECEIVED
+        self.start_times[message_id] = datetime.utcnow()
+
+    def update_state(
+        self,
+        message_id: int,
+        state: MessageState,
+        stage: Optional[ProcessingStage] = None,
+        error: Optional[str] = None
+    ) -> None:
+        """Update message state"""
+        self.states[message_id] = state
+        if stage:
+            self.stages[message_id] = stage
+        if error:
+            self.errors[message_id] = error
+        if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
+            self.end_times[message_id] = datetime.utcnow()
+
+    def get_status(self, message_id: int) -> Dict[str, Any]:
+        """Get processing status for a message"""
+        return {
+            "state": self.states.get(message_id),
+            "stage": self.stages.get(message_id),
+            "error": self.errors.get(message_id),
+            "start_time": self.start_times.get(message_id),
+            "end_time": self.end_times.get(message_id),
+            "duration": (
+                (self.end_times[message_id] - self.start_times[message_id]).total_seconds()
+                if message_id in self.end_times and message_id in self.start_times
+                else None
+            )
+        }
+
 
 class MessageHandler:
     """Handles processing of messages for video content"""
 
     def __init__(self, bot, config_manager, queue_manager):
         self.bot = bot
         self.config_manager = config_manager
         self.queue_manager = queue_manager
+        self.url_extractor = URLExtractor()
+        self.message_validator = MessageValidator()
+        self.queue_processor = QueueProcessor(queue_manager)
+
+        # Initialize tracking and caching
+        self.tracker = ProcessingTracker()
+        self.validation_cache = MessageCache()
+        self._processing_lock = asyncio.Lock()
 
     async def process_message(self, message: discord.Message) -> None:
         """Process a message for video content"""
+        # Start tracking
+        self.tracker.start_processing(message.id)
+
         try:
-            # Check if message contains any content to process
-            if not message.content and not message.attachments:
-                logger.debug(f"No content or attachments in message {message.id}")
-                return
-
-            # Get guild settings
-            settings = await self.config_manager.get_guild_settings(message.guild.id)
-            if not settings:
-                logger.warning(f"No settings found for guild {message.guild.id}")
-                return
-
-            # Check if video archiving is enabled for this guild
-            if not settings.get("enabled", False):
-                logger.debug(f"Video archiving is disabled for guild {message.guild.id}")
-                return
-
-            # Log settings for debugging
-            logger.debug(f"Guild {message.guild.id} settings: {settings}")
-
-            # Check if channel is enabled (empty list means all channels)
-            enabled_channels = settings.get("enabled_channels", [])
-            if enabled_channels and message.channel.id not in enabled_channels:
-                logger.debug(f"Channel {message.channel.id} not in enabled channels: {enabled_channels}")
-                return
-
-            # Check if user has allowed role (empty list means all roles)
-            allowed_roles = settings.get("allowed_roles", [])
-            if allowed_roles:
-                user_roles = [role.id for role in message.author.roles]
-                if not any(role_id in allowed_roles for role_id in user_roles):
-                    logger.debug(f"User {message.author.id} does not have any allowed roles")
-                    return
-
-            # Extract URLs from message
-            urls = await self._extract_urls(message, settings)
-            if not urls:
-                logger.debug("No valid URLs found in message")
-                return
-
-            # Process each URL
-            await self._process_urls(message, urls)
-
+            async with self._processing_lock:
+                await self._process_message_internal(message)
         except Exception as e:
             logger.error(f"Error processing message: {str(e)}", exc_info=True)
+            self.tracker.update_state(
+                message.id,
+                MessageState.FAILED,
+                error=str(e)
+            )
+            try:
+                await message.add_reaction(REACTIONS["error"])
+            except:
+                pass
 
-    async def _extract_urls(self, message: discord.Message, settings: dict) -> List[str]:
-        """Extract video URLs from message content and attachments"""
-        urls = []
-
-        # Extract from message content
-        if message.content:
-            logger.debug(f"Processing message content: {message.content}")
-            enabled_sites = settings.get("enabled_sites", [])
-            logger.debug(f"Enabled sites: {enabled_sites}")
-
-            for word in message.content.split():
-                logger.debug(f"Checking word: {word}")
-                if is_video_url_pattern(word):
-                    # If enabled_sites is empty or None, allow all sites
-                    if not enabled_sites or any(site in word.lower() for site in enabled_sites):
-                        logger.debug(f"Found matching URL: {word}")
-                        urls.append(word)
-                    else:
-                        logger.debug(f"URL {word} doesn't match any enabled sites")
-                else:
-                    logger.debug(f"Word {word} is not a valid video URL")
-
-        # Extract from attachments
-        for attachment in message.attachments:
-            logger.debug(f"Checking attachment: {attachment.filename}")
-            if any(attachment.filename.lower().endswith(ext) for ext in ['.mp4', '.mov', '.avi', '.webm']):
-                logger.debug(f"Found video attachment: {attachment.url}")
-                urls.append(attachment.url)
-
-        return urls
-
-    async def _process_urls(self, message: discord.Message, urls: List[str]) -> None:
-        """Process extracted URLs by adding them to the queue"""
-        for url in urls:
-            try:
-                logger.info(f"Adding URL to queue: {url}")
-                await message.add_reaction(REACTIONS['queued'])
-                await self.queue_manager.add_to_queue(
-                    url=url,
-                    message_id=message.id,
-                    channel_id=message.channel.id,
-                    guild_id=message.guild.id,
-                    author_id=message.author.id,
-                    priority=0
-                )
-                logger.info(f"Successfully added video to queue: {url}")
-            except Exception as e:
-                logger.error(f"Failed to add video to queue: {str(e)}")
-                await message.add_reaction(REACTIONS['error'])
-                continue
-
-    async def format_archive_message(self, author: Optional[discord.Member],
-                                     channel: discord.TextChannel,
-                                     url: str) -> str:
-        """Format message for archive channel"""
-        author_mention = author.mention if author else "Unknown User"
-        channel_mention = channel.mention if channel else "Unknown Channel"
-
-        return (f"Video archived from {author_mention} in {channel_mention}\n"
-                f"Original URL: {url}")
+    async def _process_message_internal(self, message: discord.Message) -> None:
+        """Internal message processing logic"""
+        try:
+            # Get guild settings
+            settings = await self.config_manager.get_guild_settings(message.guild.id)
+            if not settings:
+                logger.warning(f"No settings found for guild {message.guild.id}")
+                self.tracker.update_state(message.id, MessageState.IGNORED)
+                return
+
+            # Check cache for validation
+            cached_validation = self.validation_cache.get(message.id)
+            if cached_validation:
+                is_valid = cached_validation["valid"]
+                reason = cached_validation["reason"]
+            else:
+                # Validate message
+                self.tracker.update_state(
+                    message.id,
+                    MessageState.VALIDATING,
+                    ProcessingStage.VALIDATION
+                )
+                is_valid, reason = await self.message_validator.validate_message(
+                    message,
+                    settings
+                )
+                # Cache result
+                self.validation_cache.add(message.id, {
+                    "valid": is_valid,
+                    "reason": reason
+                })
+
+            if not is_valid:
+                logger.debug(f"Message validation failed: {reason}")
+                self.tracker.update_state(
+                    message.id,
+                    MessageState.IGNORED,
+                    error=reason
+                )
+                return
+
+            # Extract URLs
+            self.tracker.update_state(
+                message.id,
+                MessageState.EXTRACTING,
+                ProcessingStage.EXTRACTION
+            )
+            urls = await self.url_extractor.extract_urls(
+                message,
+                enabled_sites=settings.get("enabled_sites")
+            )
+            if not urls:
+                logger.debug("No valid URLs found in message")
+                self.tracker.update_state(message.id, MessageState.IGNORED)
+                return
+
+            # Process URLs
+            self.tracker.update_state(
+                message.id,
+                MessageState.PROCESSING,
+                ProcessingStage.QUEUEING
+            )
+            await self.queue_processor.process_urls(message, urls)
+
+            # Mark completion
+            self.tracker.update_state(
+                message.id,
+                MessageState.COMPLETED,
+                ProcessingStage.COMPLETION
+            )
+
+        except Exception as e:
+            self.tracker.update_state(
+                message.id,
+                MessageState.FAILED,
+                error=str(e)
+            )
+            raise
+
+    async def format_archive_message(
+        self,
+        author: Optional[discord.Member],
+        channel: discord.TextChannel,
+        url: str
+    ) -> str:
+        """Format message for archive channel"""
+        return await self.queue_processor.format_archive_message(
+            author,
+            channel,
+            url
+        )
+
+    def get_message_status(self, message_id: int) -> Dict[str, Any]:
+        """Get processing status for a message"""
+        return self.tracker.get_status(message_id)
+
+    def is_healthy(self) -> bool:
+        """Check if handler is healthy"""
+        # Check for any stuck messages
+        current_time = datetime.utcnow()
+        for message_id, start_time in self.tracker.start_times.items():
+            if (
+                message_id in self.tracker.states and
+                self.tracker.states[message_id] not in (
+                    MessageState.COMPLETED,
+                    MessageState.FAILED,
+                    MessageState.IGNORED
+                ) and
+                (current_time - start_time).total_seconds() > 300  # 5 minutes timeout
+            ):
+                return False
+        return True
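A sketch of how the caching and tracking pieces above behave (the module path is inferred from the relative imports in this commit; adjust it if the file lives elsewhere):

    from videoarchiver.processor.message_handler import (
        MessageCache,
        MessageState,
        ProcessingTracker,
    )

    # Validation results are cached per message id; once the cache is full,
    # the least recently accessed entry is evicted on the next insert.
    cache = MessageCache(max_size=2)
    cache.add(1, {"valid": True, "reason": None})
    cache.add(2, {"valid": False, "reason": "Channel not enabled"})
    cache.get(1)                     # touch id 1, so id 2 is now the oldest
    cache.add(3, {"valid": True, "reason": None})
    print(cache.get(2))              # None: id 2 was evicted

    # The tracker records a small per-message state machine with timestamps.
    tracker = ProcessingTracker()
    tracker.start_processing(42)
    tracker.update_state(42, MessageState.COMPLETED)
    print(tracker.get_status(42)["duration"])  # seconds from start to completion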
videoarchiver/processor/message_validator.py (new file, 225 lines)
@@ -0,0 +1,225 @@
"""Message validation functionality for video processing"""

import logging
from enum import Enum
from dataclasses import dataclass
from typing import Dict, Optional, Tuple, List, Any, Callable, Set
from datetime import datetime
import discord

logger = logging.getLogger("VideoArchiver")


class ValidationResult(Enum):
    """Possible validation results"""
    VALID = "valid"
    INVALID = "invalid"
    IGNORED = "ignored"


@dataclass
class ValidationContext:
    """Context for message validation"""
    message: discord.Message
    settings: Dict[str, Any]
    guild_id: int
    channel_id: int
    author_id: int
    roles: Set[int]
    content_length: int
    attachment_count: int
    is_bot: bool
    timestamp: datetime

    @classmethod
    def from_message(cls, message: discord.Message, settings: Dict[str, Any]) -> 'ValidationContext':
        """Create context from message"""
        return cls(
            message=message,
            settings=settings,
            guild_id=message.guild.id,
            channel_id=message.channel.id,
            author_id=message.author.id,
            roles={role.id for role in message.author.roles},
            content_length=len(message.content) if message.content else 0,
            attachment_count=len(message.attachments),
            is_bot=message.author.bot,
            timestamp=message.created_at
        )


@dataclass
class ValidationRule:
    """Defines a validation rule"""
    name: str
    description: str
    validate: Callable[[ValidationContext], Tuple[bool, Optional[str]]]
    enabled: bool = True
    priority: int = 0


class ValidationCache:
    """Caches validation results"""

    def __init__(self, max_size: int = 1000):
        self.max_size = max_size
        self._cache: Dict[int, Dict[str, Any]] = {}
        self._access_times: Dict[int, datetime] = {}

    def add(self, message_id: int, result: Dict[str, Any]) -> None:
        """Add validation result to cache"""
        if len(self._cache) >= self.max_size:
            self._cleanup_oldest()
        self._cache[message_id] = result
        self._access_times[message_id] = datetime.utcnow()

    def get(self, message_id: int) -> Optional[Dict[str, Any]]:
        """Get cached validation result"""
        if message_id in self._cache:
            self._access_times[message_id] = datetime.utcnow()
            return self._cache[message_id]
        return None

    def _cleanup_oldest(self) -> None:
        """Remove oldest cache entries"""
        if not self._access_times:
            return
        oldest = min(self._access_times.items(), key=lambda x: x[1])[0]
        del self._cache[oldest]
        del self._access_times[oldest]


class ValidationRuleManager:
    """Manages validation rules"""

    def __init__(self):
        self.rules: List[ValidationRule] = [
            ValidationRule(
                name="content_check",
                description="Check if message has content to process",
                validate=self._validate_content,
                priority=1
            ),
            ValidationRule(
                name="guild_enabled",
                description="Check if archiving is enabled for guild",
                validate=self._validate_guild_enabled,
                priority=2
            ),
            ValidationRule(
                name="channel_enabled",
                description="Check if channel is enabled for archiving",
                validate=self._validate_channel,
                priority=3
            ),
            ValidationRule(
                name="user_roles",
                description="Check if user has required roles",
                validate=self._validate_user_roles,
                priority=4
            )
        ]
        self.rules.sort(key=lambda x: x.priority)

    def _validate_content(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Validate message content"""
        if not ctx.content_length and not ctx.attachment_count:
            return False, "No content or attachments"
        return True, None

    def _validate_guild_enabled(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Validate guild settings"""
        if not ctx.settings.get("enabled", False):
            return False, "Video archiving disabled for guild"
        return True, None

    def _validate_channel(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Validate channel settings"""
        enabled_channels = ctx.settings.get("enabled_channels", [])
        if enabled_channels and ctx.channel_id not in enabled_channels:
            return False, "Channel not enabled for archiving"
        return True, None

    def _validate_user_roles(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
        """Validate user roles"""
        allowed_roles = ctx.settings.get("allowed_roles", [])
        if allowed_roles and not (ctx.roles & set(allowed_roles)):
            return False, "User does not have required roles"
        return True, None


class MessageValidator:
    """Handles validation of messages for video processing"""

    def __init__(self):
        self.rule_manager = ValidationRuleManager()
        self.cache = ValidationCache()
        self.validation_stats: Dict[str, int] = {
            "total": 0,
            "valid": 0,
            "invalid": 0,
            "ignored": 0,
            "cached": 0
        }

    async def validate_message(
        self,
        message: discord.Message,
        settings: Dict
    ) -> Tuple[bool, Optional[str]]:
        """Validate if a message should be processed"""
        self.validation_stats["total"] += 1

        # Check cache
        cached = self.cache.get(message.id)
        if cached:
            self.validation_stats["cached"] += 1
            return cached["valid"], cached.get("reason")

        # Create validation context
        ctx = ValidationContext.from_message(message, settings)

        # Run validation rules
        for rule in self.rule_manager.rules:
            if not rule.enabled:
                continue

            try:
                valid, reason = rule.validate(ctx)
                if not valid:
                    self.validation_stats["invalid"] += 1
                    # Cache result
                    self.cache.add(message.id, {
                        "valid": False,
                        "reason": reason,
                        "rule": rule.name
                    })
                    return False, reason
            except Exception as e:
                logger.error(f"Error in validation rule {rule.name}: {e}")
                return False, f"Validation error: {str(e)}"

        # Message passed all rules
        self.validation_stats["valid"] += 1
        self.cache.add(message.id, {
            "valid": True,
            "reason": None
        })
        return True, None

    def get_stats(self) -> Dict[str, Any]:
        """Get validation statistics"""
        return {
            "validation_stats": self.validation_stats.copy(),
            "rules": [
                {
                    "name": rule.name,
                    "description": rule.description,
                    "enabled": rule.enabled,
                    "priority": rule.priority
                }
                for rule in self.rule_manager.rules
            ]
        }

    def clear_cache(self, message_id: Optional[int] = None) -> None:
        """Clear validation cache"""
        if message_id:
            self.cache._cache.pop(message_id, None)
            self.cache._access_times.pop(message_id, None)
        else:
            self.cache = ValidationCache(self.cache.max_size)
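The validator runs its rules in priority order and stops at the first failure. A sketch of that short-circuit behavior (building ValidationContext by hand with message=None is purely illustrative; the bundled rules only read the scalar fields, and the cog builds the context via from_message):

    from datetime import datetime

    from videoarchiver.processor.message_validator import (
        ValidationContext,
        ValidationRuleManager,
    )

    ctx = ValidationContext(
        message=None,  # hypothetical: no real discord.Message needed here
        settings={"enabled": True, "enabled_channels": [], "allowed_roles": [111]},
        guild_id=1,
        channel_id=2,
        author_id=3,
        roles={222},             # user lacks role 111, so the last rule fails
        content_length=42,
        attachment_count=0,
        is_bot=False,
        timestamp=datetime.utcnow(),
    )

    for rule in ValidationRuleManager().rules:
        valid, reason = rule.validate(ctx)
        print(f"{rule.name}: {valid} {reason or ''}")
        if not valid:
            break  # MessageValidator.validate_message() short-circuits here too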
videoarchiver/processor/queue_processor.py (new file, 237 lines)
@@ -0,0 +1,237 @@
"""Queue processing functionality for video processing"""

import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import List, Optional, Dict, Any, Set
from datetime import datetime
import discord

from .reactions import REACTIONS

logger = logging.getLogger("VideoArchiver")


class QueuePriority(Enum):
    """Queue item priorities"""
    HIGH = 0
    NORMAL = 1
    LOW = 2


@dataclass
class QueueItem:
    """Represents an item in the processing queue"""
    url: str
    message_id: int
    channel_id: int
    guild_id: int
    author_id: int
    priority: QueuePriority
    added_at: datetime
    metadata: Optional[Dict[str, Any]] = None
    attempts: int = 0
    last_attempt: Optional[datetime] = None
    error: Optional[str] = None


class ProcessingStrategy(Enum):
    """Available processing strategies"""
    FIFO = "fifo"          # First in, first out
    PRIORITY = "priority"  # Process by priority
    SMART = "smart"        # Smart processing based on various factors


class QueueMetrics:
    """Tracks queue processing metrics"""

    def __init__(self):
        self.total_processed = 0
        self.successful = 0
        self.failed = 0
        self.processing_times: List[float] = []
        self.errors: Dict[str, int] = {}
        self.last_processed: Optional[datetime] = None

    def record_success(self, processing_time: float) -> None:
        """Record successful processing"""
        self.total_processed += 1
        self.successful += 1
        self.processing_times.append(processing_time)
        self.last_processed = datetime.utcnow()

    def record_failure(self, error: str) -> None:
        """Record processing failure"""
        self.total_processed += 1
        self.failed += 1
        self.errors[error] = self.errors.get(error, 0) + 1
        self.last_processed = datetime.utcnow()

    def get_stats(self) -> Dict[str, Any]:
        """Get queue metrics"""
        avg_time = (
            sum(self.processing_times) / len(self.processing_times)
            if self.processing_times
            else 0
        )
        return {
            "total_processed": self.total_processed,
            "successful": self.successful,
            "failed": self.failed,
            "success_rate": (
                self.successful / self.total_processed
                if self.total_processed > 0
                else 0
            ),
            "average_processing_time": avg_time,
            "error_counts": self.errors.copy(),
            "last_processed": self.last_processed
        }


class QueueProcessor:
    """Handles adding videos to the processing queue"""

    def __init__(
        self,
        queue_manager,
        strategy: ProcessingStrategy = ProcessingStrategy.SMART,
        max_retries: int = 3
    ):
        self.queue_manager = queue_manager
        self.strategy = strategy
        self.max_retries = max_retries
        self.metrics = QueueMetrics()
        self._processing: Set[str] = set()
        self._processing_lock = asyncio.Lock()

    async def process_urls(
        self,
        message: discord.Message,
        urls: List[str],
        priority: QueuePriority = QueuePriority.NORMAL
    ) -> None:
        """Process extracted URLs by adding them to the queue"""
        for url in urls:
            try:
                logger.info(f"Adding URL to queue: {url}")
                await message.add_reaction(REACTIONS['queued'])

                # Create queue item
                item = QueueItem(
                    url=url,
                    message_id=message.id,
                    channel_id=message.channel.id,
                    guild_id=message.guild.id,
                    author_id=message.author.id,
                    priority=priority,
                    added_at=datetime.utcnow()
                )

                # Add to queue with appropriate strategy
                await self._add_to_queue(item)
                logger.info(f"Successfully added video to queue: {url}")

            except Exception as e:
                logger.error(f"Failed to add video to queue: {str(e)}")
                await message.add_reaction(REACTIONS['error'])
                continue

    async def _add_to_queue(self, item: QueueItem) -> None:
        """Add item to queue using current strategy"""
        async with self._processing_lock:
            if item.url in self._processing:
                logger.debug(f"URL already being processed: {item.url}")
                return

            self._processing.add(item.url)

        try:
            # Apply processing strategy
            if self.strategy == ProcessingStrategy.PRIORITY:
                await self._add_with_priority(item)
            elif self.strategy == ProcessingStrategy.SMART:
                await self._add_with_smart_strategy(item)
            else:  # FIFO
                await self._add_fifo(item)

        finally:
            async with self._processing_lock:
                self._processing.remove(item.url)

    async def _add_with_priority(self, item: QueueItem) -> None:
        """Add item with priority handling"""
        await self.queue_manager.add_to_queue(
            url=item.url,
            message_id=item.message_id,
            channel_id=item.channel_id,
            guild_id=item.guild_id,
            author_id=item.author_id,
            priority=item.priority.value
        )

    async def _add_with_smart_strategy(self, item: QueueItem) -> None:
        """Add item using smart processing strategy"""
        # Calculate priority based on various factors
        priority = await self._calculate_smart_priority(item)

        await self.queue_manager.add_to_queue(
            url=item.url,
            message_id=item.message_id,
            channel_id=item.channel_id,
            guild_id=item.guild_id,
            author_id=item.author_id,
            priority=priority
        )

    async def _add_fifo(self, item: QueueItem) -> None:
        """Add item using FIFO strategy"""
        await self.queue_manager.add_to_queue(
            url=item.url,
            message_id=item.message_id,
            channel_id=item.channel_id,
            guild_id=item.guild_id,
            author_id=item.author_id,
            priority=QueuePriority.NORMAL.value
        )

    async def _calculate_smart_priority(self, item: QueueItem) -> int:
        """Calculate priority using smart strategy"""
        base_priority = item.priority.value

        # Adjust based on queue metrics
        stats = self.metrics.get_stats()
        if stats["total_processed"] > 0:
            # Boost priority if queue is processing efficiently
            if stats["success_rate"] > 0.9:  # 90% success rate
                base_priority -= 1
            # Lower priority if having issues
            elif stats["success_rate"] < 0.5:  # 50% success rate
                base_priority += 1

        # Adjust based on retries
        if item.attempts > 0:
            base_priority += item.attempts

        # Ensure priority stays in valid range
        return max(0, min(base_priority, len(QueuePriority) - 1))

    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel"""
        author_mention = author.mention if author else "Unknown User"
        channel_mention = channel.mention if channel else "Unknown Channel"

        return (
            f"Video archived from {author_mention} in {channel_mention}\n"
            f"Original URL: {url}"
        )

    def get_metrics(self) -> Dict[str, Any]:
        """Get queue processing metrics"""
        return {
            "metrics": self.metrics.get_stats(),
            "strategy": self.strategy.value,
            "active_processing": len(self._processing),
            "max_retries": self.max_retries
        }
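The smart strategy's arithmetic, restated as a standalone sketch: healthy queues get a one-step boost, struggling queues and retried items get pushed back, and the result is clamped to the enum's range. This is a hypothetical paraphrase of _calculate_smart_priority, not the cog's API:

    def smart_priority(base: int, success_rate: float, total: int, attempts: int) -> int:
        """Hypothetical restatement of _calculate_smart_priority's arithmetic."""
        if total > 0:
            if success_rate > 0.9:      # queue is healthy: bump ahead one step
                base -= 1
            elif success_rate < 0.5:    # queue is struggling: back off one step
                base += 1
        if attempts > 0:                # each retry lowers effective priority
            base += attempts
        return max(0, min(base, 2))     # clamp to 0 (HIGH) .. 2 (LOW)

    print(smart_priority(base=1, success_rate=0.95, total=10, attempts=0))  # 0
    print(smart_priority(base=1, success_rate=0.40, total=10, attempts=2))  # 2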
videoarchiver/processor/status_display.py (new file, 316 lines)
@@ -0,0 +1,316 @@
|
||||
"""Module for handling queue status display and formatting"""
|
||||
|
||||
import discord
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("VideoArchiver")
|
||||
|
||||
class DisplayTheme:
|
||||
"""Defines display themes"""
|
||||
DEFAULT = {
|
||||
"title_color": discord.Color.blue(),
|
||||
"success_color": discord.Color.green(),
|
||||
"warning_color": discord.Color.gold(),
|
||||
"error_color": discord.Color.red(),
|
||||
"info_color": discord.Color.blurple()
|
||||
}
|
||||
|
||||
@dataclass
|
||||
class DisplayTemplate:
|
||||
"""Template for status display sections"""
|
||||
name: str
|
||||
format_string: str
|
||||
inline: bool = False
|
||||
order: int = 0
|
||||
condition: Optional[str] = None
|
||||
|
||||
class DisplaySection(Enum):
|
||||
"""Available display sections"""
|
||||
QUEUE_STATS = "queue_stats"
|
||||
DOWNLOADS = "downloads"
|
||||
COMPRESSIONS = "compressions"
|
||||
ERRORS = "errors"
|
||||
HARDWARE = "hardware"
|
||||
|
||||
class StatusFormatter:
|
||||
"""Formats status information for display"""
|
||||
|
||||
@staticmethod
|
||||
def format_bytes(bytes: int) -> str:
|
||||
"""Format bytes into human readable format"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes < 1024:
|
||||
return f"{bytes:.1f}{unit}"
|
||||
bytes /= 1024
|
||||
return f"{bytes:.1f}TB"
|
||||
|
||||
@staticmethod
|
||||
def format_time(seconds: float) -> str:
|
||||
"""Format time duration"""
|
||||
if seconds < 60:
|
||||
return f"{seconds:.1f}s"
|
||||
minutes = seconds / 60
|
||||
if minutes < 60:
|
||||
return f"{minutes:.1f}m"
|
||||
hours = minutes / 60
|
||||
return f"{hours:.1f}h"
|
||||
|
||||
@staticmethod
|
||||
def format_percentage(value: float) -> str:
|
||||
"""Format percentage value"""
|
||||
return f"{value:.1f}%"
|
||||
|
||||
class DisplayManager:
    """Manages status display configuration"""

    def __init__(self):
        self.templates: Dict[DisplaySection, DisplayTemplate] = {
            DisplaySection.QUEUE_STATS: DisplayTemplate(
                name="Queue Statistics",
                format_string=(
                    "```\n"
                    "Pending: {pending}\n"
                    "Processing: {processing}\n"
                    "Completed: {completed}\n"
                    "Failed: {failed}\n"
                    "Success Rate: {success_rate}\n"
                    "Avg Processing Time: {avg_processing_time}\n"
                    "```"
                ),
                order=1
            ),
            DisplaySection.DOWNLOADS: DisplayTemplate(
                name="Active Downloads",
                format_string=(
                    "```\n"
                    "URL: {url}\n"
                    "Progress: {percent}\n"
                    "Speed: {speed}\n"
                    "ETA: {eta}\n"
                    "Size: {size}\n"
                    "Started: {start_time}\n"
                    "Retries: {retries}\n"
                    "```"
                ),
                order=2
            ),
            DisplaySection.COMPRESSIONS: DisplayTemplate(
                name="Active Compressions",
                format_string=(
                    "```\n"
                    "File: {filename}\n"
                    "Progress: {percent}\n"
                    "Time Elapsed: {elapsed_time}\n"
                    "Input Size: {input_size}\n"
                    "Current Size: {current_size}\n"
                    "Target Size: {target_size}\n"
                    "Codec: {codec}\n"
                    "Hardware Accel: {hardware_accel}\n"
                    "```"
                ),
                order=3
            ),
            DisplaySection.ERRORS: DisplayTemplate(
                name="Error Statistics",
                format_string="```\n{error_stats}```",
                condition="has_errors",
                order=4
            ),
            DisplaySection.HARDWARE: DisplayTemplate(
                name="Hardware Statistics",
                format_string=(
                    "```\n"
                    "Hardware Accel Failures: {hw_failures}\n"
                    "Compression Failures: {comp_failures}\n"
                    "Peak Memory Usage: {memory_usage}\n"
                    "```"
                ),
                order=5
            )
        }
        self.theme = DisplayTheme.DEFAULT
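Each template renders with plain str.format substitution; a minimal sketch of rendering one section by hand (sample values are made up):

```python
manager = DisplayManager()
template = manager.templates[DisplaySection.QUEUE_STATS]
text = template.format_string.format(
    pending=4, processing=1, completed=120, failed=3,
    success_rate="97.6%", avg_processing_time="42.0s",
)
# `text` is now a Discord code block listing the six statistics in order
```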
class StatusDisplay:
    """Handles formatting and display of queue status information"""

    def __init__(self):
        self.display_manager = DisplayManager()
        self.formatter = StatusFormatter()

    async def create_queue_status_embed(
        self,
        queue_status: Dict[str, Any],
        active_ops: Dict[str, Any]
    ) -> discord.Embed:
        """Create an embed displaying queue status and active operations"""
        embed = discord.Embed(
            title="Queue Status Details",
            color=self.display_manager.theme["title_color"],
            timestamp=datetime.utcnow()
        )

        # Add sections in order
        sections = sorted(
            self.display_manager.templates.items(),
            key=lambda x: x[1].order
        )

        for section, template in sections:
            # Check condition if one exists
            if template.condition:
                if not self._check_condition(template.condition, queue_status, active_ops):
                    continue

            # Add section based on type
            if section == DisplaySection.QUEUE_STATS:
                self._add_queue_statistics(embed, queue_status, template)
            elif section == DisplaySection.DOWNLOADS:
                self._add_active_downloads(embed, active_ops.get('downloads', {}), template)
            elif section == DisplaySection.COMPRESSIONS:
                self._add_active_compressions(embed, active_ops.get('compressions', {}), template)
            elif section == DisplaySection.ERRORS:
                self._add_error_statistics(embed, queue_status, template)
            elif section == DisplaySection.HARDWARE:
                self._add_hardware_statistics(embed, queue_status, template)

        return embed
    def _check_condition(
        self,
        condition: str,
        queue_status: Dict[str, Any],
        active_ops: Dict[str, Any]
    ) -> bool:
        """Check if condition for displaying section is met"""
        if condition == "has_errors":
            return bool(queue_status["metrics"]["errors_by_type"])
        return True
    def _add_queue_statistics(
        self,
        embed: discord.Embed,
        queue_status: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add queue statistics to the embed"""
        embed.add_field(
            name=template.name,
            value=template.format_string.format(
                pending=queue_status['pending'],
                processing=queue_status['processing'],
                completed=queue_status['completed'],
                failed=queue_status['failed'],
                success_rate=self.formatter.format_percentage(
                    queue_status['metrics']['success_rate'] * 100
                ),
                avg_processing_time=self.formatter.format_time(
                    queue_status['metrics']['avg_processing_time']
                )
            ),
            inline=template.inline
        )
    def _add_active_downloads(
        self,
        embed: discord.Embed,
        downloads: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add active downloads information to the embed"""
        if downloads:
            content = []
            for url, progress in downloads.items():
                content.append(template.format_string.format(
                    url=(url[:50] + "...") if len(url) > 50 else url,
                    percent=self.formatter.format_percentage(progress.get('percent', 0)),
                    speed=progress.get('speed', 'N/A'),
                    eta=progress.get('eta', 'N/A'),
                    size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/"
                         f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}",
                    start_time=progress.get('start_time', 'N/A'),
                    retries=progress.get('retries', 0)
                ))
            embed.add_field(
                name=template.name,
                value="\n".join(content),  # newline between entries keeps code blocks separate
                inline=template.inline
            )
        else:
            embed.add_field(
                name=template.name,
                value="```\nNo active downloads```",
                inline=template.inline
            )
    def _add_active_compressions(
        self,
        embed: discord.Embed,
        compressions: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add active compressions information to the embed"""
        if compressions:
            content = []
            for file_id, progress in compressions.items():
                content.append(template.format_string.format(
                    filename=progress.get('filename', 'Unknown'),
                    percent=self.formatter.format_percentage(progress.get('percent', 0)),
                    elapsed_time=progress.get('elapsed_time', 'N/A'),
                    input_size=self.formatter.format_bytes(progress.get('input_size', 0)),
                    current_size=self.formatter.format_bytes(progress.get('current_size', 0)),
                    target_size=self.formatter.format_bytes(progress.get('target_size', 0)),
                    codec=progress.get('codec', 'Unknown'),
                    hardware_accel=progress.get('hardware_accel', False)
                ))
            embed.add_field(
                name=template.name,
                value="\n".join(content),  # newline between entries keeps code blocks separate
                inline=template.inline
            )
        else:
            embed.add_field(
                name=template.name,
                value="```\nNo active compressions```",
                inline=template.inline
            )
    def _add_error_statistics(
        self,
        embed: discord.Embed,
        queue_status: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add error statistics to the embed"""
        if queue_status["metrics"]["errors_by_type"]:
            error_stats = "\n".join(
                f"{error_type}: {count}"
                for error_type, count in queue_status["metrics"]["errors_by_type"].items()
            )
            embed.add_field(
                name=template.name,
                value=template.format_string.format(error_stats=error_stats),
                inline=template.inline
            )
    def _add_hardware_statistics(
        self,
        embed: discord.Embed,
        queue_status: Dict[str, Any],
        template: DisplayTemplate
    ) -> None:
        """Add hardware statistics to the embed"""
        embed.add_field(
            name=template.name,
            value=template.format_string.format(
                hw_failures=queue_status['metrics']['hardware_accel_failures'],
                comp_failures=queue_status['metrics']['compression_failures'],
                memory_usage=self.formatter.format_bytes(
                    queue_status['metrics']['peak_memory_usage'] * 1024 * 1024  # Convert MB to bytes
                )
            ),
            inline=template.inline
        )
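Putting the module together, a hedged usage sketch from an async context; the dict shapes below are hand-rolled samples mirroring the keys the templates consume, and `ctx` stands in for whatever command context the caller has:

```python
queue_status = {
    "pending": 2, "processing": 1, "completed": 50, "failed": 1,
    "metrics": {
        "success_rate": 0.98,            # fraction; displayed as 98.0%
        "avg_processing_time": 37.2,     # seconds
        "errors_by_type": {"DownloadError": 1},
        "hardware_accel_failures": 0,
        "compression_failures": 1,
        "peak_memory_usage": 512,        # MB; converted to bytes for display
    },
}
active_ops = {"downloads": {}, "compressions": {}}

display = StatusDisplay()
embed = await display.create_queue_status_embed(queue_status, active_ops)
await ctx.send(embed=embed)
```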
264
videoarchiver/processor/url_extractor.py
Normal file
@@ -0,0 +1,264 @@
"""URL extraction functionality for video processing"""

import logging
import re
from enum import Enum
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Pattern
import discord
from urllib.parse import urlparse, parse_qs, ParseResult

logger = logging.getLogger("VideoArchiver")
@dataclass
class URLPattern:
    """Defines a URL pattern for a video site"""

    site: str
    pattern: Pattern
    requires_api: bool = False
    supports_timestamp: bool = False
    supports_playlist: bool = False


@dataclass
class URLMetadata:
    """Metadata about an extracted URL"""

    url: str
    site: str
    timestamp: Optional[int] = None
    playlist_id: Optional[str] = None
    video_id: Optional[str] = None
    quality: Optional[str] = None


class URLType(Enum):
    """Types of video URLs"""

    DIRECT = "direct"
    PLATFORM = "platform"
    UNKNOWN = "unknown"
class URLPatternManager:
    """Manages URL patterns for different video sites"""

    def __init__(self):
        self.patterns: Dict[str, URLPattern] = {
            "youtube": URLPattern(
                site="youtube",
                pattern=re.compile(
                    r'(?:https?://)?(?:www\.)?'
                    r'(?:youtube\.com/watch\?v=|youtu\.be/)'
                    r'([a-zA-Z0-9_-]{11})'
                ),
                supports_timestamp=True,
                supports_playlist=True
            ),
            "vimeo": URLPattern(
                site="vimeo",
                pattern=re.compile(
                    r'(?:https?://)?(?:www\.)?'
                    r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)'
                    r'(\d+)(?:|/\w+)*'
                ),
                supports_timestamp=True
            ),
            "twitter": URLPattern(
                site="twitter",
                pattern=re.compile(
                    r'(?:https?://)?(?:www\.)?'
                    r'(?:twitter\.com|x\.com)/\w+/status/(\d+)'
                ),
                requires_api=True
            ),
            # Add more patterns as needed
        }

        self.direct_extensions = {'.mp4', '.mov', '.avi', '.webm', '.mkv'}

    def get_pattern(self, site: str) -> Optional[URLPattern]:
        """Get pattern for a site"""
        return self.patterns.get(site.lower())

    def is_supported_site(self, url: str, enabled_sites: Optional[List[str]]) -> bool:
        """Check if URL is from a supported site"""
        if not enabled_sites:
            return True

        parsed = urlparse(url.lower())
        domain = parsed.netloc.replace('www.', '')
        return any(site.lower() in domain for site in enabled_sites)
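A quick check of the patterns above (group 1 captures the video ID; the URLs are arbitrary examples):

```python
manager = URLPatternManager()
match = manager.get_pattern("youtube").pattern.match("https://youtu.be/dQw4w9WgXcQ")
print(match.group(1))  # dQw4w9WgXcQ
print(manager.is_supported_site("https://vimeo.com/12345", ["vimeo"]))   # True
print(manager.is_supported_site("https://example.com/clip", ["vimeo"]))  # False
```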
class URLValidator:
    """Validates extracted URLs"""

    def __init__(self, pattern_manager: URLPatternManager):
        self.pattern_manager = pattern_manager

    def get_url_type(self, url: str) -> URLType:
        """Determine URL type"""
        parsed = urlparse(url)
        if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
            return URLType.DIRECT
        if any(pattern.pattern.match(url) for pattern in self.pattern_manager.patterns.values()):
            return URLType.PLATFORM
        return URLType.UNKNOWN

    def is_valid_url(self, url: str) -> bool:
        """Validate URL format"""
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False
class URLMetadataExtractor:
    """Extracts metadata from URLs"""

    def __init__(self, pattern_manager: URLPatternManager):
        self.pattern_manager = pattern_manager

    def extract_metadata(self, url: str) -> Optional[URLMetadata]:
        """Extract metadata from URL"""
        try:
            parsed = urlparse(url)

            # Handle direct video URLs
            if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
                return URLMetadata(url=url, site="direct")

            # Handle platform URLs
            for site, pattern in self.pattern_manager.patterns.items():
                if match := pattern.pattern.match(url):
                    metadata = URLMetadata(
                        url=url,
                        site=site,
                        video_id=match.group(1)
                    )

                    # Extract additional metadata
                    if pattern.supports_timestamp:
                        metadata.timestamp = self._extract_timestamp(parsed)
                    if pattern.supports_playlist:
                        metadata.playlist_id = self._extract_playlist_id(parsed)

                    return metadata

            return None

        except Exception as e:
            logger.error(f"Error extracting metadata from URL {url}: {e}")
            return None

    def _extract_timestamp(self, parsed_url: ParseResult) -> Optional[int]:
        """Extract timestamp from URL query parameters"""
        # Annotation corrected: urlparse() returns a ParseResult, and urlparse
        # itself is a function, not a type
        try:
            params = parse_qs(parsed_url.query)
            if 't' in params:
                return int(params['t'][0])
            return None
        except Exception:
            return None

    def _extract_playlist_id(self, parsed_url: ParseResult) -> Optional[str]:
        """Extract playlist ID from URL query parameters"""
        try:
            params = parse_qs(parsed_url.query)
            if 'list' in params:
                return params['list'][0]
            return None
        except Exception:
            return None
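For illustration, extracting metadata from a YouTube watch URL carrying both a timestamp and a playlist parameter (the URL and IDs are arbitrary):

```python
extractor = URLMetadataExtractor(URLPatternManager())
meta = extractor.extract_metadata(
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42&list=PL123"
)
# meta.site == "youtube", meta.video_id == "dQw4w9WgXcQ",
# meta.timestamp == 42, meta.playlist_id == "PL123"
```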
class URLExtractor:
    """Handles extraction of video URLs from messages"""

    def __init__(self):
        self.pattern_manager = URLPatternManager()
        self.validator = URLValidator(self.pattern_manager)
        self.metadata_extractor = URLMetadataExtractor(self.pattern_manager)
        self._url_cache: Dict[str, Set[str]] = {}

    async def extract_urls(
        self,
        message: discord.Message,
        enabled_sites: Optional[List[str]] = None
    ) -> List[URLMetadata]:
        """Extract video URLs from message content and attachments"""
        urls = []

        # Check cache
        cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}"
        if cache_key in self._url_cache:
            cached = [
                self.metadata_extractor.extract_metadata(url)
                for url in self._url_cache[cache_key]
            ]
            # Filter out entries whose metadata could not be re-extracted
            return [metadata for metadata in cached if metadata is not None]

        # Extract URLs
        content_urls = await self._extract_from_content(message.content, enabled_sites)
        attachment_urls = await self._extract_from_attachments(message.attachments)

        # Process all URLs
        all_urls = content_urls + attachment_urls
        valid_urls = []

        for url in all_urls:
            if not self.validator.is_valid_url(url):
                logger.debug(f"Invalid URL format: {url}")
                continue

            if not self.pattern_manager.is_supported_site(url, enabled_sites):
                logger.debug(f"URL {url} doesn't match any enabled sites")
                continue

            metadata = self.metadata_extractor.extract_metadata(url)
            if metadata:
                urls.append(metadata)
                valid_urls.append(url)
            else:
                logger.debug(f"Could not extract metadata from URL: {url}")

        # Update cache
        self._url_cache[cache_key] = set(valid_urls)

        return urls
    async def _extract_from_content(
        self,
        content: str,
        enabled_sites: Optional[List[str]]
    ) -> List[str]:
        """Extract video URLs from message content"""
        # Site filtering against enabled_sites happens in extract_urls;
        # this pass only collects candidate URLs from the raw text
        if not content:
            return []

        urls = []
        for word in content.split():
            if self.validator.get_url_type(word) != URLType.UNKNOWN:
                urls.append(word)

        return urls
    async def _extract_from_attachments(
        self,
        attachments: List[discord.Attachment]
    ) -> List[str]:
        """Extract video URLs from message attachments"""
        return [
            attachment.url
            for attachment in attachments
            if any(
                attachment.filename.lower().endswith(ext)
                for ext in self.pattern_manager.direct_extensions
            )
        ]
    def clear_cache(self, message_id: Optional[int] = None) -> None:
        """Clear URL cache"""
        if message_id:
            keys_to_remove = [
                key for key in self._url_cache
                if key.startswith(f"{message_id}_")
            ]
            for key in keys_to_remove:
                self._url_cache.pop(key, None)
        else:
            self._url_cache.clear()
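Finally, a hedged sketch of how the extractor might be driven from a message listener; the handler wiring is illustrative, and only URLExtractor's own API is taken from the code above:

```python
extractor = URLExtractor()

async def handle_message(message: discord.Message) -> None:
    # Restrict extraction to a couple of sites, then hand results downstream
    results = await extractor.extract_urls(message, enabled_sites=["youtube", "vimeo"])
    for meta in results:
        logger.info("Found %s video %s in message %s", meta.site, meta.video_id, message.id)
    extractor.clear_cache(message.id)  # drop this message's cache entry once handled
```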