From dac21f2fcdfe3f4145fc00bc3e9b18e2e8cc4e1e Mon Sep 17 00:00:00 2001 From: pacnpal <183241239+pacnpal@users.noreply.github.com> Date: Sat, 16 Nov 2024 22:32:08 +0000 Subject: [PATCH] fixed --- videoarchiver/core/base.py | 298 +++++-- videoarchiver/core/component_manager.py | 482 ++++++++--- videoarchiver/core/error_handler.py | 278 ++++--- videoarchiver/core/events.py | 313 +++++-- videoarchiver/core/initialization.py | 98 ++- videoarchiver/core/lifecycle.py | 383 +++++++-- videoarchiver/core/response_handler.py | 195 +++-- videoarchiver/core/settings.py | 259 ++++-- videoarchiver/database/schema_manager.py | 331 +++++++- videoarchiver/processor/__init__.py | 209 ++++- videoarchiver/processor/cleanup_manager.py | 228 ++++-- videoarchiver/processor/constants.py | 84 +- videoarchiver/processor/core.py | 340 +++++--- videoarchiver/processor/message_handler.py | 300 +++++-- videoarchiver/processor/message_validator.py | 292 ++++--- videoarchiver/processor/queue_handler.py | 447 +++++++--- videoarchiver/processor/queue_processor.py | 149 +++- videoarchiver/processor/reactions.py | 182 +++-- videoarchiver/processor/status_display.py | 484 +++++++---- videoarchiver/processor/url_extractor.py | 308 ++++--- videoarchiver/utils/__init__.py | 220 ++++- videoarchiver/utils/compression_handler.py | 210 +++++ videoarchiver/utils/download_core.py | 271 +++++++ videoarchiver/utils/exceptions.py | 192 ++++- videoarchiver/utils/file_operations.py | 138 ++++ videoarchiver/utils/process_manager.py | 111 +++ videoarchiver/utils/progress_handler.py | 126 +++ videoarchiver/utils/progress_tracker.py | 320 ++++++-- videoarchiver/utils/url_validator.py | 76 ++ videoarchiver/utils/video_downloader.py | 809 ------------------- 30 files changed, 5854 insertions(+), 2279 deletions(-) create mode 100644 videoarchiver/utils/compression_handler.py create mode 100644 videoarchiver/utils/download_core.py create mode 100644 videoarchiver/utils/file_operations.py create mode 100644 videoarchiver/utils/process_manager.py create mode 100644 videoarchiver/utils/progress_handler.py create mode 100644 videoarchiver/utils/url_validator.py delete mode 100644 videoarchiver/utils/video_downloader.py diff --git a/videoarchiver/core/base.py b/videoarchiver/core/base.py index fb670bb..5963a92 100644 --- a/videoarchiver/core/base.py +++ b/videoarchiver/core/base.py @@ -4,31 +4,68 @@ from __future__ import annotations import asyncio import logging -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, TypedDict, ClassVar, List, Set, Union from datetime import datetime +from pathlib import Path + +import discord from redbot.core.bot import Red -from redbot.core.commands import GroupCog +from redbot.core.commands import GroupCog, Context from .settings import Settings -from .lifecycle import LifecycleManager +from .lifecycle import LifecycleManager, LifecycleState from .component_manager import ComponentManager, ComponentState from .error_handler import error_manager, handle_command_error from .response_handler import response_manager -from .commands import setup_archiver_commands, setup_database_commands, setup_settings_commands -from .events import setup_events +from .commands.archiver_commands import setup_archiver_commands +from .commands.database_commands import setup_database_commands +from .commands.settings_commands import setup_settings_commands +from .events import setup_events, EventManager + +from ..processor.core import Processor +from ..queue.manager import QueueManager +from ..ffmpeg.ffmpeg_manager 
import FFmpegManager +from ..database.video_archive_db import VideoArchiveDB +from ..config_manager import ConfigManager +from ..utils.exceptions import ( + CogError, + ErrorContext, + ErrorSeverity +) logger = logging.getLogger("VideoArchiver") -class CogStatus: +class CogHealthCheck(TypedDict): + """Type definition for health check status""" + name: str + status: bool + last_check: str + details: Optional[Dict[str, Any]] + +class CogStatus(TypedDict): + """Type definition for cog status""" + uptime: float + last_error: Optional[str] + error_count: int + command_count: int + last_command: Optional[str] + health_checks: Dict[str, CogHealthCheck] + state: str + ready: bool + +class StatusTracker: """Tracks cog status and health""" - def __init__(self): + HEALTH_CHECK_INTERVAL: ClassVar[int] = 30 # Seconds between health checks + ERROR_THRESHOLD: ClassVar[int] = 100 # Maximum errors before health warning + + def __init__(self) -> None: self.start_time = datetime.utcnow() self.last_error: Optional[str] = None self.error_count = 0 self.command_count = 0 self.last_command_time: Optional[datetime] = None - self.health_checks: Dict[str, bool] = {} + self.health_checks: Dict[str, CogHealthCheck] = {} def record_error(self, error: str) -> None: """Record an error occurrence""" @@ -40,36 +77,70 @@ class CogStatus: self.command_count += 1 self.last_command_time = datetime.utcnow() - def update_health_check(self, check: str, status: bool) -> None: + def update_health_check( + self, + name: str, + status: bool, + details: Optional[Dict[str, Any]] = None + ) -> None: """Update health check status""" - self.health_checks[check] = status + self.health_checks[name] = CogHealthCheck( + name=name, + status=status, + last_check=datetime.utcnow().isoformat(), + details=details + ) - def get_status(self) -> Dict[str, Any]: + def get_status(self) -> CogStatus: """Get current status""" - return { - "uptime": (datetime.utcnow() - self.start_time).total_seconds(), - "last_error": self.last_error, - "error_count": self.error_count, - "command_count": self.command_count, - "last_command": self.last_command_time.isoformat() if self.last_command_time else None, - "health_checks": self.health_checks.copy() - } + return CogStatus( + uptime=(datetime.utcnow() - self.start_time).total_seconds(), + last_error=self.last_error, + error_count=self.error_count, + command_count=self.command_count, + last_command=self.last_command_time.isoformat() if self.last_command_time else None, + health_checks=self.health_checks.copy(), + state="healthy" if self.is_healthy() else "unhealthy", + ready=True + ) + + def is_healthy(self) -> bool: + """Check if cog is healthy""" + if self.error_count > self.ERROR_THRESHOLD: + return False + return all(check["status"] for check in self.health_checks.values()) class ComponentAccessor: """Provides safe access to components""" - def __init__(self, component_manager: ComponentManager): + def __init__(self, component_manager: ComponentManager) -> None: self._component_manager = component_manager def get_component(self, name: str) -> Optional[Any]: - """Get a component with state validation""" + """ + Get a component with state validation. + + Args: + name: Component name + + Returns: + Component instance if ready, None otherwise + """ component = self._component_manager.get(name) if component and component.state == ComponentState.READY: return component return None def get_component_status(self, name: str) -> Dict[str, Any]: - """Get component status""" + """ + Get component status. 
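+
+        Example (illustrative, assuming the core "processor" component
+        is registered):
+
+            status = accessor.get_component_status("processor")
+            if status.get("state") != "READY":
+                logger.warning("Processor component is not ready")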
+ + Args: + name: Component name + + Returns: + Component status dictionary + """ return self._component_manager.get_component_status().get(name, {}) class VideoArchiver(GroupCog, Settings): @@ -85,7 +156,19 @@ class VideoArchiver(GroupCog, Settings): self.lifecycle_manager = LifecycleManager(self) self.component_manager = ComponentManager(self) self.component_accessor = ComponentAccessor(self.component_manager) - self.status = CogStatus() + self.status_tracker = StatusTracker() + self.event_manager: Optional[EventManager] = None + + # Initialize task trackers + self._init_task: Optional[asyncio.Task] = None + self._cleanup_task: Optional[asyncio.Task] = None + self._queue_task: Optional[asyncio.Task] = None + self._health_tasks: Set[asyncio.Task] = set() + + # Initialize component storage + self.components: Dict[int, Dict[str, Any]] = {} + self.update_checker = None + self._db = None # Set up commands setup_archiver_commands(self) @@ -93,42 +176,85 @@ class VideoArchiver(GroupCog, Settings): setup_settings_commands(self) # Set up events - setup_events(self) + self.event_manager = setup_events(self) # Register cleanup handlers self.lifecycle_manager.register_cleanup_handler(self._cleanup_handler) async def cog_load(self) -> None: - """Handle cog loading""" + """ + Handle cog loading. + + Raises: + CogError: If loading fails + """ try: await self.lifecycle_manager.handle_load() await self._start_health_monitoring() except Exception as e: - self.status.record_error(str(e)) - raise + error = f"Failed to load cog: {str(e)}" + self.status_tracker.record_error(error) + logger.error(error, exc_info=True) + raise CogError( + error, + context=ErrorContext( + "VideoArchiver", + "cog_load", + None, + ErrorSeverity.CRITICAL + ) + ) async def cog_unload(self) -> None: - """Handle cog unloading""" + """ + Handle cog unloading. 
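+
+        Outstanding health-monitoring tasks are cancelled before shutdown
+        is delegated to the lifecycle manager.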
+ + Raises: + CogError: If unloading fails + """ try: + # Cancel health monitoring + for task in self._health_tasks: + task.cancel() + self._health_tasks.clear() + await self.lifecycle_manager.handle_unload() except Exception as e: - self.status.record_error(str(e)) - raise + error = f"Failed to unload cog: {str(e)}" + self.status_tracker.record_error(error) + logger.error(error, exc_info=True) + raise CogError( + error, + context=ErrorContext( + "VideoArchiver", + "cog_unload", + None, + ErrorSeverity.CRITICAL + ) + ) - async def cog_command_error(self, ctx, error): + async def cog_command_error( + self, + ctx: Context, + error: Exception + ) -> None: """Handle command errors""" - self.status.record_error(str(error)) + self.status_tracker.record_error(str(error)) await handle_command_error(ctx, error) - async def cog_before_invoke(self, ctx) -> bool: + async def cog_before_invoke(self, ctx: Context) -> bool: """Pre-command hook""" - self.status.record_command() + self.status_tracker.record_command() return True async def _start_health_monitoring(self) -> None: """Start health monitoring tasks""" - asyncio.create_task(self._monitor_component_health()) - asyncio.create_task(self._monitor_system_health()) + self._health_tasks.add( + asyncio.create_task(self._monitor_component_health()) + ) + self._health_tasks.add( + asyncio.create_task(self._monitor_system_health()) + ) async def _monitor_component_health(self) -> None: """Monitor component health""" @@ -136,98 +262,134 @@ class VideoArchiver(GroupCog, Settings): try: component_status = self.component_manager.get_component_status() for name, status in component_status.items(): - self.status.update_health_check( + self.status_tracker.update_health_check( f"component_{name}", - status["state"] == ComponentState.READY.value + status["state"] == ComponentState.READY.name, + status ) except Exception as e: - logger.error(f"Error monitoring component health: {e}") - await asyncio.sleep(60) # Check every minute + logger.error(f"Error monitoring component health: {e}", exc_info=True) + await asyncio.sleep(self.status_tracker.HEALTH_CHECK_INTERVAL) async def _monitor_system_health(self) -> None: """Monitor system health metrics""" while True: try: # Check queue health - queue_manager = self.queue_manager - if queue_manager: + if queue_manager := self.queue_manager: queue_status = await queue_manager.get_queue_status() - self.status.update_health_check( + self.status_tracker.update_health_check( "queue_health", - queue_status["active"] and not queue_status["stalled"] + queue_status["active"] and not queue_status["stalled"], + queue_status ) # Check processor health - processor = self.processor - if processor: + if processor := self.processor: processor_status = await processor.get_status() - self.status.update_health_check( + self.status_tracker.update_health_check( "processor_health", - processor_status["active"] + processor_status["active"], + processor_status ) # Check database health - db = self.db - if db: + if db := self.db: db_status = await db.get_status() - self.status.update_health_check( + self.status_tracker.update_health_check( "database_health", - db_status["connected"] + db_status["connected"], + db_status + ) + + # Check event system health + if self.event_manager: + event_stats = self.event_manager.get_stats() + self.status_tracker.update_health_check( + "event_health", + event_stats["health"], + event_stats ) except Exception as e: - logger.error(f"Error monitoring system health: {e}") - await asyncio.sleep(30) # Check every 30 
seconds + logger.error(f"Error monitoring system health: {e}", exc_info=True) + await asyncio.sleep(self.status_tracker.HEALTH_CHECK_INTERVAL) async def _cleanup_handler(self) -> None: """Custom cleanup handler""" try: - # Perform any custom cleanup - pass + # Cancel health monitoring tasks + for task in self._health_tasks: + if not task.done(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + self._health_tasks.clear() + except Exception as e: - logger.error(f"Error in cleanup handler: {e}") + logger.error(f"Error in cleanup handler: {e}", exc_info=True) def get_status(self) -> Dict[str, Any]: - """Get comprehensive cog status""" + """ + Get comprehensive cog status. + + Returns: + Dictionary containing cog status information + """ return { - "cog": self.status.get_status(), + "cog": self.status_tracker.get_status(), "lifecycle": self.lifecycle_manager.get_status(), "components": self.component_manager.get_component_status(), - "errors": error_manager.tracker.get_error_stats() + "errors": error_manager.tracker.get_error_stats(), + "events": self.event_manager.get_stats() if self.event_manager else None } # Component property accessors @property - def processor(self): + def processor(self) -> Optional[Processor]: """Get the processor component""" return self.component_accessor.get_component("processor") @property - def queue_manager(self): + def queue_manager(self) -> Optional[QueueManager]: """Get the queue manager component""" return self.component_accessor.get_component("queue_manager") @property - def config_manager(self): + def config_manager(self) -> Optional[ConfigManager]: """Get the config manager component""" return self.component_accessor.get_component("config_manager") @property - def ffmpeg_mgr(self): + def ffmpeg_mgr(self) -> Optional[FFmpegManager]: """Get the FFmpeg manager component""" return self.component_accessor.get_component("ffmpeg_mgr") @property - def db(self): + def db(self) -> Optional[VideoArchiveDB]: """Get the database component""" - return self.component_accessor.get_component("db") + return self._db + + @db.setter + def db(self, value: VideoArchiveDB) -> None: + """Set the database component""" + self._db = value @property - def data_path(self): + def data_path(self) -> Optional[Path]: """Get the data path""" return self.component_accessor.get_component("data_path") @property - def download_path(self): + def download_path(self) -> Optional[Path]: """Get the download path""" return self.component_accessor.get_component("download_path") + + @property + def queue_handler(self): + """Get the queue handler from processor""" + if processor := self.processor: + return processor.queue_handler + return None diff --git a/videoarchiver/core/component_manager.py b/videoarchiver/core/component_manager.py index 5fe7e78..e9580b5 100644 --- a/videoarchiver/core/component_manager.py +++ b/videoarchiver/core/component_manager.py @@ -2,33 +2,61 @@ import logging import asyncio -from typing import Dict, Any, Optional, Set, List -from enum import Enum +from typing import Dict, Any, Optional, Set, List, TypedDict, ClassVar, Type, Union, Protocol +from enum import Enum, auto from datetime import datetime +from pathlib import Path + +from ..utils.exceptions import ( + ComponentError, + ErrorContext, + ErrorSeverity +) +from ..utils.path_manager import ensure_directory logger = logging.getLogger("VideoArchiver") class ComponentState(Enum): """Possible states of a component""" - UNREGISTERED = "unregistered" - REGISTERED = "registered" - INITIALIZING = 
"initializing" - READY = "ready" - ERROR = "error" - SHUTDOWN = "shutdown" + UNREGISTERED = auto() + REGISTERED = auto() + INITIALIZING = auto() + READY = auto() + ERROR = auto() + SHUTDOWN = auto() -class ComponentDependencyError(Exception): - """Raised when component dependencies cannot be satisfied""" - pass +class ComponentHistory(TypedDict): + """Type definition for component history entry""" + component: str + state: str + timestamp: str + error: Optional[str] + duration: float -class ComponentLifecycleError(Exception): - """Raised when component lifecycle operations fail""" - pass +class ComponentStatus(TypedDict): + """Type definition for component status""" + state: str + registration_time: Optional[str] + initialization_time: Optional[str] + dependencies: Set[str] + dependents: Set[str] + error: Optional[str] + health: bool + +class Initializable(Protocol): + """Protocol for initializable components""" + async def initialize(self) -> None: + """Initialize the component""" + ... + + async def shutdown(self) -> None: + """Shutdown the component""" + ... class Component: """Base class for managed components""" - def __init__(self, name: str): + def __init__(self, name: str) -> None: self.name = name self.state = ComponentState.UNREGISTERED self.dependencies: Set[str] = set() @@ -36,33 +64,74 @@ class Component: self.registration_time: Optional[datetime] = None self.initialization_time: Optional[datetime] = None self.error: Optional[str] = None + self._health_check_task: Optional[asyncio.Task] = None async def initialize(self) -> None: - """Initialize the component""" + """ + Initialize the component. + + Raises: + ComponentError: If initialization fails + """ pass async def shutdown(self) -> None: - """Shutdown the component""" - pass + """ + Shutdown the component. 
+ + Raises: + ComponentError: If shutdown fails + """ + if self._health_check_task: + self._health_check_task.cancel() + try: + await self._health_check_task + except asyncio.CancelledError: + pass + + def is_healthy(self) -> bool: + """Check if component is healthy""" + return self.state == ComponentState.READY and not self.error class ComponentTracker: """Tracks component states and relationships""" - def __init__(self): - self.states: Dict[str, ComponentState] = {} - self.history: List[Dict[str, Any]] = [] + MAX_HISTORY: ClassVar[int] = 1000 # Maximum history entries to keep - def update_state(self, name: str, state: ComponentState, error: Optional[str] = None) -> None: + def __init__(self) -> None: + self.states: Dict[str, ComponentState] = {} + self.history: List[ComponentHistory] = [] + + def update_state( + self, + name: str, + state: ComponentState, + error: Optional[str] = None + ) -> None: """Update component state""" self.states[name] = state - self.history.append({ - "component": name, - "state": state.value, - "timestamp": datetime.utcnow(), - "error": error - }) + + # Add history entry + now = datetime.utcnow() + duration = 0.0 + if self.history: + last_entry = self.history[-1] + last_time = datetime.fromisoformat(last_entry["timestamp"]) + duration = (now - last_time).total_seconds() - def get_component_history(self, name: str) -> List[Dict[str, Any]]: + self.history.append(ComponentHistory( + component=name, + state=state.name, + timestamp=now.isoformat(), + error=error, + duration=duration + )) + + # Cleanup old history + if len(self.history) > self.MAX_HISTORY: + self.history = self.history[-self.MAX_HISTORY:] + + def get_component_history(self, name: str) -> List[ComponentHistory]: """Get state history for a component""" return [ entry for entry in self.history @@ -72,12 +141,33 @@ class ComponentTracker: class DependencyManager: """Manages component dependencies""" - def __init__(self): + def __init__(self) -> None: self.dependencies: Dict[str, Set[str]] = {} self.dependents: Dict[str, Set[str]] = {} def add_dependency(self, component: str, dependency: str) -> None: - """Add a dependency relationship""" + """ + Add a dependency relationship. 
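+
+        Example (illustrative; mirrors the CORE_COMPONENTS declarations):
+
+            manager.add_dependency("processor", "config_manager")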
+ + Args: + component: Component name + dependency: Dependency name + + Raises: + ComponentError: If dependency cycle is detected + """ + # Check for cycles + if self._would_create_cycle(component, dependency): + raise ComponentError( + f"Dependency cycle detected: {component} -> {dependency}", + context=ErrorContext( + "DependencyManager", + "add_dependency", + {"component": component, "dependency": dependency}, + ErrorSeverity.HIGH + ) + ) + if component not in self.dependencies: self.dependencies[component] = set() self.dependencies[component].add(dependency) @@ -86,6 +176,23 @@ class DependencyManager: self.dependents[dependency] = set() self.dependents[dependency].add(component) + def _would_create_cycle(self, component: str, dependency: str) -> bool: + """Check if adding dependency would create a cycle""" + visited = set() + + def has_path(start: str, end: str) -> bool: + if start == end: + return True + if start in visited: + return False + visited.add(start) + return any( + has_path(dep, end) + for dep in self.dependencies.get(start, set()) + ) + + return has_path(dependency, component) + def get_dependencies(self, component: str) -> Set[str]: """Get dependencies for a component""" return self.dependencies.get(component, set()) @@ -95,27 +202,72 @@ class DependencyManager: return self.dependents.get(component, set()) def get_initialization_order(self) -> List[str]: - """Get components in dependency order""" - visited = set() - order = [] + """ + Get components in dependency order. + + Returns: + List of component names in initialization order + + Raises: + ComponentError: If dependency cycle is detected + """ + visited: Set[str] = set() + temp_visited: Set[str] = set() + order: List[str] = [] def visit(component: str) -> None: + if component in temp_visited: + cycle = " -> ".join( + name for name in self.dependencies + if name in temp_visited + ) + raise ComponentError( + f"Dependency cycle detected: {cycle}", + context=ErrorContext( + "DependencyManager", + "get_initialization_order", + {"cycle": cycle}, + ErrorSeverity.HIGH + ) + ) if component in visited: return - visited.add(component) + + temp_visited.add(component) for dep in self.dependencies.get(component, set()): visit(dep) + temp_visited.remove(component) + visited.add(component) order.append(component) - for component in self.dependencies: - visit(component) + try: + for component in self.dependencies: + if component not in visited: + visit(component) + except RecursionError: + raise ComponentError( + "Dependency resolution exceeded maximum recursion depth", + context=ErrorContext( + "DependencyManager", + "get_initialization_order", + None, + ErrorSeverity.HIGH + ) + ) return order class ComponentManager: """Manages VideoArchiver components""" - def __init__(self, cog): + CORE_COMPONENTS: ClassVar[Dict[str, Tuple[Type[Any], Set[str]]]] = { + "config_manager": ("..config_manager.ConfigManager", set()), + "processor": ("..processor.core.Processor", {"config_manager"}), + "queue_manager": ("..queue.manager.EnhancedVideoQueueManager", {"config_manager"}), + "ffmpeg_mgr": ("..ffmpeg.ffmpeg_manager.FFmpegManager", set()) + } + + def __init__(self, cog: Any) -> None: self.cog = cog self._components: Dict[str, Component] = {} self.tracker = ComponentTracker() @@ -124,21 +276,41 @@ class ComponentManager: def register( self, name: str, - component: Any, + component: Union[Component, Any], dependencies: Optional[Set[str]] = None ) -> None: - """Register a component with dependencies""" + """ + Register a component with 
dependencies. + + Args: + name: Component name + component: Component instance + dependencies: Optional set of dependency names + + Raises: + ComponentError: If registration fails + """ try: # Wrap non-Component objects if not isinstance(component, Component): - component = Component(name) + wrapped = Component(name) + if isinstance(component, Initializable): + wrapped.initialize = component.initialize + wrapped.shutdown = component.shutdown + component = wrapped # Register dependencies if dependencies: for dep in dependencies: if dep not in self._components: - raise ComponentDependencyError( - f"Dependency {dep} not registered for {name}" + raise ComponentError( + f"Dependency {dep} not registered for {name}", + context=ErrorContext( + "ComponentManager", + "register", + {"component": name, "dependency": dep}, + ErrorSeverity.HIGH + ) ) self.dependency_manager.add_dependency(name, dep) @@ -149,19 +321,33 @@ class ComponentManager: logger.debug(f"Registered component: {name}") except Exception as e: - logger.error(f"Error registering component {name}: {e}") + error = f"Failed to register component {name}: {str(e)}" + logger.error(error, exc_info=True) self.tracker.update_state(name, ComponentState.ERROR, str(e)) - raise ComponentLifecycleError(f"Failed to register component: {str(e)}") + raise ComponentError( + error, + context=ErrorContext( + "ComponentManager", + "register", + {"component": name}, + ErrorSeverity.HIGH + ) + ) async def initialize_components(self) -> None: - """Initialize all components in dependency order""" + """ + Initialize all components in dependency order. + + Raises: + ComponentError: If initialization fails + """ try: - # Get initialization order - init_order = self.dependency_manager.get_initialization_order() - # Initialize core components first await self._initialize_core_components() + # Get initialization order + init_order = self.dependency_manager.get_initialization_order() + # Initialize remaining components for name in init_order: if name not in self._components: @@ -174,88 +360,172 @@ class ComponentManager: component.initialization_time = datetime.utcnow() self.tracker.update_state(name, ComponentState.READY) except Exception as e: - logger.error(f"Error initializing component {name}: {e}") + error = f"Failed to initialize component {name}: {str(e)}" + logger.error(error, exc_info=True) self.tracker.update_state(name, ComponentState.ERROR, str(e)) - raise ComponentLifecycleError( - f"Failed to initialize component {name}: {str(e)}" + raise ComponentError( + error, + context=ErrorContext( + "ComponentManager", + "initialize_components", + {"component": name}, + ErrorSeverity.HIGH + ) ) except Exception as e: - logger.error(f"Error during component initialization: {e}") - raise ComponentLifecycleError(f"Component initialization failed: {str(e)}") + error = f"Component initialization failed: {str(e)}" + logger.error(error, exc_info=True) + raise ComponentError( + error, + context=ErrorContext( + "ComponentManager", + "initialize_components", + None, + ErrorSeverity.HIGH + ) + ) async def _initialize_core_components(self) -> None: - """Initialize core system components""" - from ..config_manager import ConfigManager - from ..processor.core import Processor - from ..queue.manager import EnhancedVideoQueueManager - from ..ffmpeg.ffmpeg_manager import FFmpegManager + """ + Initialize core system components. 
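+
+        Components declared in CORE_COMPONENTS are imported by dotted path,
+        instantiated, and registered with their declared dependencies before
+        the data and download paths are set up.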
+
+        Raises:
+            ComponentError: If core component initialization fails
+        """
+        try:
+            # Relative dotted paths (e.g. "..config_manager") cannot be
+            # resolved by __import__; import_module with a package anchor
+            # mirrors the original relative imports correctly.
+            from importlib import import_module
+
+            for name, (component_path, deps) in self.CORE_COMPONENTS.items():
+                module_path, class_name = component_path.rsplit(".", 1)
+                module = import_module(module_path, package=__package__)
+                component_class = getattr(module, class_name)
+
+                # The processor and FFmpeg manager need a reference to the cog
+                if name in ("processor", "ffmpeg_mgr"):
+                    component = component_class(self.cog)
+                else:
+                    component = component_class()
-        core_components = {
-            "config_manager": (ConfigManager(self.cog), set()),
-            "processor": (Processor(self.cog), {"config_manager"}),
-            "queue_manager": (EnhancedVideoQueueManager(), {"config_manager"}),
-            "ffmpeg_mgr": (FFmpegManager(self.cog), set())
-        }
+                self.register(name, component, deps)
 
-        for name, (component, deps) in core_components.items():
-            self.register(name, component, deps)
+            # Initialize paths
+            await self._initialize_paths()
 
-        # Initialize paths
-        await self._initialize_paths()
+        except Exception as e:
+            error = f"Failed to initialize core components: {str(e)}"
+            logger.error(error, exc_info=True)
+            raise ComponentError(
+                error,
+                context=ErrorContext(
+                    "ComponentManager",
+                    "_initialize_core_components",
+                    None,
+                    ErrorSeverity.HIGH
+                )
+            )
 
     async def _initialize_paths(self) -> None:
-        """Initialize required paths"""
-        from pathlib import Path
-        from ..utils.path_manager import ensure_directory
+        """
+        Initialize required paths.
+
+        Raises:
+            ComponentError: If path initialization fails
+        """
+        try:
+            data_dir = Path(self.cog.bot.data_path) / "VideoArchiver"
+            download_dir = data_dir / "downloads"
 
-        data_dir = Path(self.cog.bot.data_path) / "VideoArchiver"
-        download_dir = data_dir / "downloads"
+            # Ensure directories exist
+            await ensure_directory(data_dir)
+            await ensure_directory(download_dir)
 
-        # Ensure directories exist
-        await ensure_directory(data_dir)
-        await ensure_directory(download_dir)
+            # Register paths
+            self.register("data_path", data_dir)
+            self.register("download_path", download_dir)
 
-        # Register paths
-        self.register("data_path", data_dir)
-        self.register("download_path", download_dir)
+        except Exception as e:
+            error = f"Failed to initialize paths: {str(e)}"
+            logger.error(error, exc_info=True)
+            raise ComponentError(
+                error,
+                context=ErrorContext(
+                    "ComponentManager",
+                    "_initialize_paths",
+                    None,
+                    ErrorSeverity.HIGH
+                )
+            )
 
-    def get(self, name: str) -> Optional[Any]:
+    def get(self, name: str) -> Optional[Component]:
         """Get a registered component"""
-        component = self._components.get(name)
-        return component if isinstance(component, Component) else None
+        return self._components.get(name)
 
     async def shutdown_components(self) -> None:
-        """Shutdown components in reverse dependency order"""
-        shutdown_order = reversed(self.dependency_manager.get_initialization_order())
+        """
+        Shutdown components in reverse dependency order.
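+
+        Components are stopped in the reverse of the initialization order,
+        so dependents shut down before the components they depend on.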
- for name in shutdown_order: - if name not in self._components: - continue - - component = self._components[name] - try: - await component.shutdown() - self.tracker.update_state(name, ComponentState.SHUTDOWN) - except Exception as e: - logger.error(f"Error shutting down component {name}: {e}") - self.tracker.update_state(name, ComponentState.ERROR, str(e)) + Raises: + ComponentError: If shutdown fails + """ + try: + shutdown_order = reversed(self.dependency_manager.get_initialization_order()) + + for name in shutdown_order: + if name not in self._components: + continue + + component = self._components[name] + try: + await component.shutdown() + self.tracker.update_state(name, ComponentState.SHUTDOWN) + except Exception as e: + error = f"Error shutting down component {name}: {str(e)}" + logger.error(error, exc_info=True) + self.tracker.update_state(name, ComponentState.ERROR, str(e)) + raise ComponentError( + error, + context=ErrorContext( + "ComponentManager", + "shutdown_components", + {"component": name}, + ErrorSeverity.HIGH + ) + ) + + except Exception as e: + error = f"Component shutdown failed: {str(e)}" + logger.error(error, exc_info=True) + raise ComponentError( + error, + context=ErrorContext( + "ComponentManager", + "shutdown_components", + None, + ErrorSeverity.HIGH + ) + ) def clear(self) -> None: """Clear all registered components""" self._components.clear() logger.debug("Cleared all components") - def get_component_status(self) -> Dict[str, Any]: - """Get status of all components""" + def get_component_status(self) -> Dict[str, ComponentStatus]: + """ + Get status of all components. + + Returns: + Dictionary mapping component names to their status + """ return { - name: { - "state": self.tracker.states.get(name, ComponentState.UNREGISTERED).value, - "registration_time": component.registration_time, - "initialization_time": component.initialization_time, - "dependencies": self.dependency_manager.get_dependencies(name), - "dependents": self.dependency_manager.get_dependents(name), - "error": component.error - } + name: ComponentStatus( + state=self.tracker.states.get(name, ComponentState.UNREGISTERED).name, + registration_time=component.registration_time.isoformat() if component.registration_time else None, + initialization_time=component.initialization_time.isoformat() if component.initialization_time else None, + dependencies=self.dependency_manager.get_dependencies(name), + dependents=self.dependency_manager.get_dependents(name), + error=component.error, + health=component.is_healthy() + ) for name, component in self._components.items() } diff --git a/videoarchiver/core/error_handler.py b/videoarchiver/core/error_handler.py index b7768ea..6b0223d 100644 --- a/videoarchiver/core/error_handler.py +++ b/videoarchiver/core/error_handler.py @@ -2,7 +2,8 @@ import logging import traceback -from typing import Dict, Optional, Tuple, Type +from typing import Dict, Optional, Tuple, Type, TypedDict, ClassVar +from enum import Enum, auto import discord from redbot.core.commands import ( Context, @@ -13,98 +14,179 @@ from redbot.core.commands import ( CommandError ) -from ..utils.exceptions import VideoArchiverError as ProcessingError, ConfigurationError as ConfigError +from ..utils.exceptions import ( + VideoArchiverError, + ErrorSeverity, + ErrorContext, + ProcessorError, + ValidationError, + DisplayError, + URLExtractionError, + MessageHandlerError, + QueueHandlerError, + QueueProcessorError, + FFmpegError, + DatabaseError, + HealthCheckError, + TrackingError, + NetworkError, + 
ResourceExhaustedError, + ConfigurationError +) from .response_handler import response_manager logger = logging.getLogger("VideoArchiver") +class ErrorCategory(Enum): + """Categories of errors""" + PERMISSION = auto() + ARGUMENT = auto() + CONFIGURATION = auto() + PROCESSING = auto() + NETWORK = auto() + RESOURCE = auto() + DATABASE = auto() + VALIDATION = auto() + QUEUE = auto() + CLEANUP = auto() + HEALTH = auto() + UNEXPECTED = auto() + +class ErrorStats(TypedDict): + """Type definition for error statistics""" + counts: Dict[str, int] + patterns: Dict[str, Dict[str, int]] + severities: Dict[str, Dict[str, int]] + class ErrorFormatter: """Formats error messages for display""" @staticmethod - def format_permission_error(error: Exception) -> str: - """Format permission error messages""" + def format_error_message(error: Exception, context: Optional[ErrorContext] = None) -> str: + """Format error message with context""" + base_message = str(error) + if context: + return f"{context}: {base_message}" + return base_message + + @staticmethod + def format_user_message(error: Exception, category: ErrorCategory) -> str: + """Format user-friendly error message""" if isinstance(error, MissingPermissions): return "You don't have permission to use this command." elif isinstance(error, BotMissingPermissions): return "I don't have the required permissions to do that." - return str(error) - - @staticmethod - def format_argument_error(error: Exception) -> str: - """Format argument error messages""" - if isinstance(error, MissingRequiredArgument): + elif isinstance(error, MissingRequiredArgument): return f"Missing required argument: {error.param.name}" elif isinstance(error, BadArgument): return f"Invalid argument: {str(error)}" + elif isinstance(error, VideoArchiverError): + return str(error) + elif category == ErrorCategory.UNEXPECTED: + return "An unexpected error occurred. Please check the logs for details." return str(error) - @staticmethod - def format_processing_error(error: ProcessingError) -> str: - """Format processing error messages""" - return f"Processing error: {str(error)}" - - @staticmethod - def format_config_error(error: ConfigError) -> str: - """Format configuration error messages""" - return f"Configuration error: {str(error)}" - - @staticmethod - def format_unexpected_error(error: Exception) -> str: - """Format unexpected error messages""" - return "An unexpected error occurred. Check the logs for details." 
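+
+# Illustrative flow (with a hypothetical exception `err`): categorization
+# drives both the user-facing message and the log severity.
+#
+#     category, severity = ErrorCategorizer.categorize_error(err)
+#     message = ErrorFormatter.format_user_message(err, category)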
- class ErrorCategorizer: """Categorizes errors and determines handling strategy""" - ERROR_TYPES = { - MissingPermissions: ("permission", "error"), - BotMissingPermissions: ("permission", "error"), - MissingRequiredArgument: ("argument", "warning"), - BadArgument: ("argument", "warning"), - ConfigError: ("configuration", "error"), - ProcessingError: ("processing", "error"), + ERROR_MAPPING: ClassVar[Dict[Type[Exception], Tuple[ErrorCategory, ErrorSeverity]]] = { + # Discord command errors + MissingPermissions: (ErrorCategory.PERMISSION, ErrorSeverity.MEDIUM), + BotMissingPermissions: (ErrorCategory.PERMISSION, ErrorSeverity.HIGH), + MissingRequiredArgument: (ErrorCategory.ARGUMENT, ErrorSeverity.LOW), + BadArgument: (ErrorCategory.ARGUMENT, ErrorSeverity.LOW), + + # VideoArchiver errors + ProcessorError: (ErrorCategory.PROCESSING, ErrorSeverity.HIGH), + ValidationError: (ErrorCategory.VALIDATION, ErrorSeverity.MEDIUM), + DisplayError: (ErrorCategory.PROCESSING, ErrorSeverity.LOW), + URLExtractionError: (ErrorCategory.PROCESSING, ErrorSeverity.MEDIUM), + MessageHandlerError: (ErrorCategory.PROCESSING, ErrorSeverity.MEDIUM), + QueueHandlerError: (ErrorCategory.QUEUE, ErrorSeverity.HIGH), + QueueProcessorError: (ErrorCategory.QUEUE, ErrorSeverity.HIGH), + FFmpegError: (ErrorCategory.PROCESSING, ErrorSeverity.HIGH), + DatabaseError: (ErrorCategory.DATABASE, ErrorSeverity.HIGH), + HealthCheckError: (ErrorCategory.HEALTH, ErrorSeverity.HIGH), + TrackingError: (ErrorCategory.PROCESSING, ErrorSeverity.MEDIUM), + NetworkError: (ErrorCategory.NETWORK, ErrorSeverity.MEDIUM), + ResourceExhaustedError: (ErrorCategory.RESOURCE, ErrorSeverity.HIGH), + ConfigurationError: (ErrorCategory.CONFIGURATION, ErrorSeverity.HIGH) } @classmethod - def categorize_error(cls, error: Exception) -> Tuple[str, str]: - """Categorize an error and determine its severity - - Returns: - Tuple[str, str]: (Error category, Severity level) + def categorize_error(cls, error: Exception) -> Tuple[ErrorCategory, ErrorSeverity]: """ - for error_type, (category, severity) in cls.ERROR_TYPES.items(): + Categorize an error and determine its severity. + + Args: + error: Exception to categorize + + Returns: + Tuple of (Error category, Severity level) + """ + for error_type, (category, severity) in cls.ERROR_MAPPING.items(): if isinstance(error, error_type): return category, severity - return "unexpected", "error" + return ErrorCategory.UNEXPECTED, ErrorSeverity.HIGH class ErrorTracker: """Tracks error occurrences and patterns""" - def __init__(self): + def __init__(self) -> None: self.error_counts: Dict[str, int] = {} self.error_patterns: Dict[str, Dict[str, int]] = {} + self.error_severities: Dict[str, Dict[str, int]] = {} - def track_error(self, error: Exception, category: str) -> None: - """Track an error occurrence""" + def track_error( + self, + error: Exception, + category: ErrorCategory, + severity: ErrorSeverity + ) -> None: + """ + Track an error occurrence. 
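+
+        Occurrences are counted per exception type, and additionally grouped
+        by category and by severity for pattern analysis.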
+ + Args: + error: Exception that occurred + category: Error category + severity: Error severity + """ error_type = type(error).__name__ + + # Track error counts self.error_counts[error_type] = self.error_counts.get(error_type, 0) + 1 - if category not in self.error_patterns: - self.error_patterns[category] = {} - self.error_patterns[category][error_type] = self.error_patterns[category].get(error_type, 0) + 1 + # Track error patterns by category + if category.value not in self.error_patterns: + self.error_patterns[category.value] = {} + self.error_patterns[category.value][error_type] = ( + self.error_patterns[category.value].get(error_type, 0) + 1 + ) + + # Track error severities + if severity.value not in self.error_severities: + self.error_severities[severity.value] = {} + self.error_severities[severity.value][error_type] = ( + self.error_severities[severity.value].get(error_type, 0) + 1 + ) - def get_error_stats(self) -> Dict: - """Get error statistics""" - return { - "counts": self.error_counts.copy(), - "patterns": self.error_patterns.copy() - } + def get_error_stats(self) -> ErrorStats: + """ + Get error statistics. + + Returns: + Dictionary containing error statistics + """ + return ErrorStats( + counts=self.error_counts.copy(), + patterns=self.error_patterns.copy(), + severities=self.error_severities.copy() + ) class ErrorManager: """Manages error handling and reporting""" - def __init__(self): + def __init__(self) -> None: self.formatter = ErrorFormatter() self.categorizer = ErrorCategorizer() self.tracker = ErrorTracker() @@ -114,7 +196,8 @@ class ErrorManager: ctx: Context, error: Exception ) -> None: - """Handle a command error + """ + Handle a command error. Args: ctx: Command context @@ -124,24 +207,40 @@ class ErrorManager: # Categorize error category, severity = self.categorizer.categorize_error(error) - # Track error - self.tracker.track_error(error, category) + # Create error context + context = ErrorContext( + component=ctx.command.qualified_name if ctx.command else "unknown", + operation="command_execution", + details={ + "guild_id": str(ctx.guild.id) if ctx.guild else "DM", + "channel_id": str(ctx.channel.id), + "user_id": str(ctx.author.id) + }, + severity=severity + ) - # Format error message - error_msg = await self._format_error_message(error, category) + # Track error + self.tracker.track_error(error, category, severity) + + # Format error messages + log_message = self.formatter.format_error_message(error, context) + user_message = self.formatter.format_user_message(error, category) # Log error details - self._log_error(ctx, error, category, severity) + self._log_error(log_message, severity) # Send response await response_manager.send_response( ctx, - content=error_msg, - response_type=severity + content=user_message, + response_type=severity.name.lower() ) except Exception as e: - logger.error(f"Error handling command error: {str(e)}") + logger.error( + f"Error handling command error: {str(e)}\n" + f"Original error: {traceback.format_exc()}" + ) try: await response_manager.send_response( ctx, @@ -151,46 +250,25 @@ class ErrorManager: except Exception: pass - async def _format_error_message( - self, - error: Exception, - category: str - ) -> str: - """Format error message based on category""" - try: - if category == "permission": - return self.formatter.format_permission_error(error) - elif category == "argument": - return self.formatter.format_argument_error(error) - elif category == "processing": - return self.formatter.format_processing_error(error) - elif 
category == "configuration": - return self.formatter.format_config_error(error) - else: - return self.formatter.format_unexpected_error(error) - except Exception as e: - logger.error(f"Error formatting error message: {e}") - return "An error occurred. Please check the logs." - def _log_error( self, - ctx: Context, - error: Exception, - category: str, - severity: str + message: str, + severity: ErrorSeverity ) -> None: - """Log error details""" + """ + Log error details. + + Args: + message: Error message to log + severity: Error severity + """ try: - if severity == "error": - logger.error( - f"Command error in {ctx.command} (Category: {category}):\n" - f"{traceback.format_exc()}" - ) + if severity in (ErrorSeverity.HIGH, ErrorSeverity.CRITICAL): + logger.error(f"{message}\n{traceback.format_exc()}") + elif severity == ErrorSeverity.MEDIUM: + logger.warning(message) else: - logger.warning( - f"Command warning in {ctx.command} (Category: {category}):\n" - f"{str(error)}" - ) + logger.info(message) except Exception as e: logger.error(f"Error logging error details: {e}") @@ -198,5 +276,11 @@ class ErrorManager: error_manager = ErrorManager() async def handle_command_error(ctx: Context, error: Exception) -> None: - """Helper function to handle command errors using the error manager""" + """ + Helper function to handle command errors using the error manager. + + Args: + ctx: Command context + error: Exception to handle + """ await error_manager.handle_error(ctx, error) diff --git a/videoarchiver/core/events.py b/videoarchiver/core/events.py index 2228b79..cfc459a 100644 --- a/videoarchiver/core/events.py +++ b/videoarchiver/core/events.py @@ -1,59 +1,147 @@ """Event handlers for VideoArchiver""" -import logging -import discord import asyncio +import logging import traceback -from typing import TYPE_CHECKING, Dict, Any, Optional from datetime import datetime +from enum import Enum, auto +from typing import TYPE_CHECKING, Dict, Any, Optional, TypedDict, ClassVar, List + +import discord from ..processor.constants import REACTIONS from ..processor.reactions import handle_archived_reaction from .guild import initialize_guild_components, cleanup_guild_components from .error_handler import error_manager from .response_handler import response_manager +from ..utils.exceptions import EventError, ErrorContext, ErrorSeverity if TYPE_CHECKING: from .base import VideoArchiver logger = logging.getLogger("VideoArchiver") + +class EventType(Enum): + """Types of Discord events""" + GUILD_JOIN = auto() + GUILD_REMOVE = auto() + MESSAGE = auto() + REACTION_ADD = auto() + MESSAGE_PROCESSING = auto() + REACTION_PROCESSING = auto() + + +class EventStats(TypedDict): + """Type definition for event statistics""" + counts: Dict[str, int] + last_events: Dict[str, str] + errors: Dict[str, int] + error_rate: float + health: bool + + +class EventHistory(TypedDict): + """Type definition for event history entry""" + event_type: str + timestamp: str + guild_id: Optional[int] + channel_id: Optional[int] + message_id: Optional[int] + user_id: Optional[int] + error: Optional[str] + duration: float + + class EventTracker: """Tracks event occurrences and patterns""" - def __init__(self): + MAX_HISTORY: ClassVar[int] = 1000 # Maximum history entries to keep + ERROR_THRESHOLD: ClassVar[float] = 0.1 # 10% error rate threshold + + def __init__(self) -> None: self.event_counts: Dict[str, int] = {} self.last_events: Dict[str, datetime] = {} self.error_counts: Dict[str, int] = {} + self.history: List[EventHistory] = [] - def 
record_event(self, event_type: str) -> None: + def record_event( + self, + event_type: EventType, + guild_id: Optional[int] = None, + channel_id: Optional[int] = None, + message_id: Optional[int] = None, + user_id: Optional[int] = None, + ) -> None: """Record an event occurrence""" - self.event_counts[event_type] = self.event_counts.get(event_type, 0) + 1 - self.last_events[event_type] = datetime.utcnow() + event_name = event_type.name + self.event_counts[event_name] = self.event_counts.get(event_name, 0) + 1 + self.last_events[event_name] = datetime.utcnow() - def record_error(self, event_type: str) -> None: + # Add to history + self.history.append( + EventHistory( + event_type=event_name, + timestamp=datetime.utcnow().isoformat(), + guild_id=guild_id, + channel_id=channel_id, + message_id=message_id, + user_id=user_id, + error=None, + duration=0.0, + ) + ) + + # Cleanup old history + if len(self.history) > self.MAX_HISTORY: + self.history = self.history[-self.MAX_HISTORY:] + + def record_error( + self, event_type: EventType, error: str, duration: float = 0.0 + ) -> None: """Record an event error""" - self.error_counts[event_type] = self.error_counts.get(event_type, 0) + 1 + event_name = event_type.name + self.error_counts[event_name] = self.error_counts.get(event_name, 0) + 1 - def get_stats(self) -> Dict[str, Any]: + # Update last history entry with error + if self.history: + self.history[-1].update({"error": error, "duration": duration}) + + def get_stats(self) -> EventStats: """Get event statistics""" - return { - "counts": self.event_counts.copy(), - "last_events": {k: v.isoformat() for k, v in self.last_events.items()}, - "errors": self.error_counts.copy() - } + total_events = sum(self.event_counts.values()) + total_errors = sum(self.error_counts.values()) + error_rate = total_errors / total_events if total_events > 0 else 0.0 + + return EventStats( + counts=self.event_counts.copy(), + last_events={k: v.isoformat() for k, v in self.last_events.items()}, + errors=self.error_counts.copy(), + error_rate=error_rate, + health=error_rate < self.ERROR_THRESHOLD, + ) + class GuildEventHandler: """Handles guild-related events""" - def __init__(self, cog: "VideoArchiver", tracker: EventTracker): + def __init__(self, cog: "VideoArchiver", tracker: EventTracker) -> None: self.cog = cog self.tracker = tracker async def handle_guild_join(self, guild: discord.Guild) -> None: - """Handle bot joining a new guild""" - self.tracker.record_event("guild_join") - + """ + Handle bot joining a new guild. 
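+
+        Initialization is skipped until the cog has signalled readiness
+        via its ready event.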
+ + Args: + guild: Discord guild that was joined + + Raises: + EventError: If guild initialization fails + """ + start_time = datetime.utcnow() + self.tracker.record_event(EventType.GUILD_JOIN, guild_id=guild.id) + if not self.cog.ready.is_set(): return @@ -61,29 +149,72 @@ class GuildEventHandler: await initialize_guild_components(self.cog, guild.id) logger.info(f"Initialized components for new guild {guild.id}") except Exception as e: - self.tracker.record_error("guild_join") - logger.error(f"Failed to initialize new guild {guild.id}: {str(e)}") + duration = (datetime.utcnow() - start_time).total_seconds() + self.tracker.record_error(EventType.GUILD_JOIN, str(e), duration) + error = f"Failed to initialize new guild {guild.id}: {str(e)}" + logger.error(error, exc_info=True) + raise EventError( + error, + context=ErrorContext( + "GuildEventHandler", + "handle_guild_join", + {"guild_id": guild.id}, + ErrorSeverity.HIGH, + ), + ) async def handle_guild_remove(self, guild: discord.Guild) -> None: - """Handle bot leaving a guild""" - self.tracker.record_event("guild_remove") - + """ + Handle bot leaving a guild. + + Args: + guild: Discord guild that was left + + Raises: + EventError: If guild cleanup fails + """ + start_time = datetime.utcnow() + self.tracker.record_event(EventType.GUILD_REMOVE, guild_id=guild.id) + try: await cleanup_guild_components(self.cog, guild.id) except Exception as e: - self.tracker.record_error("guild_remove") - logger.error(f"Error cleaning up removed guild {guild.id}: {str(e)}") + duration = (datetime.utcnow() - start_time).total_seconds() + self.tracker.record_error(EventType.GUILD_REMOVE, str(e), duration) + error = f"Error cleaning up removed guild {guild.id}: {str(e)}" + logger.error(error, exc_info=True) + raise EventError( + error, + context=ErrorContext( + "GuildEventHandler", + "handle_guild_remove", + {"guild_id": guild.id}, + ErrorSeverity.HIGH, + ), + ) + class MessageEventHandler: """Handles message-related events""" - def __init__(self, cog: "VideoArchiver", tracker: EventTracker): + def __init__(self, cog: "VideoArchiver", tracker: EventTracker) -> None: self.cog = cog self.tracker = tracker async def handle_message(self, message: discord.Message) -> None: - """Handle new messages for video processing""" - self.tracker.record_event("message") + """ + Handle new messages for video processing. 
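+
+        DM and bot messages are ignored, and processing is dispatched to a
+        background task so the event handler never blocks the gateway.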
+ + Args: + message: Discord message to process + """ + self.tracker.record_event( + EventType.MESSAGE, + guild_id=message.guild.id if message.guild else None, + channel_id=message.channel.id, + message_id=message.id, + user_id=message.author.id, + ) # Skip if not ready or if message is from DM/bot if not self.cog.ready.is_set() or message.guild is None or message.author.bot: @@ -99,21 +230,19 @@ class MessageEventHandler: async def _process_message_background(self, message: discord.Message) -> None: """Process message in background to avoid blocking""" + start_time = datetime.utcnow() try: await self.cog.processor.process_message(message) except Exception as e: - self.tracker.record_error("message_processing") + duration = (datetime.utcnow() - start_time).total_seconds() + self.tracker.record_error(EventType.MESSAGE_PROCESSING, str(e), duration) await self._handle_processing_error(message, e) async def _handle_processing_error( - self, - message: discord.Message, - error: Exception + self, message: discord.Message, error: Exception ) -> None: """Handle message processing errors""" - logger.error( - f"Error processing message {message.id}: {traceback.format_exc()}" - ) + logger.error(f"Error processing message {message.id}: {traceback.format_exc()}") try: log_channel = await self.cog.config_manager.get_channel( message.guild, "log" @@ -126,24 +255,35 @@ class MessageEventHandler: f"Message ID: {message.id}\n" f"Channel: {message.channel.mention}" ), - response_type="error" + response_type=ErrorSeverity.HIGH, ) except Exception as log_error: logger.error(f"Failed to log error to guild: {str(log_error)}") + class ReactionEventHandler: """Handles reaction-related events""" - def __init__(self, cog: "VideoArchiver", tracker: EventTracker): + def __init__(self, cog: "VideoArchiver", tracker: EventTracker) -> None: self.cog = cog self.tracker = tracker async def handle_reaction_add( - self, - payload: discord.RawReactionActionEvent + self, payload: discord.RawReactionActionEvent ) -> None: - """Handle reactions to messages""" - self.tracker.record_event("reaction_add") + """ + Handle reactions to messages. + + Args: + payload: Reaction event payload + """ + self.tracker.record_event( + EventType.REACTION_ADD, + guild_id=payload.guild_id, + channel_id=payload.channel_id, + message_id=payload.message_id, + user_id=payload.user_id, + ) if payload.user_id == self.cog.bot.user.id: return @@ -151,47 +291,80 @@ class ReactionEventHandler: try: await self._process_reaction(payload) except Exception as e: - self.tracker.record_error("reaction_processing") - logger.error(f"Error handling reaction: {e}") + self.tracker.record_error(EventType.REACTION_PROCESSING, str(e)) + logger.error(f"Error handling reaction: {e}", exc_info=True) - async def _process_reaction( - self, - payload: discord.RawReactionActionEvent - ) -> None: - """Process a reaction event""" - # Get the channel and message - channel = self.cog.bot.get_channel(payload.channel_id) - if not channel: - return - - message = await channel.fetch_message(payload.message_id) - if not message: - return + async def _process_reaction(self, payload: discord.RawReactionActionEvent) -> None: + """ + Process a reaction event. 
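+
+        Only the configured "archived" reaction is handled, and only when
+        the archive database is enabled for the cog.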
+ + Args: + payload: Reaction event payload + + Raises: + EventError: If reaction processing fails + """ + try: + # Get the channel and message + channel = self.cog.bot.get_channel(payload.channel_id) + if not channel: + return + + message = await channel.fetch_message(payload.message_id) + if not message: + return + + # Check if it's the archived reaction + if str(payload.emoji) == REACTIONS["archived"]: + # Only process if database is enabled + if self.cog.db: + user = self.cog.bot.get_user(payload.user_id) + asyncio.create_task( + handle_archived_reaction(message, user, self.cog.db) + ) + + except Exception as e: + error = f"Failed to process reaction: {str(e)}" + logger.error(error, exc_info=True) + raise EventError( + error, + context=ErrorContext( + "ReactionEventHandler", + "_process_reaction", + { + "message_id": payload.message_id, + "user_id": payload.user_id, + "emoji": str(payload.emoji), + }, + ErrorSeverity.MEDIUM, + ), + ) - # Check if it's the archived reaction - if str(payload.emoji) == REACTIONS["archived"]: - # Only process if database is enabled - if self.cog.db: - user = self.cog.bot.get_user(payload.user_id) - asyncio.create_task( - handle_archived_reaction(message, user, self.cog.db) - ) class EventManager: """Manages Discord event handling""" - def __init__(self, cog: "VideoArchiver"): + def __init__(self, cog: "VideoArchiver") -> None: self.tracker = EventTracker() self.guild_handler = GuildEventHandler(cog, self.tracker) self.message_handler = MessageEventHandler(cog, self.tracker) self.reaction_handler = ReactionEventHandler(cog, self.tracker) - def get_stats(self) -> Dict[str, Any]: + def get_stats(self) -> EventStats: """Get event statistics""" return self.tracker.get_stats() -def setup_events(cog: "VideoArchiver") -> None: - """Set up event handlers for the cog""" + +def setup_events(cog: "VideoArchiver") -> EventManager: + """ + Set up event handlers for the cog. + + Args: + cog: VideoArchiver cog instance + + Returns: + Configured EventManager instance + """ event_manager = EventManager(cog) @cog.listener() @@ -209,3 +382,5 @@ def setup_events(cog: "VideoArchiver") -> None: @cog.listener() async def on_raw_reaction_add(payload: discord.RawReactionActionEvent) -> None: await event_manager.reaction_handler.handle_reaction_add(payload) + + return event_manager diff --git a/videoarchiver/core/initialization.py b/videoarchiver/core/initialization.py index 6377e1c..2444991 100644 --- a/videoarchiver/core/initialization.py +++ b/videoarchiver/core/initialization.py @@ -1,16 +1,102 @@ """Module for handling VideoArchiver initialization""" -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional, Dict, Any import asyncio +import logging + +from ..utils.exceptions import ( + ComponentError, + ErrorContext, + ErrorSeverity +) +from .lifecycle import LifecycleState if TYPE_CHECKING: from .base import VideoArchiver -# Re-export initialization functions from lifecycle +logger = logging.getLogger("VideoArchiver") + async def initialize_cog(cog: "VideoArchiver") -> None: - """Initialize all components with proper error handling""" - await cog.lifecycle_manager.initialize_cog() + """ + Initialize all components with proper error handling. + + This is a re-export of lifecycle_manager.initialize_cog with additional + error context and logging. 
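+
+        Example (illustrative fire-and-forget startup pattern; names are
+        this module's own helpers):
+
+            task = asyncio.create_task(initialize_cog(cog))
+            task.add_done_callback(lambda t: init_callback(cog, t))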
+ + Args: + cog: VideoArchiver cog instance + + Raises: + ComponentError: If initialization fails + """ + try: + logger.info("Starting cog initialization...") + await cog.lifecycle_manager.initialize_cog() + logger.info("Cog initialization completed successfully") + except Exception as e: + error = f"Failed to initialize cog: {str(e)}" + logger.error(error, exc_info=True) + raise ComponentError( + error, + context=ErrorContext( + "Initialization", + "initialize_cog", + {"state": cog.lifecycle_manager.state_tracker.state.name}, + ErrorSeverity.HIGH + ) + ) def init_callback(cog: "VideoArchiver", task: asyncio.Task) -> None: - """Handle initialization task completion""" - cog.lifecycle_manager.init_callback(task) + """ + Handle initialization task completion. + + This is a re-export of lifecycle_manager.init_callback with additional + error context and logging. + + Args: + cog: VideoArchiver cog instance + task: Initialization task + """ + try: + logger.debug("Processing initialization task completion...") + cog.lifecycle_manager.init_callback(task) + + # Log final state + state = cog.lifecycle_manager.state_tracker.state + if state == LifecycleState.READY: + logger.info("Initialization completed successfully") + elif state == LifecycleState.ERROR: + logger.error("Initialization failed") + else: + logger.warning(f"Unexpected state after initialization: {state.name}") + + except Exception as e: + logger.error(f"Error in initialization callback: {str(e)}", exc_info=True) + # We don't raise here since this is a callback + +def get_init_status(cog: "VideoArchiver") -> Dict[str, Any]: + """ + Get initialization status information. + + Args: + cog: VideoArchiver cog instance + + Returns: + Dictionary containing initialization status + """ + return { + "state": cog.lifecycle_manager.state_tracker.state.name, + "ready": cog.ready.is_set(), + "components_initialized": all( + hasattr(cog, attr) and getattr(cog, attr) is not None + for attr in [ + "processor", + "queue_manager", + "update_checker", + "ffmpeg_mgr", + "components", + "db" + ] + ), + "history": cog.lifecycle_manager.state_tracker.get_state_history() + } diff --git a/videoarchiver/core/lifecycle.py b/videoarchiver/core/lifecycle.py index 0024156..5e52213 100644 --- a/videoarchiver/core/lifecycle.py +++ b/videoarchiver/core/lifecycle.py @@ -3,82 +3,195 @@ import asyncio import logging import traceback -from typing import Optional, Dict, Any, Set, List, Callable -from enum import Enum +from typing import Optional, Dict, Any, Set, List, Callable, TypedDict, ClassVar, Union +from enum import Enum, auto from datetime import datetime from .cleanup import cleanup_resources, force_cleanup_resources -from ..utils.exceptions import VideoArchiverError +from ..utils.exceptions import ( + VideoArchiverError, + ErrorContext, + ErrorSeverity, + ComponentError, + CleanupError +) logger = logging.getLogger("VideoArchiver") class LifecycleState(Enum): """Possible states in the cog lifecycle""" - UNINITIALIZED = "uninitialized" - INITIALIZING = "initializing" - READY = "ready" - UNLOADING = "unloading" - ERROR = "error" + UNINITIALIZED = auto() + INITIALIZING = auto() + READY = auto() + UNLOADING = auto() + ERROR = auto() + +class TaskStatus(Enum): + """Task execution status""" + RUNNING = auto() + COMPLETED = auto() + CANCELLED = auto() + FAILED = auto() + +class TaskHistory(TypedDict): + """Type definition for task history entry""" + start_time: str + end_time: Optional[str] + status: str + error: Optional[str] + duration: float + +class 
StateHistory(TypedDict): + """Type definition for state history entry""" + state: str + timestamp: str + duration: float + details: Optional[Dict[str, Any]] + +class LifecycleStatus(TypedDict): + """Type definition for lifecycle status""" + state: str + state_history: List[StateHistory] + tasks: Dict[str, Any] + health: bool class TaskManager: """Manages asyncio tasks""" - def __init__(self): + TASK_TIMEOUT: ClassVar[int] = 30 # Default task timeout in seconds + + def __init__(self) -> None: self._tasks: Dict[str, asyncio.Task] = {} - self._task_history: Dict[str, Dict[str, Any]] = {} + self._task_history: Dict[str, TaskHistory] = {} async def create_task( self, name: str, - coro, - callback: Optional[Callable] = None + coro: Callable[..., Any], + callback: Optional[Callable[[asyncio.Task], None]] = None, + timeout: Optional[float] = None ) -> asyncio.Task: - """Create and track a task""" - task = asyncio.create_task(coro) - self._tasks[name] = task - self._task_history[name] = { - "start_time": datetime.utcnow(), - "status": "running" - } + """ + Create and track a task. + + Args: + name: Task name + coro: Coroutine to run + callback: Optional completion callback + timeout: Optional timeout in seconds + + Returns: + Created task + + Raises: + ComponentError: If task creation fails + """ + try: + task = asyncio.create_task(coro) + self._tasks[name] = task + self._task_history[name] = TaskHistory( + start_time=datetime.utcnow().isoformat(), + end_time=None, + status=TaskStatus.RUNNING.name, + error=None, + duration=0.0 + ) - if callback: - task.add_done_callback(lambda t: self._handle_completion(name, t, callback)) - else: - task.add_done_callback(lambda t: self._handle_completion(name, t)) + if timeout: + asyncio.create_task(self._handle_timeout(name, task, timeout)) - return task + if callback: + task.add_done_callback(lambda t: self._handle_completion(name, t, callback)) + else: + task.add_done_callback(lambda t: self._handle_completion(name, t)) + + return task + + except Exception as e: + error = f"Failed to create task {name}: {str(e)}" + logger.error(error, exc_info=True) + raise ComponentError( + error, + context=ErrorContext( + "TaskManager", + "create_task", + {"task_name": name}, + ErrorSeverity.HIGH + ) + ) + + async def _handle_timeout( + self, + name: str, + task: asyncio.Task, + timeout: float + ) -> None: + """Handle task timeout""" + try: + await asyncio.wait_for(asyncio.shield(task), timeout=timeout) + except asyncio.TimeoutError: + if not task.done(): + logger.warning(f"Task {name} timed out after {timeout}s") + task.cancel() + self._update_task_history( + name, + TaskStatus.FAILED, + f"Task timed out after {timeout}s" + ) def _handle_completion( self, name: str, task: asyncio.Task, - callback: Optional[Callable] = None + callback: Optional[Callable[[asyncio.Task], None]] = None ) -> None: """Handle task completion""" try: task.result() # Raises exception if task failed - status = "completed" + status = TaskStatus.COMPLETED + error = None except asyncio.CancelledError: - status = "cancelled" + status = TaskStatus.CANCELLED + error = "Task was cancelled" except Exception as e: - status = "failed" - logger.error(f"Task {name} failed: {e}") + status = TaskStatus.FAILED + error = str(e) + logger.error(f"Task {name} failed: {error}", exc_info=True) - self._task_history[name].update({ - "end_time": datetime.utcnow(), - "status": status - }) + self._update_task_history(name, status, error) if callback: try: callback(task) except Exception as e: - logger.error(f"Task callback 
error for {name}: {e}") + logger.error(f"Task callback error for {name}: {e}", exc_info=True) self._tasks.pop(name, None) + def _update_task_history( + self, + name: str, + status: TaskStatus, + error: Optional[str] = None + ) -> None: + """Update task history entry""" + if name in self._task_history: + end_time = datetime.utcnow() + start_time = datetime.fromisoformat(self._task_history[name]["start_time"]) + self._task_history[name].update({ + "end_time": end_time.isoformat(), + "status": status.name, + "error": error, + "duration": (end_time - start_time).total_seconds() + }) + async def cancel_task(self, name: str) -> None: - """Cancel a specific task""" + """ + Cancel a specific task. + + Args: + name: Task name to cancel + """ if task := self._tasks.get(name): if not task.done(): task.cancel() @@ -87,7 +200,7 @@ class TaskManager: except asyncio.CancelledError: pass except Exception as e: - logger.error(f"Error cancelling task {name}: {e}") + logger.error(f"Error cancelling task {name}: {e}", exc_info=True) async def cancel_all_tasks(self) -> None: """Cancel all tracked tasks""" @@ -95,7 +208,12 @@ class TaskManager: await self.cancel_task(name) def get_task_status(self) -> Dict[str, Any]: - """Get status of all tasks""" + """ + Get status of all tasks. + + Returns: + Dictionary containing task status information + """ return { "active_tasks": list(self._tasks.keys()), "history": self._task_history.copy() @@ -104,42 +222,80 @@ class TaskManager: class StateTracker: """Tracks lifecycle state and transitions""" - def __init__(self): + def __init__(self) -> None: self.state = LifecycleState.UNINITIALIZED - self.state_history: List[Dict[str, Any]] = [] + self.state_history: List[StateHistory] = [] self._record_state() - def set_state(self, state: LifecycleState) -> None: - """Set current state""" + def set_state( + self, + state: LifecycleState, + details: Optional[Dict[str, Any]] = None + ) -> None: + """ + Set current state. 
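+
+        For instance (mirrors the error path used elsewhere in this module):
+
+            tracker.set_state(LifecycleState.ERROR, {"error": str(e)})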
+ + Args: + state: New state + details: Optional state transition details + """ self.state = state - self._record_state() + self._record_state(details) - def _record_state(self) -> None: + def _record_state( + self, + details: Optional[Dict[str, Any]] = None + ) -> None: """Record state transition""" - self.state_history.append({ - "state": self.state.value, - "timestamp": datetime.utcnow() - }) + now = datetime.utcnow() + duration = 0.0 + if self.state_history: + last_state = datetime.fromisoformat(self.state_history[-1]["timestamp"]) + duration = (now - last_state).total_seconds() - def get_state_history(self) -> List[Dict[str, Any]]: + self.state_history.append(StateHistory( + state=self.state.name, + timestamp=now.isoformat(), + duration=duration, + details=details + )) + + def get_state_history(self) -> List[StateHistory]: """Get state transition history""" return self.state_history.copy() class LifecycleManager: """Manages the lifecycle of the VideoArchiver cog""" - def __init__(self, cog): + INIT_TIMEOUT: ClassVar[int] = 60 # 1 minute timeout for initialization + UNLOAD_TIMEOUT: ClassVar[int] = 30 # 30 seconds timeout for unloading + CLEANUP_TIMEOUT: ClassVar[int] = 15 # 15 seconds timeout for cleanup + + def __init__(self, cog: Any) -> None: self.cog = cog self.task_manager = TaskManager() self.state_tracker = StateTracker() self._cleanup_handlers: Set[Callable] = set() - def register_cleanup_handler(self, handler: Callable) -> None: - """Register a cleanup handler""" + def register_cleanup_handler( + self, + handler: Union[Callable[[], None], Callable[[], Any]] + ) -> None: + """ + Register a cleanup handler. + + Args: + handler: Cleanup handler function + """ self._cleanup_handlers.add(handler) async def initialize_cog(self) -> None: - """Initialize all components with proper error handling""" + """ + Initialize all components with proper error handling. + + Raises: + ComponentError: If initialization fails + """ try: # Initialize components in sequence await self.cog.component_manager.initialize_components() @@ -149,24 +305,47 @@ class LifecycleManager: logger.info("VideoArchiver initialization completed successfully") except Exception as e: - logger.error(f"Error during initialization: {str(e)}") + error = f"Error during initialization: {str(e)}" + logger.error(error, exc_info=True) await cleanup_resources(self.cog) - raise + raise ComponentError( + error, + context=ErrorContext( + "LifecycleManager", + "initialize_cog", + None, + ErrorSeverity.HIGH + ) + ) def init_callback(self, task: asyncio.Task) -> None: """Handle initialization task completion""" try: task.result() logger.info("Initialization completed successfully") + self.state_tracker.set_state(LifecycleState.READY) except asyncio.CancelledError: logger.warning("Initialization was cancelled") + self.state_tracker.set_state( + LifecycleState.ERROR, + {"reason": "cancelled"} + ) asyncio.create_task(cleanup_resources(self.cog)) except Exception as e: - logger.error(f"Initialization failed: {str(e)}\n{traceback.format_exc()}") + logger.error(f"Initialization failed: {str(e)}", exc_info=True) + self.state_tracker.set_state( + LifecycleState.ERROR, + {"error": str(e)} + ) asyncio.create_task(cleanup_resources(self.cog)) async def handle_load(self) -> None: - """Handle cog loading without blocking""" + """ + Handle cog loading without blocking. 
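+
+        Sketch of the expected call site (assumes Red's standard cog_load
+        hook on the cog):
+
+            async def cog_load(self) -> None:
+                await self.lifecycle_manager.handle_load()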
+ + Raises: + VideoArchiverError: If load fails + """ try: self.state_tracker.set_state(LifecycleState.INITIALIZING) @@ -174,7 +353,8 @@ class LifecycleManager: await self.task_manager.create_task( "initialization", self.initialize_cog(), - self.init_callback + self.init_callback, + timeout=self.INIT_TIMEOUT ) logger.info("Initialization started in background") @@ -184,14 +364,27 @@ class LifecycleManager: try: await asyncio.wait_for( force_cleanup_resources(self.cog), - timeout=15 # CLEANUP_TIMEOUT + timeout=self.CLEANUP_TIMEOUT ) except asyncio.TimeoutError: logger.error("Force cleanup during load error timed out") - raise VideoArchiverError(f"Error during cog load: {str(e)}") + raise VideoArchiverError( + f"Error during cog load: {str(e)}", + context=ErrorContext( + "LifecycleManager", + "handle_load", + None, + ErrorSeverity.HIGH + ) + ) async def handle_unload(self) -> None: - """Clean up when cog is unloaded""" + """ + Clean up when cog is unloaded. + + Raises: + CleanupError: If cleanup fails + """ self.state_tracker.set_state(LifecycleState.UNLOADING) try: @@ -205,9 +398,10 @@ class LifecycleManager: try: cleanup_task = await self.task_manager.create_task( "cleanup", - cleanup_resources(self.cog) + cleanup_resources(self.cog), + timeout=self.UNLOAD_TIMEOUT ) - await asyncio.wait_for(cleanup_task, timeout=30) # UNLOAD_TIMEOUT + await cleanup_task logger.info("Normal cleanup completed") except (asyncio.TimeoutError, Exception) as e: @@ -220,17 +414,50 @@ class LifecycleManager: try: await asyncio.wait_for( force_cleanup_resources(self.cog), - timeout=15 # CLEANUP_TIMEOUT + timeout=self.CLEANUP_TIMEOUT ) logger.info("Force cleanup completed") except asyncio.TimeoutError: - logger.error("Force cleanup timed out") + error = "Force cleanup timed out" + logger.error(error) + raise CleanupError( + error, + context=ErrorContext( + "LifecycleManager", + "handle_unload", + None, + ErrorSeverity.CRITICAL + ) + ) except Exception as e: - logger.error(f"Error during force cleanup: {str(e)}") + error = f"Error during force cleanup: {str(e)}" + logger.error(error) + raise CleanupError( + error, + context=ErrorContext( + "LifecycleManager", + "handle_unload", + None, + ErrorSeverity.CRITICAL + ) + ) except Exception as e: - logger.error(f"Error during cog unload: {str(e)}") - self.state_tracker.set_state(LifecycleState.ERROR) + error = f"Error during cog unload: {str(e)}" + logger.error(error, exc_info=True) + self.state_tracker.set_state( + LifecycleState.ERROR, + {"error": str(e)} + ) + raise CleanupError( + error, + context=ErrorContext( + "LifecycleManager", + "handle_unload", + None, + ErrorSeverity.CRITICAL + ) + ) finally: # Clear all references await self._cleanup_references() @@ -244,7 +471,7 @@ class LifecycleManager: else: handler() except Exception as e: - logger.error(f"Error in cleanup handler: {e}") + logger.error(f"Error in cleanup handler: {e}", exc_info=True) async def _cleanup_references(self) -> None: """Clean up all references""" @@ -257,10 +484,16 @@ class LifecycleManager: self.cog.components.clear() self.cog.db = None - def get_status(self) -> Dict[str, Any]: - """Get current lifecycle status""" - return { - "state": self.state_tracker.state.value, - "state_history": self.state_tracker.get_state_history(), - "tasks": self.task_manager.get_task_status() - } + def get_status(self) -> LifecycleStatus: + """ + Get current lifecycle status. 
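+
+        Example shape of the returned mapping (values illustrative):
+
+            {"state": "READY", "state_history": [...],
+             "tasks": {...}, "health": True}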
+ + Returns: + Dictionary containing lifecycle status information + """ + return LifecycleStatus( + state=self.state_tracker.state.name, + state_history=self.state_tracker.get_state_history(), + tasks=self.task_manager.get_task_status(), + health=self.state_tracker.state == LifecycleState.READY + ) diff --git a/videoarchiver/core/response_handler.py b/videoarchiver/core/response_handler.py index 1fb5526..d1a9fdd 100644 --- a/videoarchiver/core/response_handler.py +++ b/videoarchiver/core/response_handler.py @@ -1,57 +1,116 @@ """Module for handling command responses""" import logging +from enum import Enum, auto +from typing import Optional, Union, Dict, Any, TypedDict, ClassVar +from datetime import datetime import discord -from typing import Optional, Union, Dict, Any from redbot.core.commands import Context +from ..utils.exceptions import ErrorSeverity + logger = logging.getLogger("VideoArchiver") +class ResponseType(Enum): + """Types of responses""" + NORMAL = auto() + SUCCESS = auto() + ERROR = auto() + WARNING = auto() + INFO = auto() + DEBUG = auto() + +class ResponseTheme(TypedDict): + """Type definition for response theme""" + emoji: str + color: discord.Color + +class ResponseFormat(TypedDict): + """Type definition for formatted response""" + content: str + color: discord.Color + timestamp: str + class ResponseFormatter: """Formats responses for consistency""" - @staticmethod - def format_success(message: str) -> Dict[str, Any]: - """Format a success message""" - return { - "content": f"✅ {message}", - "color": discord.Color.green() - } + THEMES: ClassVar[Dict[ResponseType, ResponseTheme]] = { + ResponseType.SUCCESS: ResponseTheme(emoji="✅", color=discord.Color.green()), + ResponseType.ERROR: ResponseTheme(emoji="❌", color=discord.Color.red()), + ResponseType.WARNING: ResponseTheme(emoji="⚠️", color=discord.Color.gold()), + ResponseType.INFO: ResponseTheme(emoji="ℹ️", color=discord.Color.blue()), + ResponseType.DEBUG: ResponseTheme(emoji="🔧", color=discord.Color.greyple()) + } - @staticmethod - def format_error(message: str) -> Dict[str, Any]: - """Format an error message""" - return { - "content": f"❌ {message}", - "color": discord.Color.red() - } + SEVERITY_MAPPING: ClassVar[Dict[ErrorSeverity, ResponseType]] = { + ErrorSeverity.LOW: ResponseType.INFO, + ErrorSeverity.MEDIUM: ResponseType.WARNING, + ErrorSeverity.HIGH: ResponseType.ERROR, + ErrorSeverity.CRITICAL: ResponseType.ERROR + } - @staticmethod - def format_warning(message: str) -> Dict[str, Any]: - """Format a warning message""" - return { - "content": f"⚠️ {message}", - "color": discord.Color.yellow() - } + @classmethod + def format_response( + cls, + message: str, + response_type: ResponseType = ResponseType.NORMAL + ) -> ResponseFormat: + """ + Format a response message. + + Args: + message: Message to format + response_type: Type of response + + Returns: + Formatted response dictionary + """ + theme = cls.THEMES.get(response_type) + if theme: + return ResponseFormat( + content=f"{theme['emoji']} {message}", + color=theme['color'], + timestamp=datetime.utcnow().isoformat() + ) + return ResponseFormat( + content=message, + color=discord.Color.default(), + timestamp=datetime.utcnow().isoformat() + ) - @staticmethod - def format_info(message: str) -> Dict[str, Any]: - """Format an info message""" - return { - "content": f"ℹ️ {message}", - "color": discord.Color.blue() - } + @classmethod + def get_response_type(cls, severity: ErrorSeverity) -> ResponseType: + """ + Get response type for error severity. 
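+
+        For instance, per SEVERITY_MAPPING above:
+
+            ResponseFormatter.get_response_type(ErrorSeverity.HIGH)  # ResponseType.ERROR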
+ + Args: + severity: Error severity level + + Returns: + Appropriate response type + """ + return cls.SEVERITY_MAPPING.get(severity, ResponseType.ERROR) class InteractionHandler: """Handles slash command interactions""" - @staticmethod async def send_initial_response( + self, interaction: discord.Interaction, content: Optional[str] = None, embed: Optional[discord.Embed] = None ) -> bool: - """Send initial interaction response""" + """ + Send initial interaction response. + + Args: + interaction: Discord interaction + content: Optional message content + embed: Optional embed + + Returns: + True if response was sent successfully + """ try: if not interaction.response.is_done(): if embed: @@ -61,16 +120,26 @@ class InteractionHandler: return True return False except Exception as e: - logger.error(f"Error sending initial interaction response: {e}") + logger.error(f"Error sending initial interaction response: {e}", exc_info=True) return False - @staticmethod async def send_followup( + self, interaction: discord.Interaction, content: Optional[str] = None, embed: Optional[discord.Embed] = None ) -> bool: - """Send interaction followup""" + """ + Send interaction followup. + + Args: + interaction: Discord interaction + content: Optional message content + embed: Optional embed + + Returns: + True if followup was sent successfully + """ try: if embed: await interaction.followup.send(content=content, embed=embed) @@ -78,13 +147,13 @@ class InteractionHandler: await interaction.followup.send(content=content) return True except Exception as e: - logger.error(f"Error sending interaction followup: {e}") + logger.error(f"Error sending interaction followup: {e}", exc_info=True) return False class ResponseManager: """Manages command responses""" - def __init__(self): + def __init__(self) -> None: self.formatter = ResponseFormatter() self.interaction_handler = InteractionHandler() @@ -93,25 +162,37 @@ class ResponseManager: ctx: Context, content: Optional[str] = None, embed: Optional[discord.Embed] = None, - response_type: str = "normal" + response_type: Union[ResponseType, str, ErrorSeverity] = ResponseType.NORMAL ) -> None: - """Send a response to a command + """ + Send a response to a command. 
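+
+        Example (illustrative; a string is coerced to the matching
+        ResponseType member):
+
+            await response_manager.send_response(
+                ctx, "Video archived", response_type="success"
+            )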
Args: ctx: Command context content: Optional message content embed: Optional embed - response_type: Type of response (normal, success, error, warning, info) + response_type: Type of response or error severity """ try: - # Format response if type specified - if response_type != "normal": - format_method = getattr(self.formatter, f"format_{response_type}", None) - if format_method and content: - formatted = format_method(content) - content = formatted["content"] - if not embed: - embed = discord.Embed(color=formatted["color"]) + # Convert string response type to enum + if isinstance(response_type, str): + try: + response_type = ResponseType[response_type.upper()] + except KeyError: + response_type = ResponseType.NORMAL + # Convert error severity to response type + elif isinstance(response_type, ErrorSeverity): + response_type = self.formatter.get_response_type(response_type) + + # Format response + if response_type != ResponseType.NORMAL and content: + formatted = self.formatter.format_response(content, response_type) + content = formatted["content"] + if not embed: + embed = discord.Embed( + color=formatted["color"], + timestamp=datetime.fromisoformat(formatted["timestamp"]) + ) # Handle response if self._is_interaction(ctx): @@ -120,7 +201,7 @@ class ResponseManager: await self._handle_regular_response(ctx, content, embed) except Exception as e: - logger.error(f"Error sending response: {e}") + logger.error(f"Error sending response: {e}", exc_info=True) await self._send_fallback_response(ctx, content, embed) def _is_interaction(self, ctx: Context) -> bool: @@ -151,7 +232,7 @@ class ResponseManager: await self._handle_regular_response(ctx, content, embed) except Exception as e: - logger.error(f"Error handling interaction response: {e}") + logger.error(f"Error handling interaction response: {e}", exc_info=True) await self._send_fallback_response(ctx, content, embed) async def _handle_regular_response( @@ -167,7 +248,7 @@ class ResponseManager: else: await ctx.send(content=content) except Exception as e: - logger.error(f"Error sending regular response: {e}") + logger.error(f"Error sending regular response: {e}", exc_info=True) await self._send_fallback_response(ctx, content, embed) async def _send_fallback_response( @@ -183,7 +264,7 @@ class ResponseManager: else: await ctx.send(content=content) except Exception as e: - logger.error(f"Failed to send fallback response: {e}") + logger.error(f"Failed to send fallback response: {e}", exc_info=True) # Global response manager instance response_manager = ResponseManager() @@ -192,7 +273,15 @@ async def handle_response( ctx: Context, content: Optional[str] = None, embed: Optional[discord.Embed] = None, - response_type: str = "normal" + response_type: Union[ResponseType, str, ErrorSeverity] = ResponseType.NORMAL ) -> None: - """Helper function to handle responses using the response manager""" + """ + Helper function to handle responses using the response manager. 
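+
+    Example (illustrative; an ErrorSeverity value is mapped to a themed
+    response automatically):
+
+        await handle_response(ctx, "Queue is full", response_type=ErrorSeverity.MEDIUM)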
+ + Args: + ctx: Command context + content: Optional message content + embed: Optional embed + response_type: Type of response or error severity + """ await response_manager.send_response(ctx, content, embed, response_type) diff --git a/videoarchiver/core/settings.py b/videoarchiver/core/settings.py index 95d7f60..544fc2c 100644 --- a/videoarchiver/core/settings.py +++ b/videoarchiver/core/settings.py @@ -1,8 +1,14 @@ """Module for managing VideoArchiver settings""" -from typing import Dict, Any, List, Optional -from dataclasses import dataclass -from enum import Enum +from typing import Dict, Any, List, Optional, Union, TypedDict, ClassVar +from dataclasses import dataclass, field +from enum import Enum, auto + +from ..utils.exceptions import ( + ConfigurationError, + ErrorContext, + ErrorSeverity +) class VideoFormat(Enum): """Supported video formats""" @@ -17,133 +23,177 @@ class VideoQuality(Enum): HIGH = "high" # 1080p ULTRA = "ultra" # 4K +class SettingCategory(Enum): + """Setting categories""" + GENERAL = auto() + CHANNELS = auto() + PERMISSIONS = auto() + VIDEO = auto() + MESSAGES = auto() + PERFORMANCE = auto() + FEATURES = auto() + +class ValidationResult(TypedDict): + """Type definition for validation result""" + valid: bool + error: Optional[str] + details: Dict[str, Any] + @dataclass class SettingDefinition: """Defines a setting's properties""" name: str - category: str + category: SettingCategory default_value: Any description: str data_type: type required: bool = True - min_value: Optional[int] = None - max_value: Optional[int] = None + min_value: Optional[Union[int, float]] = None + max_value: Optional[Union[int, float]] = None choices: Optional[List[Any]] = None depends_on: Optional[str] = None + validation_func: Optional[callable] = None + error_message: Optional[str] = None -class SettingCategory(Enum): - """Setting categories""" - GENERAL = "general" - CHANNELS = "channels" - PERMISSIONS = "permissions" - VIDEO = "video" - MESSAGES = "messages" - PERFORMANCE = "performance" - FEATURES = "features" + def __post_init__(self) -> None: + """Validate setting definition""" + if self.choices and self.default_value not in self.choices: + raise ConfigurationError( + f"Default value {self.default_value} not in choices {self.choices}", + context=ErrorContext( + "Settings", + "definition_validation", + {"setting": self.name}, + ErrorSeverity.HIGH + ) + ) + + if self.min_value is not None and self.max_value is not None: + if self.min_value > self.max_value: + raise ConfigurationError( + f"Min value {self.min_value} greater than max value {self.max_value}", + context=ErrorContext( + "Settings", + "definition_validation", + {"setting": self.name}, + ErrorSeverity.HIGH + ) + ) class Settings: """Manages VideoArchiver settings""" # Setting definitions - SETTINGS = { + SETTINGS: ClassVar[Dict[str, SettingDefinition]] = { "enabled": SettingDefinition( name="enabled", - category=SettingCategory.GENERAL.value, + category=SettingCategory.GENERAL, default_value=False, description="Whether the archiver is enabled for this guild", data_type=bool ), "archive_channel": SettingDefinition( name="archive_channel", - category=SettingCategory.CHANNELS.value, + category=SettingCategory.CHANNELS, default_value=None, description="Channel where archived videos are posted", data_type=int, - required=False + required=False, + error_message="Archive channel must be a valid channel ID" ), "log_channel": SettingDefinition( name="log_channel", - category=SettingCategory.CHANNELS.value, + 
category=SettingCategory.CHANNELS, default_value=None, description="Channel for logging archiver actions", data_type=int, - required=False + required=False, + error_message="Log channel must be a valid channel ID" ), "enabled_channels": SettingDefinition( name="enabled_channels", - category=SettingCategory.CHANNELS.value, + category=SettingCategory.CHANNELS, default_value=[], description="Channels to monitor (empty means all channels)", - data_type=list + data_type=list, + error_message="Enabled channels must be a list of valid channel IDs" ), "allowed_roles": SettingDefinition( name="allowed_roles", - category=SettingCategory.PERMISSIONS.value, + category=SettingCategory.PERMISSIONS, default_value=[], description="Roles allowed to use archiver (empty means all roles)", - data_type=list + data_type=list, + error_message="Allowed roles must be a list of valid role IDs" ), "video_format": SettingDefinition( name="video_format", - category=SettingCategory.VIDEO.value, + category=SettingCategory.VIDEO, default_value=VideoFormat.MP4.value, description="Format for archived videos", data_type=str, - choices=[format.value for format in VideoFormat] + choices=[format.value for format in VideoFormat], + error_message=f"Video format must be one of: {', '.join(f.value for f in VideoFormat)}" ), "video_quality": SettingDefinition( name="video_quality", - category=SettingCategory.VIDEO.value, + category=SettingCategory.VIDEO, default_value=VideoQuality.HIGH.value, description="Quality preset for archived videos", data_type=str, - choices=[quality.value for quality in VideoQuality] + choices=[quality.value for quality in VideoQuality], + error_message=f"Video quality must be one of: {', '.join(q.value for q in VideoQuality)}" ), "max_file_size": SettingDefinition( name="max_file_size", - category=SettingCategory.VIDEO.value, + category=SettingCategory.VIDEO, default_value=8, description="Maximum file size in MB", data_type=int, min_value=1, - max_value=100 + max_value=100, + error_message="Max file size must be between 1 and 100 MB" ), "message_duration": SettingDefinition( name="message_duration", - category=SettingCategory.MESSAGES.value, + category=SettingCategory.MESSAGES, default_value=30, description="Duration to show status messages (seconds)", data_type=int, min_value=5, - max_value=300 + max_value=300, + error_message="Message duration must be between 5 and 300 seconds" ), "message_template": SettingDefinition( name="message_template", - category=SettingCategory.MESSAGES.value, + category=SettingCategory.MESSAGES, default_value="{author} archived a video from {channel}", description="Template for archive messages", - data_type=str + data_type=str, + error_message="Message template must contain {author} and {channel} placeholders" ), "concurrent_downloads": SettingDefinition( name="concurrent_downloads", - category=SettingCategory.PERFORMANCE.value, + category=SettingCategory.PERFORMANCE, default_value=2, description="Maximum concurrent downloads", data_type=int, min_value=1, - max_value=5 + max_value=5, + error_message="Concurrent downloads must be between 1 and 5" ), "enabled_sites": SettingDefinition( name="enabled_sites", - category=SettingCategory.FEATURES.value, + category=SettingCategory.FEATURES, default_value=None, description="Sites to enable archiving for (None means all sites)", data_type=list, - required=False + required=False, + error_message="Enabled sites must be a list of valid site identifiers" ), "use_database": SettingDefinition( name="use_database", - 
category=SettingCategory.FEATURES.value,
+            category=SettingCategory.FEATURES,
             default_value=False,
             description="Enable database tracking of archived videos",
             data_type=bool
@@ -152,12 +202,28 @@ class Settings:
 
     @classmethod
     def get_setting_definition(cls, setting: str) -> Optional[SettingDefinition]:
-        """Get definition for a setting"""
+        """
+        Get definition for a setting.
+
+        Args:
+            setting: Setting name
+
+        Returns:
+            Setting definition or None if not found
+        """
         return cls.SETTINGS.get(setting)
 
     @classmethod
-    def get_settings_by_category(cls, category: str) -> Dict[str, SettingDefinition]:
-        """Get all settings in a category"""
+    def get_settings_by_category(cls, category: SettingCategory) -> Dict[str, SettingDefinition]:
+        """
+        Get all settings in a category.
+
+        Args:
+            category: Setting category
+
+        Returns:
+            Dictionary of settings in the category
+        """
         return {
             name: definition
             for name, definition in cls.SETTINGS.items()
@@ -165,36 +231,109 @@ class Settings:
             if definition.category == category
         }
 
     @classmethod
-    def validate_setting(cls, setting: str, value: Any) -> bool:
-        """Validate a setting value"""
+    def validate_setting(cls, setting: str, value: Any) -> ValidationResult:
+        """
+        Validate a setting value.
+
+        Args:
+            setting: Setting name
+            value: Value to validate
+
+        Returns:
+            Validation result dictionary
+
+        Raises:
+            ConfigurationError: If setting definition is not found
+        """
         definition = cls.get_setting_definition(setting)
         if not definition:
-            return False
+            raise ConfigurationError(
+                f"Unknown setting: {setting}",
+                context=ErrorContext(
+                    "Settings",
+                    "validation",
+                    {"setting": setting},
+                    ErrorSeverity.HIGH
+                )
+            )
+
+        details = {
+            "setting": setting,
+            "value": value,
+            "type": type(value).__name__,
+            "expected_type": definition.data_type.__name__
+        }
+
+        # Required/None handling must run before the type check so optional
+        # settings that default to None are not rejected as type errors
+        if value is None:
+            if definition.required:
+                return ValidationResult(
+                    valid=False,
+                    error="Required setting cannot be None",
+                    details=details
+                )
+            return ValidationResult(valid=True, error=None, details=details)
 
         # Check type
         if not isinstance(value, definition.data_type):
-            return False
+            return ValidationResult(
+                valid=False,
+                error=f"Invalid type: expected {definition.data_type.__name__}, got {type(value).__name__}",
+                details=details
+            )
 
-        # Check required
-        if definition.required and value is None:
-            return False
-
         # Check choices
         if definition.choices and value not in definition.choices:
-            return False
+            return ValidationResult(
+                valid=False,
+                error=f"Value must be one of: {', '.join(map(str, definition.choices))}",
+                details=details
+            )
 
         # Check numeric bounds
         if isinstance(value, (int, float)):
             if definition.min_value is not None and value < definition.min_value:
-                return False
+                return ValidationResult(
+                    valid=False,
+                    error=f"Value must be at least {definition.min_value}",
+                    details=details
+                )
             if definition.max_value is not None and value > definition.max_value:
-                return False
+                return ValidationResult(
+                    valid=False,
+                    error=f"Value must be at most {definition.max_value}",
+                    details=details
+                )
 
-        return True
+        # Custom validation
+        if definition.validation_func:
+            try:
+                result = definition.validation_func(value)
+                if not result:
+                    return ValidationResult(
+                        valid=False,
+                        error=definition.error_message or "Validation failed",
+                        details=details
+                    )
+            except Exception as e:
+                return ValidationResult(
+                    valid=False,
+                    error=str(e),
+                    details=details
+                )
+
+        return ValidationResult(
+            valid=True,
+            error=None,
+            details=details
+        )
 
     @property
     def default_guild_settings(self) -> Dict[str, Any]:
-        """Default settings for guild configuration"""
+        """
+        Default settings for guild configuration.
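+
+        Typically fed straight into Red's Config registration (sketch,
+        assuming a ``Config`` instance named ``config``):
+
+            config.register_guild(**Settings().default_guild_settings)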
+ + Returns: + Dictionary of default settings + """ return { name: definition.default_value for name, definition in self.SETTINGS.items() @@ -202,14 +341,22 @@ class Settings: @classmethod def get_setting_help(cls, setting: str) -> Optional[str]: - """Get help text for a setting""" + """ + Get help text for a setting. + + Args: + setting: Setting name + + Returns: + Help text or None if setting not found + """ definition = cls.get_setting_definition(setting) if not definition: return None help_text = [ f"Setting: {definition.name}", - f"Category: {definition.category}", + f"Category: {definition.category.name}", f"Description: {definition.description}", f"Type: {definition.data_type.__name__}", f"Required: {definition.required}", @@ -224,5 +371,7 @@ class Settings: help_text.append(f"Maximum: {definition.max_value}") if definition.depends_on: help_text.append(f"Depends on: {definition.depends_on}") + if definition.error_message: + help_text.append(f"Error: {definition.error_message}") return "\n".join(help_text) diff --git a/videoarchiver/database/schema_manager.py b/videoarchiver/database/schema_manager.py index 545a36c..7d75e82 100644 --- a/videoarchiver/database/schema_manager.py +++ b/videoarchiver/database/schema_manager.py @@ -3,81 +3,283 @@ import logging import sqlite3 from pathlib import Path -from typing import List +from typing import List, Dict, Any, Optional, TypedDict, ClassVar, Union +from enum import Enum, auto +from datetime import datetime + +from ..utils.exceptions import DatabaseError, ErrorContext, ErrorSeverity logger = logging.getLogger("DBSchemaManager") +class SchemaState(Enum): + """Schema states""" + + UNINITIALIZED = auto() + INITIALIZING = auto() + READY = auto() + MIGRATING = auto() + ERROR = auto() + + +class MigrationType(Enum): + """Migration types""" + + CREATE = auto() + ALTER = auto() + INDEX = auto() + DATA = auto() + + +class SchemaVersion(TypedDict): + """Type definition for schema version""" + + version: int + last_updated: str + migrations_applied: List[str] + + +class MigrationResult(TypedDict): + """Type definition for migration result""" + + success: bool + error: Optional[str] + migration_type: str + duration: float + timestamp: str + + +class SchemaStatus(TypedDict): + """Type definition for schema status""" + + state: str + current_version: int + target_version: int + last_migration: Optional[str] + error: Optional[str] + initialized: bool + + class DatabaseSchemaManager: """Manages database schema creation and updates""" - SCHEMA_VERSION = 1 # Increment when schema changes + SCHEMA_VERSION: ClassVar[int] = 1 # Increment when schema changes + MIGRATION_TIMEOUT: ClassVar[float] = 30.0 # Seconds - def __init__(self, db_path: Path): + def __init__(self, db_path: Path) -> None: + """ + Initialize schema manager. + + Args: + db_path: Path to SQLite database file + """ self.db_path = db_path + self.state = SchemaState.UNINITIALIZED + self.last_error: Optional[str] = None + self.last_migration: Optional[str] = None def initialize_schema(self) -> None: - """Initialize or update the database schema""" + """ + Initialize or update the database schema. 
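+
+        Typical usage (illustrative; the database path is an assumption):
+
+            manager = DatabaseSchemaManager(data_dir / "archived_videos.db")
+            manager.initialize_schema()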
+
+        Raises:
+            DatabaseError: If schema initialization fails
+        """
+        current_version = 0  # Bound before any query so the error context below never dangles
         try:
+            self.state = SchemaState.INITIALIZING
             self._create_schema_version_table()
             current_version = self._get_schema_version()
 
             if current_version < self.SCHEMA_VERSION:
+                self.state = SchemaState.MIGRATING
                 self._apply_migrations(current_version)
                 self._update_schema_version()
 
-        except sqlite3.Error as e:
-            logger.error(f"Schema initialization error: {e}")
-            raise
+            self.state = SchemaState.READY
+
+        except (sqlite3.Error, DatabaseError) as e:
+            self.state = SchemaState.ERROR
+            self.last_error = str(e)
+            if isinstance(e, DatabaseError):
+                raise  # Helper methods already attach their own error context
+            error = f"Schema initialization failed: {str(e)}"
+            logger.error(error, exc_info=True)
+            raise DatabaseError(
+                error,
+                context=ErrorContext(
+                    "SchemaManager",
+                    "initialize_schema",
+                    {"current_version": current_version},
+                    ErrorSeverity.CRITICAL,
+                ),
+            )
 
     def _create_schema_version_table(self) -> None:
-        """Create schema version tracking table"""
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
-            cursor.execute(
-                """
-                CREATE TABLE IF NOT EXISTS schema_version (
-                    version INTEGER PRIMARY KEY
+        """
+        Create schema version tracking table.
+
+        Raises:
+            DatabaseError: If table creation fails
+        """
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    """
+                    CREATE TABLE IF NOT EXISTS schema_version (
+                        version INTEGER PRIMARY KEY,
+                        last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                        migrations_applied TEXT
+                    )
+                    """
                 )
-                """
+                # Insert initial version if table is empty
+                cursor.execute(
+                    """
+                    INSERT OR IGNORE INTO schema_version (version, migrations_applied)
+                    VALUES (0, '[]')
+                    """
+                )
+                conn.commit()
+
+        except sqlite3.Error as e:
+            error = f"Failed to create schema version table: {str(e)}"
+            logger.error(error, exc_info=True)
+            raise DatabaseError(
+                error,
+                context=ErrorContext(
+                    "SchemaManager",
+                    "create_schema_version_table",
+                    None,
+                    ErrorSeverity.CRITICAL,
+                ),
             )
-            # Insert initial version if table is empty
-            cursor.execute("INSERT OR IGNORE INTO schema_version VALUES (0)")
-            conn.commit()
 
     def _get_schema_version(self) -> int:
-        """Get current schema version"""
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
-            cursor.execute("SELECT version FROM schema_version LIMIT 1")
-            result = cursor.fetchone()
-            return result[0] if result else 0
+        """
+        Get current schema version.
+
+        Returns:
+            Current schema version
+
+        Raises:
+            DatabaseError: If version query fails
+        """
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT version FROM schema_version LIMIT 1")
+                result = cursor.fetchone()
+                return result[0] if result else 0
+
+        except sqlite3.Error as e:
+            error = f"Failed to get schema version: {str(e)}"
+            logger.error(error, exc_info=True)
+            raise DatabaseError(
+                error,
+                context=ErrorContext(
+                    "SchemaManager", "get_schema_version", None, ErrorSeverity.HIGH
+                ),
+            )
 
     def _update_schema_version(self) -> None:
-        """Update schema version to current"""
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
-            cursor.execute(
-                "UPDATE schema_version SET version = ?", (self.SCHEMA_VERSION,)
+        """
+        Update schema version to current.
+ + Raises: + DatabaseError: If version update fails + """ + try: + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + cursor.execute( + """ + UPDATE schema_version + SET version = ?, last_updated = CURRENT_TIMESTAMP + """, + (self.SCHEMA_VERSION,), + ) + conn.commit() + + except sqlite3.Error as e: + error = f"Failed to update schema version: {str(e)}" + logger.error(error, exc_info=True) + raise DatabaseError( + error, + context=ErrorContext( + "SchemaManager", + "update_schema_version", + {"target_version": self.SCHEMA_VERSION}, + ErrorSeverity.HIGH, + ), ) - conn.commit() def _apply_migrations(self, current_version: int) -> None: - """Apply necessary schema migrations""" + """ + Apply necessary schema migrations. + + Args: + current_version: Current schema version + + Raises: + DatabaseError: If migrations fail + """ migrations = self._get_migrations(current_version) + results: List[MigrationResult] = [] with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() for migration in migrations: + start_time = datetime.utcnow() try: cursor.executescript(migration) conn.commit() + self.last_migration = migration + + results.append( + MigrationResult( + success=True, + error=None, + migration_type=MigrationType.ALTER.name, + duration=(datetime.utcnow() - start_time).total_seconds(), + timestamp=datetime.utcnow().isoformat(), + ) + ) + except sqlite3.Error as e: - logger.error(f"Migration failed: {e}") - raise + error = f"Migration failed: {str(e)}" + logger.error(error, exc_info=True) + results.append( + MigrationResult( + success=False, + error=str(e), + migration_type=MigrationType.ALTER.name, + duration=(datetime.utcnow() - start_time).total_seconds(), + timestamp=datetime.utcnow().isoformat(), + ) + ) + raise DatabaseError( + error, + context=ErrorContext( + "SchemaManager", + "apply_migrations", + { + "current_version": current_version, + "migration": migration, + "results": results, + }, + ErrorSeverity.CRITICAL, + ), + ) def _get_migrations(self, current_version: int) -> List[str]: - """Get list of migrations to apply""" + """ + Get list of migrations to apply. + + Args: + current_version: Current schema version + + Returns: + List of migration scripts + """ migrations = [] # Version 0 to 1: Initial schema @@ -95,7 +297,11 @@ class DatabaseSchemaManager: duration INTEGER, format TEXT, resolution TEXT, - bitrate INTEGER + bitrate INTEGER, + error_count INTEGER DEFAULT 0, + last_error TEXT, + last_accessed TIMESTAMP, + metadata TEXT ); CREATE INDEX IF NOT EXISTS idx_guild_channel @@ -103,6 +309,9 @@ class DatabaseSchemaManager: CREATE INDEX IF NOT EXISTS idx_archived_at ON archived_videos(archived_at); + + CREATE INDEX IF NOT EXISTS idx_last_accessed + ON archived_videos(last_accessed); """ ) @@ -111,3 +320,57 @@ class DatabaseSchemaManager: # migrations.append(...) return migrations + + def get_status(self) -> SchemaStatus: + """ + Get current schema status. + + Returns: + Schema status information + """ + return SchemaStatus( + state=self.state.name, + current_version=self._get_schema_version(), + target_version=self.SCHEMA_VERSION, + last_migration=self.last_migration, + error=self.last_error, + initialized=self.state == SchemaState.READY, + ) + + def get_version_info(self) -> SchemaVersion: + """ + Get detailed version information. 
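+
+        Example shape of the returned mapping (values illustrative):
+
+            {"version": 1, "last_updated": "2024-11-16 22:32:08",
+             "migrations_applied": []}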
+ + Returns: + Schema version information + + Raises: + DatabaseError: If version query fails + """ + try: + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + cursor.execute( + """ + SELECT version, last_updated, migrations_applied + FROM schema_version LIMIT 1 + """ + ) + result = cursor.fetchone() + if result: + return SchemaVersion( + version=result[0], + last_updated=result[1], + migrations_applied=result[2].split(",") if result[2] else [], + ) + return SchemaVersion(version=0, last_updated="", migrations_applied=[]) + + except sqlite3.Error as e: + error = f"Failed to get version info: {str(e)}" + logger.error(error, exc_info=True) + raise DatabaseError( + error, + context=ErrorContext( + "SchemaManager", "get_version_info", None, ErrorSeverity.HIGH + ), + ) diff --git a/videoarchiver/processor/__init__.py b/videoarchiver/processor/__init__.py index bdca8e4..4e24c1a 100644 --- a/videoarchiver/processor/__init__.py +++ b/videoarchiver/processor/__init__.py @@ -1,45 +1,212 @@ """Video processing module for VideoArchiver""" +from typing import Dict, Any, Optional, Union, List, Tuple +import discord + from .core import VideoProcessor -from .constants import REACTIONS -from .progress_tracker import ProgressTracker +from .constants import ( + REACTIONS, + ReactionType, + ReactionEmojis, + ProgressEmojis, + get_reaction, + get_progress_emoji +) +from .url_extractor import ( + URLExtractor, + URLMetadata, + URLPattern, + URLType, + URLPatternManager, + URLValidator, + URLMetadataExtractor +) +from .message_validator import ( + MessageValidator, + ValidationContext, + ValidationRule, + ValidationResult, + ValidationRuleManager, + ValidationCache, + ValidationStats, + ValidationCacheEntry, + ValidationError +) from .message_handler import MessageHandler from .queue_handler import QueueHandler +from .reactions import ( + handle_archived_reaction, + update_queue_position_reaction, + update_progress_reaction, + update_download_progress_reaction +) # Export public classes and constants __all__ = [ + # Core components "VideoProcessor", - "REACTIONS", - "ProgressTracker", "MessageHandler", "QueueHandler", + + # URL Extraction + "URLExtractor", + "URLMetadata", + "URLPattern", + "URLType", + "URLPatternManager", + "URLValidator", + "URLMetadataExtractor", + + # Message Validation + "MessageValidator", + "ValidationContext", + "ValidationRule", + "ValidationResult", + "ValidationRuleManager", + "ValidationCache", + "ValidationStats", + "ValidationCacheEntry", + "ValidationError", + + # Constants and enums + "REACTIONS", + "ReactionType", + "ReactionEmojis", + "ProgressEmojis", + + # Helper functions + "get_reaction", + "get_progress_emoji", + "extract_urls", + "validate_message", + "update_download_progress", + "complete_download", + "increment_download_retries", + "get_download_progress", + "get_active_operations", + "get_validation_stats", + "clear_caches", + + # Reaction handlers + "handle_archived_reaction", + "update_queue_position_reaction", + "update_progress_reaction", + "update_download_progress_reaction", ] -# Create a shared progress tracker instance for module-level access -progress_tracker = ProgressTracker() +# Version information +__version__ = "1.0.0" +__author__ = "VideoArchiver Team" +__description__ = "Video processing module for archiving Discord videos" +# Create shared instances for module-level access +url_extractor = URLExtractor() +message_validator = MessageValidator() -# Export progress tracking functions that wrap the instance methods -def 
update_download_progress(url, progress_data): - """Update download progress for a specific URL""" +# URL extraction helper functions +async def extract_urls( + message: discord.Message, + enabled_sites: Optional[List[str]] = None +) -> List[URLMetadata]: + """ + Extract video URLs from a Discord message. + + Args: + message: Discord message to extract URLs from + enabled_sites: Optional list of enabled site identifiers + + Returns: + List of URLMetadata objects for extracted URLs + """ + return await url_extractor.extract_urls(message, enabled_sites) + +async def validate_message( + message: discord.Message, + settings: Dict[str, Any] +) -> Tuple[bool, Optional[str]]: + """ + Validate a Discord message. + + Args: + message: Discord message to validate + settings: Guild settings dictionary + + Returns: + Tuple of (is_valid, reason) + + Raises: + ValidationError: If validation fails unexpectedly + """ + return await message_validator.validate_message(message, settings) + +# Progress tracking helper functions +def update_download_progress(url: str, progress_data: Dict[str, Any]) -> None: + """ + Update download progress for a specific URL. + + Args: + url: The URL being downloaded + progress_data: Dictionary containing progress information + """ progress_tracker.update_download_progress(url, progress_data) - -def complete_download(url): - """Mark a download as complete""" +def complete_download(url: str) -> None: + """ + Mark a download as complete. + + Args: + url: The URL that completed downloading + """ progress_tracker.complete_download(url) - -def increment_download_retries(url): - """Increment retry count for a download""" +def increment_download_retries(url: str) -> None: + """ + Increment retry count for a download. + + Args: + url: The URL being retried + """ progress_tracker.increment_download_retries(url) - -def get_download_progress(url=None): - """Get download progress for a specific URL or all downloads""" +def get_download_progress(url: Optional[str] = None) -> Union[Dict[str, Any], Dict[str, Dict[str, Any]]]: + """ + Get download progress for a specific URL or all downloads. + + Args: + url: Optional URL to get progress for. If None, returns all download progress. + + Returns: + Dictionary containing progress information for one or all downloads + """ return progress_tracker.get_download_progress(url) - -def get_active_operations(): - """Get all active operations""" +def get_active_operations() -> Dict[str, Dict[str, Any]]: + """ + Get all active operations. + + Returns: + Dictionary containing information about all active operations + """ return progress_tracker.get_active_operations() + +def get_validation_stats() -> ValidationStats: + """ + Get message validation statistics. + + Returns: + Dictionary containing validation statistics and rule information + """ + return message_validator.get_stats() + +def clear_caches(message_id: Optional[int] = None) -> None: + """ + Clear URL and validation caches. + + Args: + message_id: Optional message ID to clear caches for. If None, clears all caches. 
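+
+    Example (illustrative; clears per-message cache entries when the source
+    message is deleted):
+
+        clear_caches(message.id)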
+    """
+    url_extractor.clear_cache(message_id)
+    message_validator.clear_cache(message_id)
+
+# Shared progress tracker instance. The import is placed here because the
+# top-level import was removed above; the helper functions defined earlier
+# resolve the name at call time.
+from .progress_tracker import ProgressTracker
+
+progress_tracker = ProgressTracker()
diff --git a/videoarchiver/processor/cleanup_manager.py b/videoarchiver/processor/cleanup_manager.py
index 2331aef..d8f75d8 100644
--- a/videoarchiver/processor/cleanup_manager.py
+++ b/videoarchiver/processor/cleanup_manager.py
@@ -2,25 +2,37 @@
 
 import logging
 import asyncio
 from enum import Enum
-from dataclasses import dataclass
-from typing import Optional, Dict, Any, List, Set
+from dataclasses import dataclass, field
+from typing import Optional, Dict, Any, List, Set, TypedDict, ClassVar, Callable, Awaitable
 from datetime import datetime
+
+from .queue_handler import QueueHandler
+from ..ffmpeg.ffmpeg_manager import FFmpegManager
+from ..utils.exceptions import CleanupError
 
 logger = logging.getLogger("VideoArchiver")
 
 class CleanupStage(Enum):
     """Cleanup stages (string-valued; .value is formatted into logs and stats)"""
     QUEUE = "queue"
     FFMPEG = "ffmpeg"
     TASKS = "tasks"
     RESOURCES = "resources"
 
 class CleanupStrategy(Enum):
     """Cleanup strategies (string-valued; .value is formatted into logs and stats)"""
     NORMAL = "normal"
     FORCE = "force"
     GRACEFUL = "graceful"
+
+class CleanupStats(TypedDict):
+    """Type definition for cleanup statistics"""
+    total_cleanups: int
+    active_cleanups: int
+    success_rate: float
+    average_duration: float
+    stage_success_rates: Dict[str, float]
 
 @dataclass
 class CleanupResult:
@@ -29,33 +41,64 @@ class CleanupResult:
     stage: CleanupStage
     error: Optional[str] = None
     duration: float = 0.0
+    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
+
+@dataclass
+class CleanupOperation:
+    """Represents a cleanup operation"""
+    stage: CleanupStage
+    func: Callable[[], Awaitable[None]]
+    force_func: Optional[Callable[[], Awaitable[None]]] = None
+    timeout: float = 30.0  # Default timeout in seconds
 
 class CleanupTracker:
     """Tracks cleanup operations"""
 
-    def __init__(self):
+    MAX_HISTORY: ClassVar[int] = 1000  # Maximum number of cleanup operations to track
+
+    def __init__(self) -> None:
         self.cleanup_history: List[Dict[str, Any]] = []
         self.active_cleanups: Set[str] = set()
         self.start_times: Dict[str, datetime] = {}
         self.stage_results: Dict[str, List[CleanupResult]] = {}
 
     def start_cleanup(self, cleanup_id: str) -> None:
-        """Start tracking a cleanup operation"""
+        """
+        Start tracking a cleanup operation.
+
+        Args:
+            cleanup_id: Unique identifier for the cleanup operation
+        """
         self.active_cleanups.add(cleanup_id)
         self.start_times[cleanup_id] = datetime.utcnow()
         self.stage_results[cleanup_id] = []
 
+        # Trim old history if needed
+        if len(self.cleanup_history) >= self.MAX_HISTORY:
+            self.cleanup_history = self.cleanup_history[-self.MAX_HISTORY:]
+
     def record_stage_result(
         self,
         cleanup_id: str,
         result: CleanupResult
     ) -> None:
-        """Record result of a cleanup stage"""
+        """
+        Record result of a cleanup stage.
+
+        Args:
+            cleanup_id: Cleanup operation identifier
+            result: Result of the cleanup stage
+        """
         if cleanup_id in self.stage_results:
             self.stage_results[cleanup_id].append(result)
 
     def end_cleanup(self, cleanup_id: str) -> None:
-        """End tracking a cleanup operation"""
+        """
+        End tracking a cleanup operation.
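+
+        The expected call pattern is start/record/end (sketch):
+
+            tracker.start_cleanup(cid)
+            tracker.record_stage_result(cid, result)
+            tracker.end_cleanup(cid)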
+ + Args: + cleanup_id: Cleanup operation identifier + """ if cleanup_id in self.active_cleanups: end_time = datetime.utcnow() self.cleanup_history.append({ @@ -69,15 +112,20 @@ class CleanupTracker: self.start_times.pop(cleanup_id) self.stage_results.pop(cleanup_id) - def get_cleanup_stats(self) -> Dict[str, Any]: - """Get cleanup statistics""" - return { - "total_cleanups": len(self.cleanup_history), - "active_cleanups": len(self.active_cleanups), - "success_rate": self._calculate_success_rate(), - "average_duration": self._calculate_average_duration(), - "stage_success_rates": self._calculate_stage_success_rates() - } + def get_cleanup_stats(self) -> CleanupStats: + """ + Get cleanup statistics. + + Returns: + Dictionary containing cleanup statistics + """ + return CleanupStats( + total_cleanups=len(self.cleanup_history), + active_cleanups=len(self.active_cleanups), + success_rate=self._calculate_success_rate(), + average_duration=self._calculate_average_duration(), + stage_success_rates=self._calculate_stage_success_rates() + ) def _calculate_success_rate(self) -> float: """Calculate overall cleanup success rate""" @@ -116,20 +164,49 @@ class CleanupTracker: class CleanupManager: """Manages cleanup operations for the video processor""" + CLEANUP_TIMEOUT: ClassVar[int] = 60 # Default timeout for entire cleanup operation + def __init__( self, - queue_handler, - ffmpeg_mgr: Optional[object] = None, + queue_handler: QueueHandler, + ffmpeg_mgr: Optional[FFmpegManager] = None, strategy: CleanupStrategy = CleanupStrategy.NORMAL - ): + ) -> None: self.queue_handler = queue_handler self.ffmpeg_mgr = ffmpeg_mgr self.strategy = strategy self._queue_task: Optional[asyncio.Task] = None self.tracker = CleanupTracker() + # Define cleanup operations + self.cleanup_operations: List[CleanupOperation] = [ + CleanupOperation( + stage=CleanupStage.QUEUE, + func=self._cleanup_queue, + force_func=self._force_cleanup_queue, + timeout=30.0 + ), + CleanupOperation( + stage=CleanupStage.FFMPEG, + func=self._cleanup_ffmpeg, + force_func=self._force_cleanup_ffmpeg, + timeout=15.0 + ), + CleanupOperation( + stage=CleanupStage.TASKS, + func=self._cleanup_tasks, + force_func=self._force_cleanup_tasks, + timeout=15.0 + ) + ] + async def cleanup(self) -> None: - """Perform normal cleanup of resources""" + """ + Perform normal cleanup of resources. 
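+
+        Stage failures abort the run unless the manager was constructed with
+        ``CleanupStrategy.GRACEFUL``, in which case the remaining stages are
+        still attempted. Illustrative sketch:
+
+            manager = CleanupManager(queue_handler, ffmpeg_mgr, CleanupStrategy.GRACEFUL)
+            await manager.cleanup()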
+ + Raises: + CleanupError: If cleanup fails + """ cleanup_id = f"cleanup_{datetime.utcnow().timestamp()}" self.tracker.start_cleanup(cleanup_id) @@ -137,35 +214,45 @@ class CleanupManager: logger.info("Starting normal cleanup...") # Clean up in stages - stages = [ - (CleanupStage.QUEUE, self._cleanup_queue), - (CleanupStage.FFMPEG, self._cleanup_ffmpeg), - (CleanupStage.TASKS, self._cleanup_tasks) - ] - - for stage, cleanup_func in stages: + for operation in self.cleanup_operations: try: start_time = datetime.utcnow() - await cleanup_func() + await asyncio.wait_for( + operation.func(), + timeout=operation.timeout + ) duration = (datetime.utcnow() - start_time).total_seconds() self.tracker.record_stage_result( cleanup_id, - CleanupResult(True, stage, duration=duration) + CleanupResult(True, operation.stage, duration=duration) ) - except Exception as e: - logger.error(f"Error in {stage.value} cleanup: {e}") + except asyncio.TimeoutError: + error = f"Cleanup stage {operation.stage.value} timed out" + logger.error(error) self.tracker.record_stage_result( cleanup_id, - CleanupResult(False, stage, str(e)) + CleanupResult(False, operation.stage, error) ) if self.strategy != CleanupStrategy.GRACEFUL: - raise + raise CleanupError(error) + except Exception as e: + error = f"Error in {operation.stage.value} cleanup: {e}" + logger.error(error) + self.tracker.record_stage_result( + cleanup_id, + CleanupResult(False, operation.stage, str(e)) + ) + if self.strategy != CleanupStrategy.GRACEFUL: + raise CleanupError(error) logger.info("Normal cleanup completed successfully") - except Exception as e: - logger.error(f"Error during normal cleanup: {str(e)}", exc_info=True) + except CleanupError: raise + except Exception as e: + error = f"Unexpected error during cleanup: {str(e)}" + logger.error(error, exc_info=True) + raise CleanupError(error) finally: self.tracker.end_cleanup(cleanup_id) @@ -178,26 +265,26 @@ class CleanupManager: logger.info("Starting force cleanup...") # Force cleanup in stages - stages = [ - (CleanupStage.QUEUE, self._force_cleanup_queue), - (CleanupStage.FFMPEG, self._force_cleanup_ffmpeg), - (CleanupStage.TASKS, self._force_cleanup_tasks) - ] + for operation in self.cleanup_operations: + if not operation.force_func: + continue - for stage, cleanup_func in stages: try: start_time = datetime.utcnow() - await cleanup_func() + await asyncio.wait_for( + operation.force_func(), + timeout=operation.timeout + ) duration = (datetime.utcnow() - start_time).total_seconds() self.tracker.record_stage_result( cleanup_id, - CleanupResult(True, stage, duration=duration) + CleanupResult(True, operation.stage, duration=duration) ) except Exception as e: - logger.error(f"Error in force {stage.value} cleanup: {e}") + logger.error(f"Error in force {operation.stage.value} cleanup: {e}") self.tracker.record_stage_result( cleanup_id, - CleanupResult(False, stage, str(e)) + CleanupResult(False, operation.stage, str(e)) ) logger.info("Force cleanup completed") @@ -209,6 +296,8 @@ class CleanupManager: async def _cleanup_queue(self) -> None: """Clean up queue handler""" + if not self.queue_handler: + raise CleanupError("Queue handler not initialized") await self.queue_handler.cleanup() async def _cleanup_ffmpeg(self) -> None: @@ -224,15 +313,22 @@ class CleanupManager: await self._queue_task except asyncio.CancelledError: pass + except Exception as e: + raise CleanupError(f"Error cleaning up queue task: {str(e)}") async def _force_cleanup_queue(self) -> None: """Force clean up queue handler""" + if not 
self.queue_handler:
+            raise CleanupError("Queue handler not initialized")
         await self.queue_handler.force_cleanup()
 
     async def _force_cleanup_ffmpeg(self) -> None:
         """Force clean up FFmpeg manager"""
         if self.ffmpeg_mgr:
-            self.ffmpeg_mgr.kill_all_processes()
+            try:
+                self.ffmpeg_mgr.kill_all_processes()
+            except Exception as e:
+                logger.error(f"Error force cleaning FFmpeg processes: {e}")
 
     async def _force_cleanup_tasks(self) -> None:
         """Force clean up tasks"""
@@ -240,13 +336,31 @@ class CleanupManager:
             self._queue_task.cancel()
 
     def set_queue_task(self, task: asyncio.Task) -> None:
-        """Set the queue processing task for cleanup purposes"""
+        """
+        Set the queue processing task for cleanup purposes.
+
+        Args:
+            task: Queue processing task to track
+        """
         self._queue_task = task
 
     def get_cleanup_stats(self) -> Dict[str, Any]:
-        """Get cleanup statistics"""
+        """
+        Get cleanup statistics.
+
+        Returns:
+            Dictionary containing cleanup statistics and status
+        """
         return {
             "stats": self.tracker.get_cleanup_stats(),
             "strategy": self.strategy.value,
-            "active_cleanups": len(self.tracker.active_cleanups)
+            "active_cleanups": len(self.tracker.active_cleanups),
+            "operations": [
+                {
+                    "stage": op.stage.value,
+                    "timeout": op.timeout,
+                    "has_force_cleanup": op.force_func is not None
+                }
+                for op in self.cleanup_operations
+            ]
         }
diff --git a/videoarchiver/processor/constants.py b/videoarchiver/processor/constants.py
index 71b988b..f200dd4 100644
--- a/videoarchiver/processor/constants.py
+++ b/videoarchiver/processor/constants.py
@@ -1,13 +1,77 @@
 """Constants for VideoProcessor"""
 
-# Reaction emojis
-REACTIONS = {
-    'queued': '📹',
-    'processing': '⚙️',
-    'success': '✅',
-    'error': '❌',
-    'archived': '🔄',  # New reaction for already archived videos
-    'numbers': ['1️⃣', '2️⃣', '3️⃣', '4️⃣', '5️⃣'],
-    'progress': ['⬛', '🟨', '🟩'],
-    'download': ['0️⃣', '2️⃣', '4️⃣', '6️⃣', '8️⃣', '🔟']
+from typing import Dict, Sequence, Tuple, Union
+from dataclasses import dataclass
+from enum import Enum
+
+class ReactionType(Enum):
+    """Types of reactions used in the processor"""
+    QUEUED = 'queued'
+    PROCESSING = 'processing'
+    SUCCESS = 'success'
+    ERROR = 'error'
+    ARCHIVED = 'archived'
+    NUMBERS = 'numbers'
+    PROGRESS = 'progress'
+    DOWNLOAD = 'download'
+
+@dataclass(frozen=True)
+class ReactionEmojis:
+    """Emoji constants for different reaction types"""
+    QUEUED: str = '📹'
+    PROCESSING: str = '⚙️'
+    SUCCESS: str = '✅'
+    ERROR: str = '❌'
+    ARCHIVED: str = '🔄'
+
+@dataclass(frozen=True)
+class ProgressEmojis:
+    """Emoji sequences for progress indicators (tuples keep the frozen defaults immutable)"""
+    NUMBERS: Tuple[str, ...] = ('1️⃣', '2️⃣', '3️⃣', '4️⃣', '5️⃣')
+    PROGRESS: Tuple[str, ...] = ('⬛', '🟨', '🟩')
+    DOWNLOAD: Tuple[str, ...] = ('0️⃣', '2️⃣', '4️⃣', '6️⃣', '8️⃣', '🔟')
+
+# Main reactions dictionary with type hints
+REACTIONS: Dict[str, Union[str, Tuple[str, ...]]] = {
+    ReactionType.QUEUED.value: ReactionEmojis.QUEUED,
+    ReactionType.PROCESSING.value: ReactionEmojis.PROCESSING,
+    ReactionType.SUCCESS.value: ReactionEmojis.SUCCESS,
+    ReactionType.ERROR.value: ReactionEmojis.ERROR,
+    ReactionType.ARCHIVED.value: ReactionEmojis.ARCHIVED,
+    ReactionType.NUMBERS.value: ProgressEmojis.NUMBERS,
+    ReactionType.PROGRESS.value: ProgressEmojis.PROGRESS,
+    ReactionType.DOWNLOAD.value: ProgressEmojis.DOWNLOAD
 }
+
+def get_reaction(reaction_type: Union[ReactionType, str]) -> Union[str, Tuple[str, ...]]:
+    """
+    Get reaction emoji(s) for a given reaction type.
+
+    Args:
+        reaction_type: The type of reaction to get, either as ReactionType enum or string
+
+    Returns:
+        Either a single emoji string or a tuple of emoji strings
+
+    Raises:
+        KeyError: If the reaction type doesn't exist
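+
+    Example (illustrative):
+        >>> get_reaction(ReactionType.SUCCESS)
+        '✅'
+        >>> get_reaction('progress')
+        ('⬛', '🟨', '🟩')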
+    """
+    key = reaction_type.value if isinstance(reaction_type, ReactionType) else reaction_type
+    return REACTIONS[key]
+
+def get_progress_emoji(progress: float, emoji_list: Sequence[str]) -> str:
+    """
+    Get the appropriate progress emoji based on a progress value.
+
+    Args:
+        progress: Progress value between 0 and 1
+        emoji_list: Sequence of emojis to choose from
+
+    Returns:
+        The emoji representing the current progress
+
+    Raises:
+        ValueError: If progress is outside the range [0, 1]
+    """
+    if not 0 <= progress <= 1:
+        raise ValueError("Progress must be between 0 and 1")
+
+    index = int(progress * (len(emoji_list) - 1))
+    return emoji_list[index]
diff --git a/videoarchiver/processor/core.py b/videoarchiver/processor/core.py
index cc29a59..333982c 100644
--- a/videoarchiver/processor/core.py
+++ b/videoarchiver/processor/core.py
@@ -2,43 +2,76 @@
 
 import logging
 import asyncio
 from enum import Enum
-from typing import Optional, Tuple, Dict, Any, List
-from datetime import datetime
+from typing import Optional, Tuple, Dict, Any, List, TypedDict, ClassVar
+from datetime import datetime, timedelta
 
 import discord
 from discord.ext import commands
 
 from .message_handler import MessageHandler
 from .queue_handler import QueueHandler
-from .progress_tracker import ProgressTracker
+from ..utils.progress_tracker import ProgressTracker
 from .status_display import StatusDisplay
-from .cleanup_manager import CleanupManager
+from .cleanup_manager import CleanupManager, CleanupStrategy
 from .constants import REACTIONS
+from ..queue.manager import EnhancedVideoQueueManager
+from ..ffmpeg.ffmpeg_manager import FFmpegManager
+from ..database.video_archive_db import VideoArchiveDB
+from ..config_manager import ConfigManager
+from ..utils.exceptions import ProcessorError
 
 logger = logging.getLogger("VideoArchiver")
 
 class ProcessorState(Enum):
     """Possible states of the video processor"""
     INITIALIZING = "initializing"
     READY = "ready"
     PROCESSING = "processing"
     PAUSED = "paused"
     ERROR = "error"
     SHUTDOWN = "shutdown"
 
 class OperationType(Enum):
     """Types of processor operations"""
     MESSAGE_PROCESSING = "message_processing"
     VIDEO_PROCESSING = "video_processing"
     QUEUE_MANAGEMENT = "queue_management"
     CLEANUP = "cleanup"
+
+class OperationDetails(TypedDict):
+    """Type definition for operation details"""
+    type: str
+    start_time: datetime
+    end_time: Optional[datetime]
+    status: str
+    details: Dict[str, Any]
+    error: Optional[str]
+
+class OperationStats(TypedDict):
+    """Type definition for operation statistics"""
+    total_operations: int
+    active_operations: int
+    success_count: int
+    error_count: int
+    success_rate: float
+
+class ProcessorStatus(TypedDict):
+    """Type definition for processor status"""
+    state: str
+    health: bool
+    operations: OperationStats
+    active_operations: Dict[str, OperationDetails]
+    last_health_check: Optional[str]
+    health_status: Dict[str, bool]
 
 class OperationTracker:
     """Tracks processor operations"""
 
-    def __init__(self):
-        self.operations: Dict[str, Dict[str, Any]] = {}
-        self.operation_history: List[Dict[str, Any]] = []
+    MAX_HISTORY: ClassVar[int] = 1000  # Maximum number
of operations to track + + def __init__(self) -> None: + self.operations: Dict[str, OperationDetails] = {} + self.operation_history: List[OperationDetails] = [] self.error_count = 0 self.success_count = 0 @@ -47,14 +80,25 @@ class OperationTracker: op_type: OperationType, details: Dict[str, Any] ) -> str: - """Start tracking an operation""" + """ + Start tracking an operation. + + Args: + op_type: Type of operation + details: Operation details + + Returns: + Operation ID string + """ op_id = f"{op_type.value}_{datetime.utcnow().timestamp()}" - self.operations[op_id] = { - "type": op_type.value, - "start_time": datetime.utcnow(), - "status": "running", - "details": details - } + self.operations[op_id] = OperationDetails( + type=op_type.value, + start_time=datetime.utcnow(), + end_time=None, + status="running", + details=details, + error=None + ) return op_id def end_operation( @@ -63,7 +107,14 @@ class OperationTracker: success: bool, error: Optional[str] = None ) -> None: - """End tracking an operation""" + """ + End tracking an operation. + + Args: + op_id: Operation ID + success: Whether operation succeeded + error: Optional error message + """ if op_id in self.operations: self.operations[op_id].update({ "end_time": datetime.utcnow(), @@ -78,28 +129,43 @@ class OperationTracker: else: self.error_count += 1 - def get_active_operations(self) -> Dict[str, Dict[str, Any]]: - """Get currently active operations""" + # Cleanup old history if needed + if len(self.operation_history) > self.MAX_HISTORY: + self.operation_history = self.operation_history[-self.MAX_HISTORY:] + + def get_active_operations(self) -> Dict[str, OperationDetails]: + """ + Get currently active operations. + + Returns: + Dictionary of active operations + """ return self.operations.copy() - def get_operation_stats(self) -> Dict[str, Any]: - """Get operation statistics""" - return { - "total_operations": len(self.operation_history) + len(self.operations), - "active_operations": len(self.operations), - "success_count": self.success_count, - "error_count": self.error_count, - "success_rate": ( - self.success_count / (self.success_count + self.error_count) - if (self.success_count + self.error_count) > 0 - else 0 - ) - } + def get_operation_stats(self) -> OperationStats: + """ + Get operation statistics. 
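+
+        Example (illustrative; ``tracker`` is an OperationTracker instance):
+            op_id = tracker.start_operation(OperationType.CLEANUP, {"type": "manual"})
+            tracker.end_operation(op_id, success=True)
+            stats = tracker.get_operation_stats()  # stats["success_count"] >= 1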
+ + Returns: + Dictionary containing operation statistics + """ + total = self.success_count + self.error_count + return OperationStats( + total_operations=len(self.operation_history) + len(self.operations), + active_operations=len(self.operations), + success_count=self.success_count, + error_count=self.error_count, + success_rate=self.success_count / total if total > 0 else 0.0 + ) class HealthMonitor: """Monitors processor health""" - def __init__(self, processor: 'VideoProcessor'): + HEALTH_CHECK_INTERVAL: ClassVar[int] = 60 # Seconds between health checks + ERROR_CHECK_INTERVAL: ClassVar[int] = 30 # Seconds between checks after error + SUCCESS_RATE_THRESHOLD: ClassVar[float] = 0.9 # 90% success rate threshold + + def __init__(self, processor: 'VideoProcessor') -> None: self.processor = processor self.last_check: Optional[datetime] = None self.health_status: Dict[str, bool] = {} @@ -117,6 +183,8 @@ class HealthMonitor: await self._monitor_task except asyncio.CancelledError: pass + except Exception as e: + logger.error(f"Error stopping health monitor: {e}") async def _monitor_health(self) -> None: """Monitor processor health""" @@ -134,17 +202,22 @@ class HealthMonitor: # Check operation health op_stats = self.processor.operation_tracker.get_operation_stats() self.health_status["operations"] = ( - op_stats["success_rate"] >= 0.9 # 90% success rate threshold + op_stats["success_rate"] >= self.SUCCESS_RATE_THRESHOLD ) - await asyncio.sleep(60) # Check every minute + await asyncio.sleep(self.HEALTH_CHECK_INTERVAL) except Exception as e: - logger.error(f"Health monitoring error: {e}") - await asyncio.sleep(30) # Shorter interval on error + logger.error(f"Health monitoring error: {e}", exc_info=True) + await asyncio.sleep(self.ERROR_CHECK_INTERVAL) def is_healthy(self) -> bool: - """Check if processor is healthy""" + """ + Check if processor is healthy. 
+ + Returns: + True if all components are healthy, False otherwise + """ return all(self.health_status.values()) class VideoProcessor: @@ -152,13 +225,13 @@ class VideoProcessor: def __init__( self, - bot, - config_manager, - components, - queue_manager=None, - ffmpeg_mgr=None, - db=None - ): + bot: commands.Bot, + config_manager: ConfigManager, + components: Dict[int, Dict[str, Any]], + queue_manager: Optional[EnhancedVideoQueueManager] = None, + ffmpeg_mgr: Optional[FFmpegManager] = None, + db: Optional[VideoArchiveDB] = None + ) -> None: self.bot = bot self.config = config_manager self.components = components @@ -171,29 +244,61 @@ class VideoProcessor: self.operation_tracker = OperationTracker() self.health_monitor = HealthMonitor(self) - # Initialize handlers - self.queue_handler = QueueHandler(bot, config_manager, components) - self.message_handler = MessageHandler(bot, config_manager, queue_manager) - self.progress_tracker = ProgressTracker() - self.cleanup_manager = CleanupManager(self.queue_handler, ffmpeg_mgr) + try: + # Initialize handlers + self.queue_handler = QueueHandler(bot, config_manager, components) + self.message_handler = MessageHandler(bot, config_manager, queue_manager) + self.progress_tracker = ProgressTracker() + self.cleanup_manager = CleanupManager( + self.queue_handler, + ffmpeg_mgr, + CleanupStrategy.NORMAL + ) - # Pass db to queue handler if it exists - if self.db: - self.queue_handler.db = self.db + # Pass db to queue handler if it exists + if self.db: + self.queue_handler.db = self.db - # Store queue task reference - self._queue_task = None - - # Mark as ready - self.state = ProcessorState.READY - logger.info("VideoProcessor initialized successfully") + # Store queue task reference + self._queue_task: Optional[asyncio.Task] = None + + # Mark as ready + self.state = ProcessorState.READY + logger.info("VideoProcessor initialized successfully") + + except Exception as e: + self.state = ProcessorState.ERROR + logger.error(f"Error initializing VideoProcessor: {e}", exc_info=True) + raise ProcessorError(f"Failed to initialize processor: {str(e)}") async def start(self) -> None: - """Start processor operations""" - await self.health_monitor.start_monitoring() + """ + Start processor operations. + + Raises: + ProcessorError: If startup fails + """ + try: + await self.health_monitor.start_monitoring() + logger.info("VideoProcessor started successfully") + except Exception as e: + error = f"Failed to start processor: {str(e)}" + logger.error(error, exc_info=True) + raise ProcessorError(error) - async def process_video(self, item) -> Tuple[bool, Optional[str]]: - """Process a video from the queue""" + async def process_video(self, item: Any) -> Tuple[bool, Optional[str]]: + """ + Process a video from the queue. 
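+
+        Example (illustrative; ``item`` is a queued video entry):
+            success, error = await processor.process_video(item)
+            if not success:
+                logger.warning(f"Video processing failed: {error}")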
+ + Args: + item: Queue item to process + + Returns: + Tuple of (success, error_message) + + Raises: + ProcessorError: If processing fails + """ op_id = self.operation_tracker.start_operation( OperationType.VIDEO_PROCESSING, {"item": str(item)} @@ -207,13 +312,23 @@ class VideoProcessor: self.operation_tracker.end_operation(op_id, success, error) return result except Exception as e: - self.operation_tracker.end_operation(op_id, False, str(e)) - raise + error = f"Video processing failed: {str(e)}" + self.operation_tracker.end_operation(op_id, False, error) + logger.error(error, exc_info=True) + raise ProcessorError(error) finally: self.state = ProcessorState.READY async def process_message(self, message: discord.Message) -> None: - """Process a message for video content""" + """ + Process a message for video content. + + Args: + message: Discord message to process + + Raises: + ProcessorError: If processing fails + """ op_id = self.operation_tracker.start_operation( OperationType.MESSAGE_PROCESSING, {"message_id": message.id} @@ -223,11 +338,18 @@ class VideoProcessor: await self.message_handler.process_message(message) self.operation_tracker.end_operation(op_id, True) except Exception as e: - self.operation_tracker.end_operation(op_id, False, str(e)) - raise + error = f"Message processing failed: {str(e)}" + self.operation_tracker.end_operation(op_id, False, error) + logger.error(error, exc_info=True) + raise ProcessorError(error) async def cleanup(self) -> None: - """Clean up resources and stop processing""" + """ + Clean up resources and stop processing. + + Raises: + ProcessorError: If cleanup fails + """ op_id = self.operation_tracker.start_operation( OperationType.CLEANUP, {"type": "normal"} @@ -239,12 +361,18 @@ class VideoProcessor: await self.cleanup_manager.cleanup() self.operation_tracker.end_operation(op_id, True) except Exception as e: - self.operation_tracker.end_operation(op_id, False, str(e)) - logger.error(f"Error during cleanup: {e}", exc_info=True) - raise + error = f"Cleanup failed: {str(e)}" + self.operation_tracker.end_operation(op_id, False, error) + logger.error(error, exc_info=True) + raise ProcessorError(error) async def force_cleanup(self) -> None: - """Force cleanup of resources""" + """ + Force cleanup of resources. + + Raises: + ProcessorError: If force cleanup fails + """ op_id = self.operation_tracker.start_operation( OperationType.CLEANUP, {"type": "force"} @@ -256,11 +384,18 @@ class VideoProcessor: await self.cleanup_manager.force_cleanup() self.operation_tracker.end_operation(op_id, True) except Exception as e: - self.operation_tracker.end_operation(op_id, False, str(e)) - raise + error = f"Force cleanup failed: {str(e)}" + self.operation_tracker.end_operation(op_id, False, error) + logger.error(error, exc_info=True) + raise ProcessorError(error) async def show_queue_details(self, ctx: commands.Context) -> None: - """Display detailed queue status""" + """ + Display detailed queue status. + + Args: + ctx: Command context + """ try: if not self.queue_manager: await ctx.send("Queue manager is not initialized.") @@ -280,25 +415,36 @@ class VideoProcessor: await ctx.send(embed=embed) except Exception as e: - logger.error(f"Error showing queue details: {e}", exc_info=True) + error = f"Failed to show queue details: {str(e)}" + logger.error(error, exc_info=True) await ctx.send(f"Error getting queue details: {str(e)}") def set_queue_task(self, task: asyncio.Task) -> None: - """Set the queue processing task""" + """ + Set the queue processing task. 
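+
+        Example (illustrative; ``process_queue`` stands in for the real
+        queue coroutine):
+            task = asyncio.create_task(queue_manager.process_queue())
+            processor.set_queue_task(task)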
+ + Args: + task: Queue processing task + """ self._queue_task = task self.cleanup_manager.set_queue_task(task) - def get_status(self) -> Dict[str, Any]: - """Get processor status""" - return { - "state": self.state.value, - "health": self.health_monitor.is_healthy(), - "operations": self.operation_tracker.get_operation_stats(), - "active_operations": self.operation_tracker.get_active_operations(), - "last_health_check": ( + def get_status(self) -> ProcessorStatus: + """ + Get processor status. + + Returns: + Dictionary containing processor status information + """ + return ProcessorStatus( + state=self.state.value, + health=self.health_monitor.is_healthy(), + operations=self.operation_tracker.get_operation_stats(), + active_operations=self.operation_tracker.get_active_operations(), + last_health_check=( self.health_monitor.last_check.isoformat() if self.health_monitor.last_check else None ), - "health_status": self.health_monitor.health_status - } + health_status=self.health_monitor.health_status + ) diff --git a/videoarchiver/processor/message_handler.py b/videoarchiver/processor/message_handler.py index fb1c542..9c2d843 100644 --- a/videoarchiver/processor/message_handler.py +++ b/videoarchiver/processor/message_handler.py @@ -2,52 +2,85 @@ import logging import asyncio -from enum import Enum -from typing import Optional, Dict, Any, List, Tuple -from datetime import datetime +from enum import Enum, auto +from typing import Optional, Dict, Any, List, Tuple, Set, TypedDict, ClassVar +from datetime import datetime, timedelta import discord +from discord.ext import commands -from .url_extractor import URLExtractor -from .message_validator import MessageValidator -from .queue_processor import QueueProcessor +from .url_extractor import URLExtractor, URLMetadata +from .message_validator import MessageValidator, ValidationError +from .queue_processor import QueueProcessor, QueuePriority from .constants import REACTIONS +from ..queue.manager import EnhancedVideoQueueManager +from ..config_manager import ConfigManager +from ..utils.exceptions import MessageHandlerError logger = logging.getLogger("VideoArchiver") class MessageState(Enum): """Possible states of message processing""" - RECEIVED = "received" - VALIDATING = "validating" - EXTRACTING = "extracting" - PROCESSING = "processing" - COMPLETED = "completed" - FAILED = "failed" - IGNORED = "ignored" + RECEIVED = auto() + VALIDATING = auto() + EXTRACTING = auto() + PROCESSING = auto() + COMPLETED = auto() + FAILED = auto() + IGNORED = auto() class ProcessingStage(Enum): """Message processing stages""" - VALIDATION = "validation" - EXTRACTION = "extraction" - QUEUEING = "queueing" - COMPLETION = "completion" + VALIDATION = auto() + EXTRACTION = auto() + QUEUEING = auto() + COMPLETION = auto() + +class MessageCacheEntry(TypedDict): + """Type definition for message cache entry""" + valid: bool + reason: Optional[str] + timestamp: str + +class MessageStatus(TypedDict): + """Type definition for message status""" + state: Optional[MessageState] + stage: Optional[ProcessingStage] + error: Optional[str] + start_time: Optional[datetime] + end_time: Optional[datetime] + duration: Optional[float] class MessageCache: """Caches message validation results""" - def __init__(self, max_size: int = 1000): + def __init__(self, max_size: int = 1000) -> None: self.max_size = max_size - self._cache: Dict[int, Dict[str, Any]] = {} + self._cache: Dict[int, MessageCacheEntry] = {} self._access_times: Dict[int, datetime] = {} - def add(self, message_id: int, result: 
Dict[str, Any]) -> None: - """Add a result to cache""" + def add(self, message_id: int, result: MessageCacheEntry) -> None: + """ + Add a result to cache. + + Args: + message_id: Discord message ID + result: Validation result entry + """ if len(self._cache) >= self.max_size: self._cleanup_oldest() self._cache[message_id] = result self._access_times[message_id] = datetime.utcnow() - def get(self, message_id: int) -> Optional[Dict[str, Any]]: - """Get a cached result""" + def get(self, message_id: int) -> Optional[MessageCacheEntry]: + """ + Get a cached result. + + Args: + message_id: Discord message ID + + Returns: + Cached validation entry or None if not found + """ if message_id in self._cache: self._access_times[message_id] = datetime.utcnow() return self._cache[message_id] @@ -64,7 +97,9 @@ class MessageCache: class ProcessingTracker: """Tracks message processing state and progress""" - def __init__(self): + MAX_PROCESSING_TIME: ClassVar[int] = 300 # 5 minutes in seconds + + def __init__(self) -> None: self.states: Dict[int, MessageState] = {} self.stages: Dict[int, ProcessingStage] = {} self.errors: Dict[int, str] = {} @@ -72,7 +107,12 @@ class ProcessingTracker: self.end_times: Dict[int, datetime] = {} def start_processing(self, message_id: int) -> None: - """Start tracking a message""" + """ + Start tracking a message. + + Args: + message_id: Discord message ID + """ self.states[message_id] = MessageState.RECEIVED self.start_times[message_id] = datetime.utcnow() @@ -83,7 +123,15 @@ class ProcessingTracker: stage: Optional[ProcessingStage] = None, error: Optional[str] = None ) -> None: - """Update message state""" + """ + Update message state. + + Args: + message_id: Discord message ID + state: New message state + stage: Optional processing stage + error: Optional error message + """ self.states[message_id] = state if stage: self.stages[message_id] = stage @@ -92,25 +140,61 @@ class ProcessingTracker: if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED): self.end_times[message_id] = datetime.utcnow() - def get_status(self, message_id: int) -> Dict[str, Any]: - """Get processing status for a message""" - return { - "state": self.states.get(message_id), - "stage": self.stages.get(message_id), - "error": self.errors.get(message_id), - "start_time": self.start_times.get(message_id), - "end_time": self.end_times.get(message_id), - "duration": ( - (self.end_times[message_id] - self.start_times[message_id]).total_seconds() - if message_id in self.end_times and message_id in self.start_times + def get_status(self, message_id: int) -> MessageStatus: + """ + Get processing status for a message. + + Args: + message_id: Discord message ID + + Returns: + Dictionary containing message status information + """ + end_time = self.end_times.get(message_id) + start_time = self.start_times.get(message_id) + + return MessageStatus( + state=self.states.get(message_id), + stage=self.stages.get(message_id), + error=self.errors.get(message_id), + start_time=start_time, + end_time=end_time, + duration=( + (end_time - start_time).total_seconds() + if end_time and start_time else None ) - } + ) + + def is_message_stuck(self, message_id: int) -> bool: + """ + Check if a message is stuck in processing. 
+ + Args: + message_id: Discord message ID + + Returns: + True if message is stuck, False otherwise + """ + if message_id not in self.states or message_id not in self.start_times: + return False + + state = self.states[message_id] + if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED): + return False + + processing_time = (datetime.utcnow() - self.start_times[message_id]).total_seconds() + return processing_time > self.MAX_PROCESSING_TIME class MessageHandler: """Handles processing of messages for video content""" - def __init__(self, bot, config_manager, queue_manager): + def __init__( + self, + bot: discord.Client, + config_manager: ConfigManager, + queue_manager: EnhancedVideoQueueManager + ) -> None: self.bot = bot self.config_manager = config_manager self.url_extractor = URLExtractor() @@ -123,7 +207,15 @@ class MessageHandler: self._processing_lock = asyncio.Lock() async def process_message(self, message: discord.Message) -> None: - """Process a message for video content""" + """ + Process a message for video content. + + Args: + message: Discord message to process + + Raises: + MessageHandlerError: If there's an error during processing + """ # Start tracking self.tracker.start_processing(message.id) @@ -139,11 +231,19 @@ class MessageHandler: ) try: await message.add_reaction(REACTIONS["error"]) - except: - pass + except Exception as react_error: + logger.error(f"Failed to add error reaction: {react_error}") async def _process_message_internal(self, message: discord.Message) -> None: - """Internal message processing logic""" + """ + Internal message processing logic. + + Args: + message: Discord message to process + + Raises: + MessageHandlerError: If there's an error during processing + """ try: # Get guild settings settings = await self.config_manager.get_guild_settings(message.guild.id) @@ -164,15 +264,19 @@ class MessageHandler: MessageState.VALIDATING, ProcessingStage.VALIDATION ) - is_valid, reason = await self.message_validator.validate_message( - message, - settings - ) - # Cache result - self.validation_cache.add(message.id, { - "valid": is_valid, - "reason": reason - }) + try: + is_valid, reason = await self.message_validator.validate_message( + message, + settings + ) + # Cache result + self.validation_cache.add(message.id, MessageCacheEntry( + valid=is_valid, + reason=reason, + timestamp=datetime.utcnow().isoformat() + )) + except ValidationError as e: + raise MessageHandlerError(f"Validation failed: {str(e)}") if not is_valid: logger.debug(f"Message validation failed: {reason}") @@ -189,14 +293,17 @@ class MessageHandler: MessageState.EXTRACTING, ProcessingStage.EXTRACTION ) - urls = await self.url_extractor.extract_urls( - message, - enabled_sites=settings.get("enabled_sites") - ) - if not urls: - logger.debug("No valid URLs found in message") - self.tracker.update_state(message.id, MessageState.IGNORED) - return + try: + urls: List[URLMetadata] = await self.url_extractor.extract_urls( + message, + enabled_sites=settings.get("enabled_sites") + ) + if not urls: + logger.debug("No valid URLs found in message") + self.tracker.update_state(message.id, MessageState.IGNORED) + return + except Exception as e: + raise MessageHandlerError(f"URL extraction failed: {str(e)}") # Process URLs self.tracker.update_state( @@ -204,7 +311,14 @@ class MessageHandler: MessageState.PROCESSING, ProcessingStage.QUEUEING ) - await self.queue_processor.process_urls(message, urls) + try: + await self.queue_processor.process_urls( + message, + urls, + 
priority=QueuePriority.NORMAL + ) + except Exception as e: + raise MessageHandlerError(f"Queue processing failed: {str(e)}") # Mark completion self.tracker.update_state( @@ -213,13 +327,10 @@ class MessageHandler: ProcessingStage.COMPLETION ) - except Exception as e: - self.tracker.update_state( - message.id, - MessageState.FAILED, - error=str(e) - ) + except MessageHandlerError: raise + except Exception as e: + raise MessageHandlerError(f"Unexpected error: {str(e)}") async def format_archive_message( self, @@ -227,30 +338,49 @@ class MessageHandler: channel: discord.TextChannel, url: str ) -> str: - """Format message for archive channel""" + """ + Format message for archive channel. + + Args: + author: Optional message author + channel: Channel the message was posted in + url: URL being archived + + Returns: + Formatted message string + """ return await self.queue_processor.format_archive_message( author, channel, url ) - def get_message_status(self, message_id: int) -> Dict[str, Any]: - """Get processing status for a message""" + def get_message_status(self, message_id: int) -> MessageStatus: + """ + Get processing status for a message. + + Args: + message_id: Discord message ID + + Returns: + Dictionary containing message status information + """ return self.tracker.get_status(message_id) def is_healthy(self) -> bool: - """Check if handler is healthy""" - # Check for any stuck messages - current_time = datetime.utcnow() - for message_id, start_time in self.tracker.start_times.items(): - if ( - message_id in self.tracker.states and - self.tracker.states[message_id] not in ( - MessageState.COMPLETED, - MessageState.FAILED, - MessageState.IGNORED - ) and - (current_time - start_time).total_seconds() > 300 # 5 minutes timeout - ): - return False - return True + """ + Check if handler is healthy. 
+ + Returns: + True if handler is healthy, False otherwise + """ + try: + # Check for any stuck messages + for message_id in self.tracker.states: + if self.tracker.is_message_stuck(message_id): + logger.warning(f"Message {message_id} appears to be stuck in processing") + return False + return True + except Exception as e: + logger.error(f"Error checking health: {e}") + return False diff --git a/videoarchiver/processor/message_validator.py b/videoarchiver/processor/message_validator.py index a3ac955..7c08716 100644 --- a/videoarchiver/processor/message_validator.py +++ b/videoarchiver/processor/message_validator.py @@ -1,19 +1,36 @@ """Message validation functionality for video processing""" import logging -from enum import Enum -from dataclasses import dataclass -from typing import Dict, Optional, Tuple, List, Any, Callable, Set +from enum import Enum, auto +from dataclasses import dataclass, field +from typing import Dict, Optional, Tuple, List, Any, Callable, Set, TypedDict, ClassVar from datetime import datetime import discord +from ..utils.exceptions import ValidationError + logger = logging.getLogger("VideoArchiver") class ValidationResult(Enum): """Possible validation results""" - VALID = "valid" - INVALID = "invalid" - IGNORED = "ignored" + VALID = auto() + INVALID = auto() + IGNORED = auto() + +class ValidationStats(TypedDict): + """Type definition for validation statistics""" + total: int + valid: int + invalid: int + ignored: int + cached: int + +class ValidationCacheEntry(TypedDict): + """Type definition for validation cache entry""" + valid: bool + reason: Optional[str] + rule: Optional[str] + timestamp: str @dataclass class ValidationContext: @@ -28,22 +45,43 @@ class ValidationContext: attachment_count: int is_bot: bool timestamp: datetime + validation_time: str = field(default_factory=lambda: datetime.utcnow().isoformat()) @classmethod def from_message(cls, message: discord.Message, settings: Dict[str, Any]) -> 'ValidationContext': - """Create context from message""" - return cls( - message=message, - settings=settings, - guild_id=message.guild.id, - channel_id=message.channel.id, - author_id=message.author.id, - roles={role.id for role in message.author.roles}, - content_length=len(message.content) if message.content else 0, - attachment_count=len(message.attachments), - is_bot=message.author.bot, - timestamp=message.created_at - ) + """ + Create context from message. 
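+
+        Example (illustrative; inside message handling code):
+            ctx = ValidationContext.from_message(message, settings)
+            if ctx.is_bot or ctx.attachment_count == 0:
+                ...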
+ + Args: + message: Discord message to validate + settings: Guild settings dictionary + + Returns: + ValidationContext instance + + Raises: + ValidationError: If message or settings are invalid + """ + if not message.guild: + raise ValidationError("Message must be from a guild") + if not settings: + raise ValidationError("Settings dictionary cannot be empty") + + try: + return cls( + message=message, + settings=settings, + guild_id=message.guild.id, + channel_id=message.channel.id, + author_id=message.author.id, + roles={role.id for role in message.author.roles}, + content_length=len(message.content) if message.content else 0, + attachment_count=len(message.attachments), + is_bot=message.author.bot, + timestamp=message.created_at + ) + except Exception as e: + raise ValidationError(f"Failed to create validation context: {str(e)}") @dataclass class ValidationRule: @@ -53,24 +91,48 @@ class ValidationRule: validate: Callable[[ValidationContext], Tuple[bool, Optional[str]]] enabled: bool = True priority: int = 0 + error_count: int = field(default=0) + last_error: Optional[str] = field(default=None) + last_run: Optional[str] = field(default=None) + + def __post_init__(self) -> None: + """Validate rule after initialization""" + if not callable(self.validate): + raise ValueError("Validate must be a callable") + if self.priority < 0: + raise ValueError("Priority must be non-negative") class ValidationCache: """Caches validation results""" - def __init__(self, max_size: int = 1000): + def __init__(self, max_size: int = 1000) -> None: self.max_size = max_size - self._cache: Dict[int, Dict[str, Any]] = {} + self._cache: Dict[int, ValidationCacheEntry] = {} self._access_times: Dict[int, datetime] = {} - def add(self, message_id: int, result: Dict[str, Any]) -> None: - """Add validation result to cache""" + def add(self, message_id: int, result: ValidationCacheEntry) -> None: + """ + Add validation result to cache. + + Args: + message_id: Discord message ID + result: Validation result entry + """ if len(self._cache) >= self.max_size: self._cleanup_oldest() self._cache[message_id] = result self._access_times[message_id] = datetime.utcnow() - def get(self, message_id: int) -> Optional[Dict[str, Any]]: - """Get cached validation result""" + def get(self, message_id: int) -> Optional[ValidationCacheEntry]: + """ + Get cached validation result. 
+
+        Args:
+            message_id: Discord message ID
+
+        Returns:
+            Cached validation entry or None if not found
+        """
         if message_id in self._cache:
             self._access_times[message_id] = datetime.utcnow()
             return self._cache[message_id]
@@ -87,33 +149,28 @@ class ValidationCache:
 class ValidationRuleManager:
     """Manages validation rules"""
 
-    def __init__(self):
-        self.rules: List[ValidationRule] = [
-            ValidationRule(
-                name="content_check",
-                description="Check if message has content to process",
-                validate=self._validate_content,
-                priority=1
-            ),
-            ValidationRule(
-                name="guild_enabled",
-                description="Check if archiving is enabled for guild",
-                validate=self._validate_guild_enabled,
-                priority=2
-            ),
-            ValidationRule(
-                name="channel_enabled",
-                description="Check if channel is enabled for archiving",
-                validate=self._validate_channel,
-                priority=3
-            ),
-            ValidationRule(
-                name="user_roles",
-                description="Check if user has required roles",
-                validate=self._validate_user_roles,
-                priority=4
-            )
-        ]
+    # Each entry maps a rule name to the validator method that implements it,
+    # so the lookup below cannot silently miss a method.
+    DEFAULT_RULES: ClassVar[List[Tuple[str, str, str, int]]] = [
+        ("content_check", "_validate_content", "Check if message has content to process", 1),
+        ("guild_enabled", "_validate_guild_enabled", "Check if archiving is enabled for guild", 2),
+        ("channel_enabled", "_validate_channel", "Check if channel is enabled for archiving", 3),
+        ("user_roles", "_validate_user_roles", "Check if user has required roles", 4)
+    ]
+
+    def __init__(self) -> None:
+        self.rules: List[ValidationRule] = []
+        self._initialize_rules()
+
+    def _initialize_rules(self) -> None:
+        """Initialize default validation rules"""
+        for name, method_name, description, priority in self.DEFAULT_RULES:
+            validate_method = getattr(self, method_name, None)
+            if validate_method:
+                self.rules.append(ValidationRule(
+                    name=name,
+                    description=description,
+                    validate=validate_method,
+                    priority=priority
+                ))
         self.rules.sort(key=lambda x: x.priority)
 
     def _validate_content(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
@@ -145,10 +202,10 @@ class ValidationRuleManager:
 class MessageValidator:
     """Handles validation of messages for video processing"""
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.rule_manager = ValidationRuleManager()
         self.cache = ValidationCache()
-        self.validation_stats: Dict[str, int] = {
+        self.validation_stats: ValidationStats = {
             "total": 0,
             "valid": 0,
             "invalid": 0,
@@ -159,50 +216,80 @@ class MessageValidator:
     async def validate_message(
         self,
         message: discord.Message,
-        settings: Dict
+        settings: Dict[str, Any]
     ) -> Tuple[bool, Optional[str]]:
-        """Validate if a message should be processed"""
-        self.validation_stats["total"] += 1
+        """
+        Validate if a message should be processed.
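+
+        Example (illustrative):
+            is_valid, reason = await validator.validate_message(message, settings)
+            if not is_valid:
+                logger.debug(f"Skipping message: {reason}")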
+ + Args: + message: Discord message to validate + settings: Guild settings dictionary + + Returns: + Tuple of (is_valid, reason) + + Raises: + ValidationError: If validation fails unexpectedly + """ + try: + self.validation_stats["total"] += 1 - # Check cache - cached = self.cache.get(message.id) - if cached: - self.validation_stats["cached"] += 1 - return cached["valid"], cached.get("reason") + # Check cache + cached = self.cache.get(message.id) + if cached: + self.validation_stats["cached"] += 1 + return cached["valid"], cached.get("reason") - # Create validation context - ctx = ValidationContext.from_message(message, settings) + # Create validation context + ctx = ValidationContext.from_message(message, settings) - # Run validation rules - for rule in self.rule_manager.rules: - if not rule.enabled: - continue + # Run validation rules + for rule in self.rule_manager.rules: + if not rule.enabled: + continue - try: - valid, reason = rule.validate(ctx) - if not valid: - self.validation_stats["invalid"] += 1 - # Cache result - self.cache.add(message.id, { - "valid": False, - "reason": reason, - "rule": rule.name - }) - return False, reason - except Exception as e: - logger.error(f"Error in validation rule {rule.name}: {e}") - return False, f"Validation error: {str(e)}" + try: + rule.last_run = datetime.utcnow().isoformat() + valid, reason = rule.validate(ctx) + if not valid: + self.validation_stats["invalid"] += 1 + # Cache result + self.cache.add(message.id, ValidationCacheEntry( + valid=False, + reason=reason, + rule=rule.name, + timestamp=datetime.utcnow().isoformat() + )) + return False, reason + except Exception as e: + rule.error_count += 1 + rule.last_error = str(e) + logger.error(f"Error in validation rule {rule.name}: {e}", exc_info=True) + raise ValidationError(f"Validation rule {rule.name} failed: {str(e)}") - # Message passed all rules - self.validation_stats["valid"] += 1 - self.cache.add(message.id, { - "valid": True, - "reason": None - }) - return True, None + # Message passed all rules + self.validation_stats["valid"] += 1 + self.cache.add(message.id, ValidationCacheEntry( + valid=True, + reason=None, + rule=None, + timestamp=datetime.utcnow().isoformat() + )) + return True, None + + except ValidationError: + raise + except Exception as e: + logger.error(f"Unexpected error in message validation: {e}", exc_info=True) + raise ValidationError(f"Validation failed: {str(e)}") def get_stats(self) -> Dict[str, Any]: - """Get validation statistics""" + """ + Get validation statistics. + + Returns: + Dictionary containing validation statistics and rule information + """ return { "validation_stats": self.validation_stats.copy(), "rules": [ @@ -210,16 +297,27 @@ class MessageValidator: "name": rule.name, "description": rule.description, "enabled": rule.enabled, - "priority": rule.priority + "priority": rule.priority, + "error_count": rule.error_count, + "last_error": rule.last_error, + "last_run": rule.last_run } for rule in self.rule_manager.rules ] } def clear_cache(self, message_id: Optional[int] = None) -> None: - """Clear validation cache""" - if message_id: - self.cache._cache.pop(message_id, None) - self.cache._access_times.pop(message_id, None) - else: - self.cache = ValidationCache(self.cache.max_size) + """ + Clear validation cache. + + Args: + message_id: Optional message ID to clear cache for. If None, clears all cache. 
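+
+        Example (illustrative):
+            validator.clear_cache(message.id)  # drop a single entry
+            validator.clear_cache()            # reset the whole cache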
+ """ + try: + if message_id: + self.cache._cache.pop(message_id, None) + self.cache._access_times.pop(message_id, None) + else: + self.cache = ValidationCache(self.cache.max_size) + except Exception as e: + logger.error(f"Error clearing validation cache: {e}", exc_info=True) diff --git a/videoarchiver/processor/queue_handler.py b/videoarchiver/processor/queue_handler.py index b58db80..9833eb7 100644 --- a/videoarchiver/processor/queue_handler.py +++ b/videoarchiver/processor/queue_handler.py @@ -1,21 +1,55 @@ -"""Queue processing and video handling operations""" +"""Queue handling functionality for video processing""" -import os import logging import asyncio -import discord -from typing import Dict, Optional, Tuple, Any +import os +from enum import Enum, auto +from typing import Optional, Dict, Any, List, Tuple, Set, TypedDict, ClassVar, Callable from datetime import datetime +import discord +from ..utils.progress_tracker import ProgressTracker +from ..database.video_archive_db import VideoArchiveDB +from ..utils.download_manager import DownloadManager +from ..utils.message_manager import MessageManager +from ..utils.exceptions import QueueHandlerError +from ..queue.models import QueueItem +from ..config_manager import ConfigManager from .constants import REACTIONS -from .progress_tracker import ProgressTracker logger = logging.getLogger("VideoArchiver") +class QueueItemStatus(Enum): + """Status of a queue item""" + PENDING = auto() + PROCESSING = auto() + COMPLETED = auto() + FAILED = auto() + CANCELLED = auto() + +class QueueStats(TypedDict): + """Type definition for queue statistics""" + active_downloads: int + processing_items: int + completed_items: int + failed_items: int + average_processing_time: float + last_processed: Optional[str] + is_healthy: bool + class QueueHandler: """Handles queue processing and video operations""" - def __init__(self, bot, config_manager, components, db=None): + DOWNLOAD_TIMEOUT: ClassVar[int] = 3600 # 1 hour in seconds + MAX_RETRIES: ClassVar[int] = 3 + + def __init__( + self, + bot: discord.Client, + config_manager: ConfigManager, + components: Dict[int, Dict[str, Any]], + db: Optional[VideoArchiveDB] = None + ) -> None: self.bot = bot self.config_manager = config_manager self.components = components @@ -24,101 +58,240 @@ class QueueHandler: self._active_downloads: Dict[str, asyncio.Task] = {} self._active_downloads_lock = asyncio.Lock() self.progress_tracker = ProgressTracker() + self._stats: QueueStats = { + "active_downloads": 0, + "processing_items": 0, + "completed_items": 0, + "failed_items": 0, + "average_processing_time": 0.0, + "last_processed": None, + "is_healthy": True + } - async def process_video(self, item) -> Tuple[bool, Optional[str]]: - """Process a video from the queue""" + async def process_video(self, item: QueueItem) -> Tuple[bool, Optional[str]]: + """ + Process a video from the queue. 
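+
+        Example (illustrative; normally driven by the queue processor):
+            success, error = await queue_handler.process_video(item)
+            if not success:
+                logger.error(f"Queue item failed: {error}")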
+
+        Args:
+            item: Queue item to process
+
+        Returns:
+            Tuple of (success, error_message)
+
+        Raises:
+            QueueHandlerError: If there's an error during processing
+        """
         if self._unloading:
             return False, "Processor is unloading"
 
         file_path = None
         original_message = None
         download_task = None
+        start_time = datetime.utcnow()
 
         try:
-            # Start processing
+            self._stats["processing_items"] += 1
             item.start_processing()
             logger.info(f"Started processing video: {item.url}")
 
             # Check if video is already archived
-            if self.db and self.db.is_url_archived(item.url):
-                logger.info(f"Video already archived: {item.url}")
-                if original_message := await self._get_original_message(item):
-                    await original_message.add_reaction(REACTIONS["success"])
-                    archived_info = self.db.get_archived_video(item.url)
-                    if archived_info:
-                        await original_message.reply(f"This video was already archived. You can find it here: {archived_info[0]}")
-                item.finish_processing(True)
+            if self.db and await self._check_archived_video(item):
+                self._update_stats(True, start_time)
                 return True, None
 
-            guild_id = item.guild_id
-            if guild_id not in self.components:
-                error = f"No components found for guild {guild_id}"
-                item.finish_processing(False, error)
-                return False, error
-
-            components = self.components[guild_id]
+            # Get components
+            components = await self._get_components(item.guild_id)
             downloader = components.get("downloader")
             message_manager = components.get("message_manager")
 
             if not downloader or not message_manager:
-                error = f"Missing required components for guild {guild_id}"
-                item.finish_processing(False, error)
-                return False, error
+                raise QueueHandlerError(f"Missing required components for guild {item.guild_id}")
 
             # Get original message and update reactions
             original_message = await self._get_original_message(item)
             if original_message:
-                await original_message.remove_reaction(REACTIONS["queued"], self.bot.user)
-                await original_message.add_reaction(REACTIONS["processing"])
-                logger.info(f"Started processing message {item.message_id}")
+                await self._update_message_reactions(original_message, QueueItemStatus.PROCESSING)
 
-            # Create progress callback
-            progress_callback = self._create_progress_callback(original_message, item.url)
-
-            # Download video
-            success, file_path, error = await self._download_video(
-                downloader, item.url, progress_callback
+            # Download and archive video
+            file_path = await self._process_video_file(
                downloader, message_manager, item, original_message
             )
-            if not success:
-                if original_message:
-                    await original_message.add_reaction(REACTIONS["error"])
-                logger.error(f"Download failed for message {item.message_id}: {error}")
-                item.finish_processing(False, f"Failed to download video: {error}")
-                return False, f"Failed to download video: {error}"
 
-            # Archive video
-            success, error = await self._archive_video(
-                guild_id, original_message, message_manager, item.url, file_path
-            )
-
-            # Finish processing
-            item.finish_processing(success, error if not success else None)
-            return success, error
+            # Success
+            self._update_stats(True, start_time)
+            item.finish_processing(True)
+            if original_message:
+                await self._update_message_reactions(original_message, QueueItemStatus.COMPLETED)
+            return True, None
 
+        except QueueHandlerError as e:
+            logger.error(f"Queue handler error: {str(e)}")
+            self._handle_processing_error(item, original_message, str(e), start_time)
+            return False, str(e)
         except Exception as e:
             logger.error(f"Error processing video: {str(e)}", exc_info=True)
-            item.finish_processing(False, str(e))
+            self._handle_processing_error(item, original_message, str(e), start_time)
             return False, str(e)
         finally:
-            # Clean up downloaded file
-            if file_path and os.path.exists(file_path):
-                try:
-                    os.unlink(file_path)
-                except Exception as e:
-                    logger.error(f"Failed to clean up file {file_path}: {e}")
+            await self._cleanup_file(file_path)
 
-    async def _archive_video(self, guild_id: int, original_message: Optional[discord.Message],
-                           message_manager, url: str, file_path: str) -> Tuple[bool, Optional[str]]:
-        """Archive downloaded video"""
+    async def _check_archived_video(self, item: QueueItem) -> bool:
+        """Check if video is already archived and handle accordingly"""
+        if not self.db:
+            return False
+
+        if self.db.is_url_archived(item.url):
+            logger.info(f"Video already archived: {item.url}")
+            if original_message := await self._get_original_message(item):
+                await self._update_message_reactions(original_message, QueueItemStatus.COMPLETED)
+                archived_info = self.db.get_archived_video(item.url)
+                if archived_info:
+                    await original_message.reply(
+                        f"This video was already archived. You can find it here: {archived_info[0]}"
+                    )
+            item.finish_processing(True)
+            return True
+        return False
+
+    async def _get_components(
+        self,
+        guild_id: int
+    ) -> Dict[str, Any]:
+        """Get required components for processing"""
+        if guild_id not in self.components:
+            raise QueueHandlerError(f"No components found for guild {guild_id}")
+        return self.components[guild_id]
+
+    async def _process_video_file(
+        self,
+        downloader: DownloadManager,
+        message_manager: MessageManager,
+        item: QueueItem,
+        original_message: Optional[discord.Message]
+    ) -> Optional[str]:
+        """Download and process video file"""
+        # Create progress callback
+        progress_callback = self._create_progress_callback(original_message, item.url)
+
+        # Download video
+        success, file_path, error = await self._download_video(
+            downloader, item.url, progress_callback
+        )
+        if not success:
+            raise QueueHandlerError(f"Failed to download video: {error}")
+
+        # Archive video
+        success, error = await self._archive_video(
+            item.guild_id,
+            original_message,
+            message_manager,
+            item.url,
+            file_path
+        )
+        if not success:
+            raise QueueHandlerError(f"Failed to archive video: {error}")
+
+        return file_path
+
+    def _handle_processing_error(
+        self,
+        item: QueueItem,
+        message: Optional[discord.Message],
+        error: str,
+        start_time: datetime
+    ) -> None:
+        """Handle processing error, attributing the item's real start time to the stats"""
+        self._update_stats(False, start_time)
+        item.finish_processing(False, error)
+        if message:
+            asyncio.create_task(self._update_message_reactions(message, QueueItemStatus.FAILED))
+
+    def _update_stats(self, success: bool, start_time: datetime) -> None:
+        """Update queue statistics"""
+        processing_time = (datetime.utcnow() - start_time).total_seconds()
+        self._stats["processing_items"] -= 1
+        if success:
+            self._stats["completed_items"] += 1
+        else:
+            self._stats["failed_items"] += 1
+
+        # Update average processing time
+        total_items = self._stats["completed_items"] + self._stats["failed_items"]
+        if total_items > 0:
+            current_total = self._stats["average_processing_time"] * (total_items - 1)
+            self._stats["average_processing_time"] = (current_total + processing_time) / total_items
+
+        self._stats["last_processed"] = datetime.utcnow().isoformat()
+
+    async def _update_message_reactions(
+        self,
+        message: discord.Message,
+        status: QueueItemStatus
+    ) -> None:
+        """Update message reactions based on status"""
+        try:
+            # Remove existing reactions
+            for reaction in [
+                REACTIONS["queued"],
+                REACTIONS["processing"],
+                REACTIONS["success"],
+                REACTIONS["error"]
+            ]:
+                try:
+                    await message.remove_reaction(reaction, self.bot.user)
+                except discord.HTTPException:
+                    pass
+
+            # Add new reaction
+            if status == QueueItemStatus.PROCESSING:
+                await message.add_reaction(REACTIONS["processing"])
+            elif status == QueueItemStatus.COMPLETED:
+                await message.add_reaction(REACTIONS["success"])
+            elif status == QueueItemStatus.FAILED:
+                await message.add_reaction(REACTIONS["error"])
+        except Exception as e:
+            logger.error(f"Error updating message reactions: {e}")
+
+    async def _cleanup_file(self, file_path: Optional[str]) -> None:
+        """Clean up downloaded file"""
+        if file_path and os.path.exists(file_path):
+            try:
+                os.unlink(file_path)
+            except Exception as e:
+                logger.error(f"Failed to clean up file {file_path}: {e}")
+
+    async def _archive_video(
+        self,
+        guild_id: int,
+        original_message: Optional[discord.Message],
+        message_manager: MessageManager,
+        url: str,
+        file_path: str
+    ) -> Tuple[bool, Optional[str]]:
+        """
+        Archive downloaded video.
+
+        Args:
+            guild_id: Discord guild ID
+            original_message: Original message containing the video
+            message_manager: Message manager instance
+            url: Video URL
+            file_path: Path to downloaded video file
+
+        Returns:
+            Tuple of (success, error_message)
+
+        Raises:
+            QueueHandlerError: If archiving fails
+        """
         try:
             # Get archive channel
             guild = self.bot.get_guild(guild_id)
             if not guild:
-                return False, f"Guild {guild_id} not found"
+                raise QueueHandlerError(f"Guild {guild_id} not found")
 
             archive_channel = await self.config_manager.get_channel(guild, "archive")
             if not archive_channel:
-                return False, "Archive channel not configured"
+                raise QueueHandlerError("Archive channel not configured")
 
             # Format message
             try:
@@ -128,13 +301,16 @@ class QueueHandler:
                     author=author, channel=channel, url=url
                 )
             except Exception as e:
-                return False, f"Failed to format message: {str(e)}"
+                raise QueueHandlerError(f"Failed to format message: {str(e)}")
 
             # Upload to archive channel
             if not os.path.exists(file_path):
-                return False, "Processed file not found"
+                raise QueueHandlerError("Processed file not found")
 
-            archive_message = await archive_channel.send(content=message, file=discord.File(file_path))
+            archive_message = await archive_channel.send(
+                content=message,
+                file=discord.File(file_path)
+            )
 
             # Store in database if available
             if self.db and archive_message.attachments:
@@ -148,26 +324,28 @@ class QueueHandler:
                 )
                 logger.info(f"Added video to archive database: {url} -> {discord_url}")
 
-            if original_message:
-                await original_message.remove_reaction(REACTIONS["processing"], self.bot.user)
-                await original_message.add_reaction(REACTIONS["success"])
-                logger.info(f"Successfully processed message {original_message.id}")
-
             return True, None
 
         except discord.HTTPException as e:
-            if original_message:
-                await original_message.add_reaction(REACTIONS["error"])
             logger.error(f"Failed to upload to Discord: {str(e)}")
-            return False, f"Failed to upload to Discord: {str(e)}"
+            raise QueueHandlerError(f"Failed to upload to Discord: {str(e)}")
         except Exception as e:
-            if original_message:
-                await original_message.add_reaction(REACTIONS["error"])
             logger.error(f"Failed to archive video: {str(e)}")
-            return False, f"Failed to archive video: {str(e)}"
+            raise QueueHandlerError(f"Failed to archive video: {str(e)}")
 
-    async def _get_original_message(self, item) -> Optional[discord.Message]:
-        """Retrieve the original message"""
+    async def _get_original_message(
+        self,
+        item: QueueItem
+    ) -> Optional[discord.Message]:
+        """
+        Retrieve
the original message. + + Args: + item: Queue item containing message details + + Returns: + Original Discord message or None if not found + """ try: channel = self.bot.get_channel(item.channel_id) if not channel: @@ -179,8 +357,21 @@ class QueueHandler: logger.error(f"Error fetching original message: {e}") return None - def _create_progress_callback(self, message: Optional[discord.Message], url: str): - """Create progress callback function for download tracking""" + def _create_progress_callback( + self, + message: Optional[discord.Message], + url: str + ) -> Callable[[float], None]: + """ + Create progress callback function for download tracking. + + Args: + message: Discord message to update with progress + url: URL being downloaded + + Returns: + Callback function for progress updates + """ def progress_callback(progress: float) -> None: if message: try: @@ -204,22 +395,45 @@ class QueueHandler: logger.error(f"Error in progress callback: {e}") return progress_callback - async def _download_video(self, downloader, url: str, progress_callback) -> Tuple[bool, Optional[str], Optional[str]]: - """Download video with progress tracking""" + async def _download_video( + self, + downloader: DownloadManager, + url: str, + progress_callback: Callable[[float], None] + ) -> Tuple[bool, Optional[str], Optional[str]]: + """ + Download video with progress tracking. + + Args: + downloader: Download manager instance + url: URL to download + progress_callback: Callback for progress updates + + Returns: + Tuple of (success, file_path, error_message) + """ download_task = asyncio.create_task( downloader.download_video(url, progress_callback=progress_callback) ) async with self._active_downloads_lock: self._active_downloads[url] = download_task + self._stats["active_downloads"] += 1 try: - success, file_path, error = await download_task + success, file_path, error = await asyncio.wait_for( + download_task, + timeout=self.DOWNLOAD_TIMEOUT + ) if success: self.progress_tracker.complete_download(url) else: self.progress_tracker.increment_download_retries(url) return success, file_path, error + + except asyncio.TimeoutError: + logger.error(f"Download timed out for {url}") + return False, None, "Download timed out" except asyncio.CancelledError: logger.info(f"Download cancelled for {url}") return False, None, "Download cancelled" @@ -229,9 +443,15 @@ class QueueHandler: finally: async with self._active_downloads_lock: self._active_downloads.pop(url, None) + self._stats["active_downloads"] -= 1 - async def cleanup(self): - """Clean up resources and stop processing""" + async def cleanup(self) -> None: + """ + Clean up resources and stop processing. 
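# A minimal standalone sketch of the pattern _download_video and cleanup()
# above rely on: each task is registered under a lock, bounded with
# asyncio.wait_for, deregistered in a finally block, and bulk-cancellable.
# Names (DOWNLOAD_TIMEOUT, DownloadRegistry) are illustrative, not the patch's.
import asyncio

DOWNLOAD_TIMEOUT = 3600.0  # assumed to mirror the handler's class constant

class DownloadRegistry:
    def __init__(self) -> None:
        self._active: dict = {}
        self._lock = asyncio.Lock()

    async def download(self, url: str) -> str:
        task = asyncio.create_task(asyncio.sleep(0.1, result=f"downloaded {url}"))
        async with self._lock:
            self._active[url] = task              # register before awaiting
        try:
            return await asyncio.wait_for(task, timeout=DOWNLOAD_TIMEOUT)
        except asyncio.TimeoutError:
            return "timed out"
        finally:
            async with self._lock:
                self._active.pop(url, None)       # always deregister

    async def cleanup(self) -> None:
        async with self._lock:
            for task in self._active.values():
                if not task.done():
                    task.cancel()                 # bulk-cancel on unload
            self._active.clear()

# asyncio.run(DownloadRegistry().download("https://example.com/v.mp4"))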
+ + Raises: + QueueHandlerError: If cleanup fails + """ try: logger.info("Starting QueueHandler cleanup...") self._unloading = True @@ -248,14 +468,15 @@ class QueueHandler: except Exception as e: logger.error(f"Error cancelling download task for {url}: {e}") self._active_downloads.clear() + self._stats["active_downloads"] = 0 logger.info("QueueHandler cleanup completed successfully") except Exception as e: logger.error(f"Error during QueueHandler cleanup: {str(e)}", exc_info=True) - raise + raise QueueHandlerError(f"Cleanup failed: {str(e)}") - async def force_cleanup(self): + async def force_cleanup(self) -> None: """Force cleanup of resources when normal cleanup fails""" try: logger.info("Starting force cleanup of QueueHandler...") @@ -266,13 +487,18 @@ class QueueHandler: if not task.done(): task.cancel() self._active_downloads.clear() + self._stats["active_downloads"] = 0 logger.info("QueueHandler force cleanup completed") except Exception as e: logger.error(f"Error during QueueHandler force cleanup: {str(e)}", exc_info=True) - async def _update_download_progress_reaction(self, message: discord.Message, progress: float): + async def _update_download_progress_reaction( + self, + message: discord.Message, + progress: float + ) -> None: """Update download progress reaction on message""" if not message: return @@ -307,12 +533,41 @@ class QueueHandler: logger.error(f"Failed to update download progress reaction: {e}") def is_healthy(self) -> bool: - """Check if handler is healthy""" - # Check if any downloads are stuck - current_time = datetime.utcnow() - for url, task in self._active_downloads.items(): - if not task.done() and task.get_coro().cr_frame.f_locals.get('start_time'): - start_time = task.get_coro().cr_frame.f_locals['start_time'] - if (current_time - start_time).total_seconds() > 3600: # 1 hour timeout + """ + Check if handler is healthy. + + Returns: + True if handler is healthy, False otherwise + """ + try: + # Check if any downloads are stuck + current_time = datetime.utcnow() + for url, task in self._active_downloads.items(): + if not task.done() and task.get_coro().cr_frame.f_locals.get('start_time'): + start_time = task.get_coro().cr_frame.f_locals['start_time'] + if (current_time - start_time).total_seconds() > self.DOWNLOAD_TIMEOUT: + self._stats["is_healthy"] = False + return False + + # Check processing metrics + if self._stats["processing_items"] > 0: + if self._stats["average_processing_time"] > self.DOWNLOAD_TIMEOUT: + self._stats["is_healthy"] = False return False - return True + + self._stats["is_healthy"] = True + return True + + except Exception as e: + logger.error(f"Error checking health: {e}") + self._stats["is_healthy"] = False + return False + + def get_stats(self) -> QueueStats: + """ + Get queue handler statistics. 
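# The health check above reaches into task.get_coro().cr_frame for a
# 'start_time' local, which is fragile: cr_frame is None once the coroutine
# finishes. A sketch of a plainer alternative the patch does not use: record
# start times explicitly when a download is registered.
from datetime import datetime, timedelta

_start_times: dict = {}

def mark_started(url: str) -> None:
    _start_times[url] = datetime.utcnow()

def is_stuck(url: str, limit_seconds: float = 3600.0) -> bool:
    started = _start_times.get(url)
    return started is not None and datetime.utcnow() - started > timedelta(seconds=limit_seconds)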
+ + Returns: + Dictionary containing queue statistics + """ + return self._stats.copy() diff --git a/videoarchiver/processor/queue_processor.py b/videoarchiver/processor/queue_processor.py index 4731be4..7799787 100644 --- a/videoarchiver/processor/queue_processor.py +++ b/videoarchiver/processor/queue_processor.py @@ -2,21 +2,24 @@ import logging import asyncio -from enum import Enum -from typing import List, Optional, Dict, Any, Set +from enum import Enum, auto +from typing import List, Optional, Dict, Any, Set, Union, TypedDict, ClassVar from datetime import datetime import discord from ..queue.models import QueueItem +from ..queue.manager import EnhancedVideoQueueManager from .constants import REACTIONS +from .url_extractor import URLMetadata +from ..utils.exceptions import QueueProcessingError logger = logging.getLogger("VideoArchiver") class QueuePriority(Enum): """Queue item priorities""" - HIGH = 0 - NORMAL = 1 - LOW = 2 + HIGH = auto() + NORMAL = auto() + LOW = auto() class ProcessingStrategy(Enum): """Available processing strategies""" @@ -24,10 +27,22 @@ class ProcessingStrategy(Enum): PRIORITY = "priority" # Process by priority SMART = "smart" # Smart processing based on various factors +class QueueStats(TypedDict): + """Type definition for queue statistics""" + total_processed: int + successful: int + failed: int + success_rate: float + average_processing_time: float + error_counts: Dict[str, int] + last_processed: Optional[str] + class QueueMetrics: """Tracks queue processing metrics""" - def __init__(self): + MAX_PROCESSING_TIME: ClassVar[float] = 3600.0 # 1 hour in seconds + + def __init__(self) -> None: self.total_processed = 0 self.successful = 0 self.failed = 0 @@ -36,49 +51,67 @@ class QueueMetrics: self.last_processed: Optional[datetime] = None def record_success(self, processing_time: float) -> None: - """Record successful processing""" + """ + Record successful processing. + + Args: + processing_time: Time taken to process in seconds + """ + if processing_time > self.MAX_PROCESSING_TIME: + logger.warning(f"Unusually long processing time: {processing_time} seconds") + self.total_processed += 1 self.successful += 1 self.processing_times.append(processing_time) self.last_processed = datetime.utcnow() def record_failure(self, error: str) -> None: - """Record processing failure""" + """ + Record processing failure. + + Args: + error: Error message describing the failure + """ self.total_processed += 1 self.failed += 1 self.errors[error] = self.errors.get(error, 0) + 1 self.last_processed = datetime.utcnow() - def get_stats(self) -> Dict[str, Any]: - """Get queue metrics""" + def get_stats(self) -> QueueStats: + """ + Get queue metrics. 
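# A compact sketch of the TypedDict stats pattern QueueMetrics uses above:
# the return shape is checkable by type tools while staying a plain dict at
# runtime. Side note: Enum auto() gives HIGH/NORMAL/LOW the values 1/2/3,
# where the removed literals were 0/1/2, so any code that compares or
# persists the numeric priority sees shifted values.
from typing import Optional, TypedDict

class MiniStats(TypedDict):
    total: int
    successful: int
    success_rate: float
    last_processed: Optional[str]

def build_stats(total: int, successful: int, last_iso: Optional[str]) -> MiniStats:
    return MiniStats(
        total=total,
        successful=successful,
        success_rate=successful / total if total else 0.0,  # divide-by-zero guard
        last_processed=last_iso,
    )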
+ + Returns: + Dictionary containing queue statistics + """ avg_time = ( sum(self.processing_times) / len(self.processing_times) if self.processing_times else 0 ) - return { - "total_processed": self.total_processed, - "successful": self.successful, - "failed": self.failed, - "success_rate": ( + return QueueStats( + total_processed=self.total_processed, + successful=self.successful, + failed=self.failed, + success_rate=( self.successful / self.total_processed if self.total_processed > 0 else 0 ), - "average_processing_time": avg_time, - "error_counts": self.errors.copy(), - "last_processed": self.last_processed - } + average_processing_time=avg_time, + error_counts=self.errors.copy(), + last_processed=self.last_processed.isoformat() if self.last_processed else None + ) class QueueProcessor: """Handles adding videos to the processing queue""" def __init__( self, - queue_manager, + queue_manager: EnhancedVideoQueueManager, strategy: ProcessingStrategy = ProcessingStrategy.SMART, max_retries: int = 3 - ): + ) -> None: self.queue_manager = queue_manager self.strategy = strategy self.max_retries = max_retries @@ -89,16 +122,34 @@ class QueueProcessor: async def process_urls( self, message: discord.Message, - urls: List[str], + urls: Union[List[str], Set[str], List[URLMetadata]], priority: QueuePriority = QueuePriority.NORMAL ) -> None: - """Process extracted URLs by adding them to the queue""" - for url in urls: + """ + Process extracted URLs by adding them to the queue. + + Args: + message: Discord message containing the URLs + urls: List or set of URLs or URLMetadata objects to process + priority: Priority level for queue processing + + Raises: + QueueProcessingError: If there's an error adding URLs to the queue + """ + processed_urls: Set[str] = set() + + for url_data in urls: + url = url_data.url if isinstance(url_data, URLMetadata) else url_data + + if url in processed_urls: + logger.debug(f"Skipping duplicate URL: {url}") + continue + try: logger.info(f"Adding URL to queue: {url}") await message.add_reaction(REACTIONS['queued']) - # Create queue item using the model from queue.models + # Create queue item item = QueueItem( url=url, message_id=message.id, @@ -111,15 +162,24 @@ class QueueProcessor: # Add to queue with appropriate strategy await self._add_to_queue(item) + processed_urls.add(url) logger.info(f"Successfully added video to queue: {url}") except Exception as e: - logger.error(f"Failed to add video to queue: {str(e)}") + logger.error(f"Failed to add video to queue: {str(e)}", exc_info=True) await message.add_reaction(REACTIONS['error']) - continue + raise QueueProcessingError(f"Failed to add URL to queue: {str(e)}") async def _add_to_queue(self, item: QueueItem) -> None: - """Add item to queue using current strategy""" + """ + Add item to queue using current strategy. 
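# Sketch of the strategy routing _add_to_queue implements just below with
# if/elif: an enum-keyed mapping of handler coroutines expresses the same
# dispatch. Handler bodies here are illustrative stand-ins.
import asyncio
from enum import Enum

class Strategy(Enum):
    FIFO = "fifo"
    PRIORITY = "priority"
    SMART = "smart"

async def _add_fifo(item) -> None: print("fifo", item)
async def _add_priority(item) -> None: print("priority", item)
async def _add_smart(item) -> None: print("smart", item)

_DISPATCH = {Strategy.FIFO: _add_fifo, Strategy.PRIORITY: _add_priority, Strategy.SMART: _add_smart}

async def add_to_queue(item, strategy: Strategy) -> None:
    await _DISPATCH[strategy](item)

# asyncio.run(add_to_queue("https://example.com/v.mp4", Strategy.SMART))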
+ + Args: + item: Queue item to add + + Raises: + QueueProcessingError: If there's an error adding the item + """ async with self._processing_lock: if item.url in self._processing: logger.debug(f"URL already being processed: {item.url}") @@ -136,6 +196,9 @@ class QueueProcessor: else: # FIFO await self._add_fifo(item) + except Exception as e: + logger.error(f"Error adding item to queue: {e}", exc_info=True) + raise QueueProcessingError(f"Failed to add item to queue: {str(e)}") finally: async with self._processing_lock: self._processing.remove(item.url) @@ -153,7 +216,6 @@ class QueueProcessor: async def _add_with_smart_strategy(self, item: QueueItem) -> None: """Add item using smart processing strategy""" - # Calculate priority based on various factors priority = await self._calculate_smart_priority(item) await self.queue_manager.add_to_queue( @@ -177,7 +239,15 @@ class QueueProcessor: ) async def _calculate_smart_priority(self, item: QueueItem) -> int: - """Calculate priority using smart strategy""" + """ + Calculate priority using smart strategy. + + Args: + item: Queue item to calculate priority for + + Returns: + Calculated priority value + """ base_priority = item.priority # Adjust based on queue metrics @@ -203,7 +273,17 @@ class QueueProcessor: channel: discord.TextChannel, url: str ) -> str: - """Format message for archive channel""" + """ + Format message for archive channel. + + Args: + author: Optional message author + channel: Channel the message was posted in + url: URL being archived + + Returns: + Formatted message string + """ author_mention = author.mention if author else "Unknown User" channel_mention = channel.mention if channel else "Unknown Channel" @@ -213,7 +293,12 @@ class QueueProcessor: ) def get_metrics(self) -> Dict[str, Any]: - """Get queue processing metrics""" + """ + Get queue processing metrics. + + Returns: + Dictionary containing queue metrics and status + """ return { "metrics": self.metrics.get_stats(), "strategy": self.strategy.value, diff --git a/videoarchiver/processor/reactions.py b/videoarchiver/processor/reactions.py index 60c8cd9..58307d1 100644 --- a/videoarchiver/processor/reactions.py +++ b/videoarchiver/processor/reactions.py @@ -2,112 +2,184 @@ import logging import asyncio +import re +from typing import List, Optional import discord +from urllib.parse import urlparse -from .constants import REACTIONS +from .constants import REACTIONS, ReactionType, get_reaction, get_progress_emoji +from ..database.video_archive_db import VideoArchiveDB logger = logging.getLogger("VideoArchiver") -async def handle_archived_reaction(message: discord.Message, user: discord.User, db) -> None: - """Handle reaction to archived video message""" +async def handle_archived_reaction( + message: discord.Message, + user: discord.User, + db: VideoArchiveDB +) -> None: + """ + Handle reaction to archived video message. 
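# _calculate_smart_priority above starts from item.priority and adjusts it
# based on queue metrics, but the adjustment body falls between hunks; this
# sketch is therefore an assumption about the shape of the heuristic, not
# the patch's actual weights.
def smart_priority(base: int, failure_rate: float, queue_depth: int) -> int:
    priority = base
    if failure_rate > 0.5:
        priority += 1              # deprioritize when recent failures dominate
    if queue_depth > 100:
        priority += 1              # deprioritize under heavy backlog
    return max(0, min(priority, 10))  # clamp to a sane range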
+ + Args: + message: The Discord message that was reacted to + user: The user who added the reaction + db: Database instance for checking archived videos + """ try: # Check if the reaction is from a user (not the bot) and is the archived reaction - if user.bot or str(message.reactions[0].emoji) != REACTIONS['archived']: + if user.bot or str(message.reactions[0].emoji) != get_reaction(ReactionType.ARCHIVED): return - # Extract URLs from the message - urls = [] - if message.content: - for word in message.content.split(): - if any(s in word.lower() for s in ['http://', 'https://']): - urls.append(word) + # Extract URLs from the message using regex + url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') + urls = url_pattern.findall(message.content) if message.content else [] # Check each URL in the database for url in urls: + # Ensure URL has proper scheme + if url.startswith('www.'): + url = 'http://' + url + + # Validate URL + try: + result = urlparse(url) + if not all([result.scheme, result.netloc]): + continue + except Exception: + continue + result = db.get_archived_video(url) if result: discord_url = result[0] - await message.reply(f"This video was already archived. You can find it here: {discord_url}") + await message.reply( + f"This video was already archived. You can find it here: {discord_url}" + ) return except Exception as e: - logger.error(f"Error handling archived reaction: {e}") + logger.error(f"Error handling archived reaction: {e}", exc_info=True) -async def update_queue_position_reaction(message: discord.Message, position: int, bot_user) -> None: - """Update queue position reaction""" +async def update_queue_position_reaction( + message: discord.Message, + position: int, + bot_user: discord.ClientUser +) -> None: + """ + Update queue position reaction. + + Args: + message: The Discord message to update reactions on + position: Queue position (0-based index) + bot_user: The bot's user instance for managing reactions + """ try: - for reaction in REACTIONS["numbers"]: + numbers = get_reaction(ReactionType.NUMBERS) + if not isinstance(numbers, list): + logger.error("Numbers reaction is not a list") + return + + # Remove old reactions + for reaction in numbers: try: await message.remove_reaction(reaction, bot_user) - except: - pass + except discord.HTTPException as e: + logger.warning(f"Failed to remove number reaction: {e}") + except Exception as e: + logger.error(f"Unexpected error removing number reaction: {e}") + + # Add new reaction if position is valid + if 0 <= position < len(numbers): + try: + await message.add_reaction(numbers[position]) + logger.info( + f"Updated queue position reaction to {position + 1} for message {message.id}" + ) + except discord.HTTPException as e: + logger.error(f"Failed to add queue position reaction: {e}") - if 0 <= position < len(REACTIONS["numbers"]): - await message.add_reaction(REACTIONS["numbers"][position]) - logger.info( - f"Updated queue position reaction to {position + 1} for message {message.id}" - ) except Exception as e: - logger.error(f"Failed to update queue position reaction: {e}") + logger.error(f"Failed to update queue position reaction: {e}", exc_info=True) -async def update_progress_reaction(message: discord.Message, progress: float, bot_user) -> None: - """Update progress reaction based on FFmpeg progress""" +async def update_progress_reaction( + message: discord.Message, + progress: float, + bot_user: discord.ClientUser +) -> None: + """ + Update progress reaction based on FFmpeg progress. 
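# Quick demo of the extraction/normalization flow in handle_archived_reaction
# above: findall also catches bare "www." links, which is why a scheme is
# patched in before urlparse validation. URLs are illustrative.
import re
from urllib.parse import urlparse

URL_RE = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')

for url in URL_RE.findall("see www.example.com/v and https://youtu.be/abc123xyz_-"):
    if url.startswith('www.'):
        url = 'http://' + url
    parts = urlparse(url)
    print(url, bool(parts.scheme and parts.netloc))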
+ + Args: + message: The Discord message to update reactions on + progress: Progress value between 0 and 100 + bot_user: The bot's user instance for managing reactions + """ if not message: return try: - # Remove old reactions in the event loop - for reaction in REACTIONS["progress"]: + progress_emojis = get_reaction(ReactionType.PROGRESS) + if not isinstance(progress_emojis, list): + logger.error("Progress reaction is not a list") + return + + # Remove old reactions + for reaction in progress_emojis: try: await message.remove_reaction(reaction, bot_user) + except discord.HTTPException as e: + logger.warning(f"Failed to remove progress reaction: {e}") except Exception as e: - logger.error(f"Failed to remove progress reaction: {e}") - continue + logger.error(f"Unexpected error removing progress reaction: {e}") # Add new reaction based on progress try: - if progress < 33: - await message.add_reaction(REACTIONS["progress"][0]) - elif progress < 66: - await message.add_reaction(REACTIONS["progress"][1]) - else: - await message.add_reaction(REACTIONS["progress"][2]) + normalized_progress = progress / 100 # Convert to 0-1 range + emoji = get_progress_emoji(normalized_progress, progress_emojis) + await message.add_reaction(emoji) except Exception as e: logger.error(f"Failed to add progress reaction: {e}") except Exception as e: - logger.error(f"Failed to update progress reaction: {e}") + logger.error(f"Failed to update progress reaction: {e}", exc_info=True) -async def update_download_progress_reaction(message: discord.Message, progress: float, bot_user) -> None: - """Update download progress reaction""" +async def update_download_progress_reaction( + message: discord.Message, + progress: float, + bot_user: discord.ClientUser +) -> None: + """ + Update download progress reaction. 
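# get_progress_emoji is imported from .constants but not defined in this
# patch's visible hunks; this is a plausible sketch of the bucket mapping
# its call sites imply (normalized 0-1 progress, ordered emoji list), an
# assumption rather than the module's actual implementation.
from typing import List

def get_progress_emoji(progress: float, emojis: List[str]) -> str:
    progress = min(max(progress, 0.0), 1.0)                  # clamp to [0, 1]
    index = min(int(progress * len(emojis)), len(emojis) - 1)
    return emojis[index]

# get_progress_emoji(0.5, ["▶", "⏩", "✅"]) -> "⏩"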
+ + Args: + message: The Discord message to update reactions on + progress: Progress value between 0 and 100 + bot_user: The bot's user instance for managing reactions + """ if not message: return try: - # Remove old reactions in the event loop - for reaction in REACTIONS["download"]: + download_emojis = get_reaction(ReactionType.DOWNLOAD) + if not isinstance(download_emojis, list): + logger.error("Download reaction is not a list") + return + + # Remove old reactions + for reaction in download_emojis: try: await message.remove_reaction(reaction, bot_user) + except discord.HTTPException as e: + logger.warning(f"Failed to remove download reaction: {e}") except Exception as e: - logger.error(f"Failed to remove download reaction: {e}") - continue + logger.error(f"Unexpected error removing download reaction: {e}") # Add new reaction based on progress try: - if progress <= 20: - await message.add_reaction(REACTIONS["download"][0]) - elif progress <= 40: - await message.add_reaction(REACTIONS["download"][1]) - elif progress <= 60: - await message.add_reaction(REACTIONS["download"][2]) - elif progress <= 80: - await message.add_reaction(REACTIONS["download"][3]) - elif progress < 100: - await message.add_reaction(REACTIONS["download"][4]) - else: - await message.add_reaction(REACTIONS["download"][5]) + normalized_progress = progress / 100 # Convert to 0-1 range + emoji = get_progress_emoji(normalized_progress, download_emojis) + await message.add_reaction(emoji) except Exception as e: logger.error(f"Failed to add download reaction: {e}") except Exception as e: - logger.error(f"Failed to update download progress reaction: {e}") + logger.error(f"Failed to update download progress reaction: {e}", exc_info=True) diff --git a/videoarchiver/processor/status_display.py b/videoarchiver/processor/status_display.py index 9f00377..64c2637 100644 --- a/videoarchiver/processor/status_display.py +++ b/videoarchiver/processor/status_display.py @@ -1,23 +1,39 @@ """Module for handling queue status display and formatting""" -import discord -from enum import Enum -from dataclasses import dataclass -from datetime import datetime -from typing import Dict, Any, List, Optional import logging +from enum import Enum, auto +from dataclasses import dataclass, field +from datetime import datetime +from typing import Dict, Any, List, Optional, Callable, TypeVar, Union, TypedDict, ClassVar, Tuple +import discord + +from ..utils.exceptions import DisplayError logger = logging.getLogger("VideoArchiver") -class DisplayTheme: - """Defines display themes""" - DEFAULT = { - "title_color": discord.Color.blue(), - "success_color": discord.Color.green(), - "warning_color": discord.Color.gold(), - "error_color": discord.Color.red(), - "info_color": discord.Color.blurple() - } +T = TypeVar('T') + +class DisplayTheme(TypedDict): + """Type definition for display theme""" + title_color: discord.Color + success_color: discord.Color + warning_color: discord.Color + error_color: discord.Color + info_color: discord.Color + +class DisplaySection(Enum): + """Available display sections""" + QUEUE_STATS = auto() + DOWNLOADS = auto() + COMPRESSIONS = auto() + ERRORS = auto() + HARDWARE = auto() + +class DisplayCondition(Enum): + """Display conditions for sections""" + HAS_ERRORS = "has_errors" + HAS_DOWNLOADS = "has_downloads" + HAS_COMPRESSIONS = "has_compressions" @dataclass class DisplayTemplate: @@ -26,48 +42,116 @@ class DisplayTemplate: format_string: str inline: bool = False order: int = 0 - condition: Optional[str] = None - -class
DisplaySection(Enum): - """Available display sections""" - QUEUE_STATS = "queue_stats" - DOWNLOADS = "downloads" - COMPRESSIONS = "compressions" - ERRORS = "errors" - HARDWARE = "hardware" + condition: Optional[DisplayCondition] = None + formatter: Optional[Callable[[Dict[str, Any]], str]] = None + max_items: int = field(default=5) # Maximum items to display in lists class StatusFormatter: """Formats status information for display""" + BYTE_UNITS: ClassVar[List[str]] = ['B', 'KB', 'MB', 'GB', 'TB'] + TIME_THRESHOLDS: ClassVar[List[Tuple[float, str]]] = [ + (60, 's'), + (60, 'm'), + (float('inf'), 'h') + ] + @staticmethod - def format_bytes(bytes: int) -> str: - """Format bytes into human readable format""" - for unit in ['B', 'KB', 'MB', 'GB']: - if bytes < 1024: - return f"{bytes:.1f}{unit}" - bytes /= 1024 - return f"{bytes:.1f}TB" + def format_bytes(bytes_value: Union[int, float]) -> str: + """ + Format bytes into human readable format. + + Args: + bytes_value: Number of bytes to format + + Returns: + Formatted string with appropriate unit + + Raises: + ValueError: If bytes_value is negative + """ + if bytes_value < 0: + raise ValueError("Bytes value cannot be negative") + + bytes_num = float(bytes_value) + for unit in StatusFormatter.BYTE_UNITS: + if bytes_num < 1024: + return f"{bytes_num:.1f}{unit}" + bytes_num /= 1024 + return f"{bytes_num:.1f}TB" @staticmethod def format_time(seconds: float) -> str: - """Format time duration""" - if seconds < 60: - return f"{seconds:.1f}s" - minutes = seconds / 60 - if minutes < 60: - return f"{minutes:.1f}m" - hours = minutes / 60 - return f"{hours:.1f}h" + """ + Format time duration. + + Args: + seconds: Number of seconds to format + + Returns: + Formatted time string + + Raises: + ValueError: If seconds is negative + """ + if seconds < 0: + raise ValueError("Time value cannot be negative") + + for threshold, unit in StatusFormatter.TIME_THRESHOLDS: + if seconds < threshold: + return f"{seconds:.1f}{unit}" + seconds /= 60 + return f"{seconds:.1f}h" @staticmethod def format_percentage(value: float) -> str: - """Format percentage value""" + """ + Format percentage value. + + Args: + value: Percentage value to format (0-100) + + Returns: + Formatted percentage string + + Raises: + ValueError: If value is outside valid range + """ + if not 0 <= value <= 100: + raise ValueError("Percentage must be between 0 and 100") return f"{value:.1f}%" + @staticmethod + def truncate_url(url: str, max_length: int = 50) -> str: + """ + Truncate URL to specified length. + + Args: + url: URL to truncate + max_length: Maximum length for URL + + Returns: + Truncated URL string + + Raises: + ValueError: If max_length is less than 4 + """ + if max_length < 4: # Need room for "..." + raise ValueError("max_length must be at least 4") + return f"{url[:max_length]}..."
if len(url) > max_length else url + class DisplayManager: """Manages status display configuration""" - def __init__(self): + DEFAULT_THEME: ClassVar[DisplayTheme] = DisplayTheme( + title_color=discord.Color.blue(), + success_color=discord.Color.green(), + warning_color=discord.Color.gold(), + error_color=discord.Color.red(), + info_color=discord.Color.blurple() + ) + + def __init__(self) -> None: self.templates: Dict[DisplaySection, DisplayTemplate] = { DisplaySection.QUEUE_STATS: DisplayTemplate( name="Queue Statistics", @@ -96,7 +180,8 @@ class DisplayManager: "Retries: {retries}\n" "```" ), - order=2 + order=2, + condition=DisplayCondition.HAS_DOWNLOADS ), DisplaySection.COMPRESSIONS: DisplayTemplate( name="Active Compressions", @@ -112,12 +197,13 @@ class DisplayManager: "Hardware Accel: {hardware_accel}\n" "```" ), - order=3 + order=3, + condition=DisplayCondition.HAS_COMPRESSIONS ), DisplaySection.ERRORS: DisplayTemplate( name="Error Statistics", format_string="```\n{error_stats}```", - condition="has_errors", + condition=DisplayCondition.HAS_ERRORS, order=4 ), DisplaySection.HARDWARE: DisplayTemplate( @@ -132,63 +218,99 @@ class DisplayManager: order=5 ) } - self.theme = DisplayTheme.DEFAULT + self.theme = self.DEFAULT_THEME.copy() class StatusDisplay: """Handles formatting and display of queue status information""" - def __init__(self): + def __init__(self) -> None: self.display_manager = DisplayManager() self.formatter = StatusFormatter() + @classmethod async def create_queue_status_embed( - self, + cls, queue_status: Dict[str, Any], active_ops: Dict[str, Any] ) -> discord.Embed: - """Create an embed displaying queue status and active operations""" - embed = discord.Embed( - title="Queue Status Details", - color=self.display_manager.theme["title_color"], - timestamp=datetime.utcnow() - ) + """ + Create an embed displaying queue status and active operations. 
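# Usage sketch for the StatusFormatter helpers above (module path as in this
# patch); expected outputs are noted inline.
from videoarchiver.processor.status_display import StatusFormatter

print(StatusFormatter.format_bytes(1536))       # 1.5KB
print(StatusFormatter.format_time(42))          # 42.0s
print(StatusFormatter.format_time(7200))        # 2.0h (value is rescaled between units)
print(StatusFormatter.format_percentage(99.5))  # 99.5%
print(StatusFormatter.truncate_url("https://example.com/a/really/long/path", 20))  # https://example.com/...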
+ + Args: + queue_status: Dictionary containing queue status information + active_ops: Dictionary containing active operations information + + Returns: + Discord embed containing formatted status information + + Raises: + DisplayError: If there's an error creating the embed + """ + try: + display = cls() + embed = discord.Embed( + title="Queue Status Details", + color=display.display_manager.theme["title_color"], + timestamp=datetime.utcnow() + ) - # Add sections in order - sections = sorted( - self.display_manager.templates.items(), - key=lambda x: x[1].order - ) + # Add sections in order + sections = sorted( + display.display_manager.templates.items(), + key=lambda x: x[1].order + ) - for section, template in sections: - # Check condition if exists - if template.condition: - if not self._check_condition(template.condition, queue_status, active_ops): - continue + for section, template in sections: + try: + # Check condition if exists + if template.condition: + if not display._check_condition( + template.condition, + queue_status, + active_ops + ): + continue - # Add section based on type - if section == DisplaySection.QUEUE_STATS: - self._add_queue_statistics(embed, queue_status, template) - elif section == DisplaySection.DOWNLOADS: - self._add_active_downloads(embed, active_ops.get('downloads', {}), template) - elif section == DisplaySection.COMPRESSIONS: - self._add_active_compressions(embed, active_ops.get('compressions', {}), template) - elif section == DisplaySection.ERRORS: - self._add_error_statistics(embed, queue_status, template) - elif section == DisplaySection.HARDWARE: - self._add_hardware_statistics(embed, queue_status, template) + # Add section based on type + if section == DisplaySection.QUEUE_STATS: + display._add_queue_statistics(embed, queue_status, template) + elif section == DisplaySection.DOWNLOADS: + display._add_active_downloads(embed, active_ops.get('downloads', {}), template) + elif section == DisplaySection.COMPRESSIONS: + display._add_active_compressions(embed, active_ops.get('compressions', {}), template) + elif section == DisplaySection.ERRORS: + display._add_error_statistics(embed, queue_status, template) + elif section == DisplaySection.HARDWARE: + display._add_hardware_statistics(embed, queue_status, template) + except Exception as e: + logger.error(f"Error adding section {section.value}: {e}") + # Continue with other sections - return embed + return embed + + except Exception as e: + error = f"Error creating status embed: {str(e)}" + logger.error(error, exc_info=True) + raise DisplayError(error) def _check_condition( self, - condition: str, + condition: DisplayCondition, queue_status: Dict[str, Any], active_ops: Dict[str, Any] ) -> bool: """Check if condition for displaying section is met""" - if condition == "has_errors": - return bool(queue_status["metrics"]["errors_by_type"]) - return True + try: + if condition == DisplayCondition.HAS_ERRORS: + return bool(queue_status.get("metrics", {}).get("errors_by_type")) + elif condition == DisplayCondition.HAS_DOWNLOADS: + return bool(active_ops.get("downloads")) + elif condition == DisplayCondition.HAS_COMPRESSIONS: + return bool(active_ops.get("compressions")) + return True + except Exception as e: + logger.error(f"Error checking condition {condition}: {e}") + return False def _add_queue_statistics( self, @@ -197,22 +319,31 @@ class StatusDisplay: template: DisplayTemplate ) -> None: """Add queue statistics to the embed""" - embed.add_field( - name=template.name, - value=template.format_string.format( - 
pending=queue_status['pending'], - processing=queue_status['processing'], - completed=queue_status['completed'], - failed=queue_status['failed'], - success_rate=self.formatter.format_percentage( - queue_status['metrics']['success_rate'] * 100 + try: + metrics = queue_status.get('metrics', {}) + embed.add_field( + name=template.name, + value=template.format_string.format( + pending=queue_status.get('pending', 0), + processing=queue_status.get('processing', 0), + completed=queue_status.get('completed', 0), + failed=queue_status.get('failed', 0), + success_rate=self.formatter.format_percentage( + metrics.get('success_rate', 0) * 100 + ), + avg_processing_time=self.formatter.format_time( + metrics.get('avg_processing_time', 0) + ) ), - avg_processing_time=self.formatter.format_time( - queue_status['metrics']['avg_processing_time'] - ) - ), - inline=template.inline - ) + inline=template.inline + ) + except Exception as e: + logger.error(f"Error adding queue statistics: {e}") + embed.add_field( + name=template.name, + value="```\nError displaying queue statistics```", + inline=template.inline + ) def _add_active_downloads( self, @@ -221,28 +352,44 @@ class StatusDisplay: template: DisplayTemplate ) -> None: """Add active downloads information to the embed""" - if downloads: - content = [] - for url, progress in downloads.items(): - content.append(template.format_string.format( - url=url[:50] + "..." if len(url) > 50 else url, - percent=self.formatter.format_percentage(progress.get('percent', 0)), - speed=progress.get('speed', 'N/A'), - eta=progress.get('eta', 'N/A'), - size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/" - f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}", - start_time=progress.get('start_time', 'N/A'), - retries=progress.get('retries', 0) - )) + try: + if downloads: + content = [] + for url, progress in list(downloads.items())[:template.max_items]: + try: + content.append(template.format_string.format( + url=self.formatter.truncate_url(url), + percent=self.formatter.format_percentage(progress.get('percent', 0)), + speed=progress.get('speed', 'N/A'), + eta=progress.get('eta', 'N/A'), + size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/" + f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}", + start_time=progress.get('start_time', 'N/A'), + retries=progress.get('retries', 0) + )) + except Exception as e: + logger.error(f"Error formatting download {url}: {e}") + continue + + if len(downloads) > template.max_items: + content.append(f"\n... 
and {len(downloads) - template.max_items} more") + + embed.add_field( + name=template.name, + value="".join(content) if content else "```\nNo active downloads```", + inline=template.inline + ) + else: + embed.add_field( + name=template.name, + value="```\nNo active downloads```", + inline=template.inline + ) + except Exception as e: + logger.error(f"Error adding active downloads: {e}") embed.add_field( name=template.name, - value="".join(content), - inline=template.inline - ) - else: - embed.add_field( - name=template.name, - value="```\nNo active downloads```", + value="```\nError displaying downloads```", inline=template.inline ) @@ -253,28 +400,44 @@ class StatusDisplay: template: DisplayTemplate ) -> None: """Add active compressions information to the embed""" - if compressions: - content = [] - for file_id, progress in compressions.items(): - content.append(template.format_string.format( - filename=progress.get('filename', 'Unknown'), - percent=self.formatter.format_percentage(progress.get('percent', 0)), - elapsed_time=progress.get('elapsed_time', 'N/A'), - input_size=self.formatter.format_bytes(progress.get('input_size', 0)), - current_size=self.formatter.format_bytes(progress.get('current_size', 0)), - target_size=self.formatter.format_bytes(progress.get('target_size', 0)), - codec=progress.get('codec', 'Unknown'), - hardware_accel=progress.get('hardware_accel', False) - )) + try: + if compressions: + content = [] + for file_id, progress in list(compressions.items())[:template.max_items]: + try: + content.append(template.format_string.format( + filename=progress.get('filename', 'Unknown'), + percent=self.formatter.format_percentage(progress.get('percent', 0)), + elapsed_time=progress.get('elapsed_time', 'N/A'), + input_size=self.formatter.format_bytes(progress.get('input_size', 0)), + current_size=self.formatter.format_bytes(progress.get('current_size', 0)), + target_size=self.formatter.format_bytes(progress.get('target_size', 0)), + codec=progress.get('codec', 'Unknown'), + hardware_accel=progress.get('hardware_accel', False) + )) + except Exception as e: + logger.error(f"Error formatting compression {file_id}: {e}") + continue + + if len(compressions) > template.max_items: + content.append(f"\n... and {len(compressions) - template.max_items} more") + + embed.add_field( + name=template.name, + value="".join(content) if content else "```\nNo active compressions```", + inline=template.inline + ) + else: + embed.add_field( + name=template.name, + value="```\nNo active compressions```", + inline=template.inline + ) + except Exception as e: + logger.error(f"Error adding active compressions: {e}") embed.add_field( name=template.name, - value="".join(content), - inline=template.inline - ) - else: - embed.add_field( - name=template.name, - value="```\nNo active compressions```", + value="```\nError displaying compressions```", inline=template.inline ) @@ -285,14 +448,26 @@ class StatusDisplay: template: DisplayTemplate ) -> None: """Add error statistics to the embed""" - if queue_status["metrics"]["errors_by_type"]: - error_stats = "\n".join( - f"{error_type}: {count}" - for error_type, count in queue_status["metrics"]["errors_by_type"].items() - ) + try: + metrics = queue_status.get('metrics', {}) + errors_by_type = metrics.get('errors_by_type', {}) + if errors_by_type: + error_stats = "\n".join( + f"{error_type}: {count}" + for error_type, count in list(errors_by_type.items())[:template.max_items] + ) + if len(errors_by_type) > template.max_items: + error_stats += f"\n... 
and {len(errors_by_type) - template.max_items} more" + embed.add_field( + name=template.name, + value=template.format_string.format(error_stats=error_stats), + inline=template.inline + ) + except Exception as e: + logger.error(f"Error adding error statistics: {e}") embed.add_field( name=template.name, - value=template.format_string.format(error_stats=error_stats), + value="```\nError displaying error statistics```", inline=template.inline ) @@ -303,14 +478,23 @@ class StatusDisplay: template: DisplayTemplate ) -> None: """Add hardware statistics to the embed""" - embed.add_field( - name=template.name, - value=template.format_string.format( - hw_failures=queue_status['metrics']['hardware_accel_failures'], - comp_failures=queue_status['metrics']['compression_failures'], - memory_usage=self.formatter.format_bytes( - queue_status['metrics']['peak_memory_usage'] * 1024 * 1024 # Convert MB to bytes - ) - ), - inline=template.inline - ) + try: + metrics = queue_status.get('metrics', {}) + embed.add_field( + name=template.name, + value=template.format_string.format( + hw_failures=metrics.get('hardware_accel_failures', 0), + comp_failures=metrics.get('compression_failures', 0), + memory_usage=self.formatter.format_bytes( + metrics.get('peak_memory_usage', 0) * 1024 * 1024 # Convert MB to bytes + ) + ), + inline=template.inline + ) + except Exception as e: + logger.error(f"Error adding hardware statistics: {e}") + embed.add_field( + name=template.name, + value="```\nError displaying hardware statistics```", + inline=template.inline + ) diff --git a/videoarchiver/processor/url_extractor.py b/videoarchiver/processor/url_extractor.py index fafa633..06c50c2 100644 --- a/videoarchiver/processor/url_extractor.py +++ b/videoarchiver/processor/url_extractor.py @@ -3,10 +3,11 @@ import logging import re from enum import Enum -from dataclasses import dataclass -from typing import List, Dict, Optional, Set, Pattern +from dataclasses import dataclass, field +from typing import List, Dict, Optional, Set, Pattern, ClassVar +from datetime import datetime import discord -from urllib.parse import urlparse, parse_qs +from urllib.parse import urlparse, parse_qs, ParseResult logger = logging.getLogger("VideoArchiver") @@ -19,6 +20,11 @@ class URLPattern: supports_timestamp: bool = False supports_playlist: bool = False + def __post_init__(self) -> None: + """Validate pattern after initialization""" + if not isinstance(self.pattern, Pattern): + raise ValueError("Pattern must be a compiled regular expression") + @dataclass class URLMetadata: """Metadata about an extracted URL""" @@ -28,6 +34,7 @@ class URLMetadata: playlist_id: Optional[str] = None video_id: Optional[str] = None quality: Optional[str] = None + extraction_time: str = field(default_factory=lambda: datetime.utcnow().isoformat()) class URLType(Enum): """Types of video URLs""" @@ -38,84 +45,137 @@ class URLType(Enum): class URLPatternManager: """Manages URL patterns for different video sites""" - def __init__(self): + YOUTUBE_PATTERN: ClassVar[Pattern] = re.compile( + r'(?:https?://)?(?:www\.)?' + r'(?:youtube\.com/watch\?v=|youtu\.be/)' + r'([a-zA-Z0-9_-]{11})' + ) + VIMEO_PATTERN: ClassVar[Pattern] = re.compile( + r'(?:https?://)?(?:www\.)?' + r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)' + r'(\d+)(?:|/\w+)*' + ) + TWITTER_PATTERN: ClassVar[Pattern] = re.compile( + r'(?:https?://)?(?:www\.)?' 
+ r'(?:twitter\.com|x\.com)/\w+/status/(\d+)' + ) + + def __init__(self) -> None: self.patterns: Dict[str, URLPattern] = { "youtube": URLPattern( site="youtube", - pattern=re.compile( - r'(?:https?://)?(?:www\.)?' - r'(?:youtube\.com/watch\?v=|youtu\.be/)' - r'([a-zA-Z0-9_-]{11})' - ), + pattern=self.YOUTUBE_PATTERN, supports_timestamp=True, supports_playlist=True ), "vimeo": URLPattern( site="vimeo", - pattern=re.compile( - r'(?:https?://)?(?:www\.)?' - r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)' - r'(\d+)(?:|/\w+)*' - ), + pattern=self.VIMEO_PATTERN, supports_timestamp=True ), "twitter": URLPattern( site="twitter", - pattern=re.compile( - r'(?:https?://)?(?:www\.)?' - r'(?:twitter\.com|x\.com)/\w+/status/(\d+)' - ), + pattern=self.TWITTER_PATTERN, requires_api=True - ), - # Add more patterns as needed + ) } - self.direct_extensions = {'.mp4', '.mov', '.avi', '.webm', '.mkv'} + self.direct_extensions: Set[str] = {'.mp4', '.mov', '.avi', '.webm', '.mkv'} def get_pattern(self, site: str) -> Optional[URLPattern]: - """Get pattern for a site""" + """ + Get pattern for a site. + + Args: + site: Site identifier + + Returns: + URLPattern for the site or None if not found + """ return self.patterns.get(site.lower()) def is_supported_site(self, url: str, enabled_sites: Optional[List[str]]) -> bool: - """Check if URL is from a supported site""" + """ + Check if URL is from a supported site. + + Args: + url: URL to check + enabled_sites: List of enabled site identifiers + + Returns: + True if site is supported, False otherwise + """ if not enabled_sites: return True - parsed = urlparse(url.lower()) - domain = parsed.netloc.replace('www.', '') - return any(site.lower() in domain for site in enabled_sites) + try: + parsed = urlparse(url.lower()) + domain = parsed.netloc.replace('www.', '') + return any(site.lower() in domain for site in enabled_sites) + except Exception as e: + logger.error(f"Error checking site support for {url}: {e}") + return False class URLValidator: """Validates extracted URLs""" - def __init__(self, pattern_manager: URLPatternManager): + def __init__(self, pattern_manager: URLPatternManager) -> None: self.pattern_manager = pattern_manager def get_url_type(self, url: str) -> URLType: - """Determine URL type""" - parsed = urlparse(url) - if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions): - return URLType.DIRECT - if any(pattern.pattern.match(url) for pattern in self.pattern_manager.patterns.values()): - return URLType.PLATFORM - return URLType.UNKNOWN + """ + Determine URL type. + + Args: + url: URL to check + + Returns: + URLType indicating the type of URL + """ + try: + parsed = urlparse(url) + if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions): + return URLType.DIRECT + if any(pattern.pattern.match(url) for pattern in self.pattern_manager.patterns.values()): + return URLType.PLATFORM + return URLType.UNKNOWN + except Exception as e: + logger.error(f"Error determining URL type for {url}: {e}") + return URLType.UNKNOWN def is_valid_url(self, url: str) -> bool: - """Validate URL format""" + """ + Validate URL format. 
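# Usage sketch: the class-level patterns above capture the platform video ID
# in group(1). The URL is illustrative.
import re

YOUTUBE_PATTERN = re.compile(
    r'(?:https?://)?(?:www\.)?'
    r'(?:youtube\.com/watch\?v=|youtu\.be/)'
    r'([a-zA-Z0-9_-]{11})'
)

match = YOUTUBE_PATTERN.match("https://youtu.be/dQw4w9WgXcQ")
print(match.group(1) if match else "no match")  # dQw4w9WgXcQ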
+ + Args: + url: URL to validate + + Returns: + True if URL is valid, False otherwise + """ try: result = urlparse(url) return all([result.scheme, result.netloc]) - except Exception: + except Exception as e: + logger.error(f"Error validating URL {url}: {e}") return False class URLMetadataExtractor: """Extracts metadata from URLs""" - def __init__(self, pattern_manager: URLPatternManager): + def __init__(self, pattern_manager: URLPatternManager) -> None: self.pattern_manager = pattern_manager def extract_metadata(self, url: str) -> Optional[URLMetadata]: - """Extract metadata from URL""" + """ + Extract metadata from URL. + + Args: + url: URL to extract metadata from + + Returns: + URLMetadata object or None if extraction fails + """ try: parsed = urlparse(url) @@ -143,33 +203,41 @@ class URLMetadataExtractor: return None except Exception as e: - logger.error(f"Error extracting metadata from URL {url}: {e}") + logger.error(f"Error extracting metadata from URL {url}: {e}", exc_info=True) return None - def _extract_timestamp(self, parsed_url: urlparse) -> Optional[int]: + def _extract_timestamp(self, parsed_url: ParseResult) -> Optional[int]: """Extract timestamp from URL""" try: params = parse_qs(parsed_url.query) if 't' in params: return int(params['t'][0]) return None - except Exception: + except (ValueError, IndexError) as e: + logger.debug(f"Error extracting timestamp: {e}") + return None + except Exception as e: + logger.error(f"Unexpected error extracting timestamp: {e}") return None - def _extract_playlist_id(self, parsed_url: urlparse) -> Optional[str]: + def _extract_playlist_id(self, parsed_url: ParseResult) -> Optional[str]: """Extract playlist ID from URL""" try: params = parse_qs(parsed_url.query) if 'list' in params: return params['list'][0] return None - except Exception: + except (KeyError, IndexError) as e: + logger.debug(f"Error extracting playlist ID: {e}") + return None + except Exception as e: + logger.error(f"Unexpected error extracting playlist ID: {e}") return None class URLExtractor: """Handles extraction of video URLs from messages""" - def __init__(self): + def __init__(self) -> None: self.pattern_manager = URLPatternManager() self.validator = URLValidator(self.pattern_manager) self.metadata_extractor = URLMetadataExtractor(self.pattern_manager) @@ -180,85 +248,113 @@ class URLExtractor: message: discord.Message, enabled_sites: Optional[List[str]] = None ) -> List[URLMetadata]: - """Extract video URLs from message content and attachments""" - urls = [] + """ + Extract video URLs from message content and attachments. 
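# The cache path just below filters with an assignment expression so URLs
# whose metadata extraction fails are dropped silently; a standalone sketch
# of that filter (Python 3.8+). The stand-in extractor is illustrative.
from typing import Optional

def extract_metadata(url: str) -> Optional[dict]:
    return {"url": url} if url.startswith("http") else None  # stand-in extractor

cached = {"https://a.example/v.mp4", "not-a-url"}
results = [meta for url in cached if (meta := extract_metadata(url))]
print(results)  # [{'url': 'https://a.example/v.mp4'}]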
- # Check cache - cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}" - if cache_key in self._url_cache: - return [ - self.metadata_extractor.extract_metadata(url) - for url in self._url_cache[cache_key] - if url # Filter out None values - ] + Args: + message: Discord message to extract URLs from + enabled_sites: Optional list of enabled site identifiers + + Returns: + List of URLMetadata objects for extracted URLs + """ + urls: List[URLMetadata] = [] + + try: + # Check cache + cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}" + if cache_key in self._url_cache: + return [ + metadata for url in self._url_cache[cache_key] + if (metadata := self.metadata_extractor.extract_metadata(url)) + ] - # Extract URLs - content_urls = await self._extract_from_content(message.content, enabled_sites) - attachment_urls = await self._extract_from_attachments(message.attachments) - - # Process all URLs - all_urls = content_urls + attachment_urls - valid_urls = [] - - for url in all_urls: - if not self.validator.is_valid_url(url): - logger.debug(f"Invalid URL format: {url}") - continue - - if not self.pattern_manager.is_supported_site(url, enabled_sites): - logger.debug(f"URL {url} doesn't match any enabled sites") - continue - - metadata = self.metadata_extractor.extract_metadata(url) - if metadata: - urls.append(metadata) - valid_urls.append(url) - else: - logger.debug(f"Could not extract metadata from URL: {url}") + # Extract URLs + content_urls = await self._extract_from_content(message.content, enabled_sites) + attachment_urls = await self._extract_from_attachments(message.attachments) + + # Process all URLs + all_urls = content_urls + attachment_urls + valid_urls: Set[str] = set() + + for url in all_urls: + if not self.validator.is_valid_url(url): + logger.debug(f"Invalid URL format: {url}") + continue + + if not self.pattern_manager.is_supported_site(url, enabled_sites): + logger.debug(f"URL {url} doesn't match any enabled sites") + continue + + metadata = self.metadata_extractor.extract_metadata(url) + if metadata: + urls.append(metadata) + valid_urls.add(url) + else: + logger.debug(f"Could not extract metadata from URL: {url}") - # Update cache - self._url_cache[cache_key] = set(valid_urls) - - return urls + # Update cache + self._url_cache[cache_key] = valid_urls + + return urls + + except Exception as e: + logger.error(f"Error extracting URLs from message {message.id}: {e}", exc_info=True) + return [] async def _extract_from_content( self, - content: str, + content: Optional[str], enabled_sites: Optional[List[str]] ) -> List[str]: """Extract video URLs from message content""" if not content: return [] - urls = [] - for word in content.split(): - if self.validator.get_url_type(word) != URLType.UNKNOWN: - urls.append(word) - - return urls + try: + urls = [] + for word in content.split(): + if self.validator.get_url_type(word) != URLType.UNKNOWN: + urls.append(word) + return urls + except Exception as e: + logger.error(f"Error extracting URLs from content: {e}", exc_info=True) + return [] async def _extract_from_attachments( self, attachments: List[discord.Attachment] ) -> List[str]: """Extract video URLs from message attachments""" - return [ - attachment.url - for attachment in attachments - if any( - attachment.filename.lower().endswith(ext) - for ext in self.pattern_manager.direct_extensions - ) - ] + try: + return [ + attachment.url + for attachment in attachments + if any( + attachment.filename.lower().endswith(ext) + for ext in 
self.pattern_manager.direct_extensions + ) + ] + except Exception as e: + logger.error(f"Error extracting URLs from attachments: {e}", exc_info=True) + return [] def clear_cache(self, message_id: Optional[int] = None) -> None: - """Clear URL cache""" - if message_id: - keys_to_remove = [ - key for key in self._url_cache - if key.startswith(f"{message_id}_") - ] - for key in keys_to_remove: - self._url_cache.pop(key, None) - else: - self._url_cache.clear() + """ + Clear URL cache. + + Args: + message_id: Optional message ID to clear cache for. If None, clears all cache. + """ + try: + if message_id: + keys_to_remove = [ + key for key in self._url_cache + if key.startswith(f"{message_id}_") + ] + for key in keys_to_remove: + self._url_cache.pop(key, None) + else: + self._url_cache.clear() + except Exception as e: + logger.error(f"Error clearing URL cache: {e}", exc_info=True) diff --git a/videoarchiver/utils/__init__.py b/videoarchiver/utils/__init__.py index 031d2b1..c5ae106 100644 --- a/videoarchiver/utils/__init__.py +++ b/videoarchiver/utils/__init__.py @@ -1,5 +1,7 @@ """Utility functions and classes for VideoArchiver""" +from typing import Dict, Optional, Any, Union, List + from .file_ops import ( cleanup_downloads, ensure_directory, @@ -12,16 +14,68 @@ from .directory_manager import DirectoryManager from .permission_manager import PermissionManager from .download_manager import DownloadManager from .compression_manager import CompressionManager -from .progress_tracker import ProgressTracker +from .progress_tracker import ( + ProgressTracker, + ProgressStatus, + DownloadProgress, + CompressionProgress, + CompressionParams +) from .path_manager import PathManager from .exceptions import ( + # Base exception + VideoArchiverError, + ErrorSeverity, + ErrorContext, + + # File operations FileOperationError, DirectoryError, PermissionError, - DownloadError, - CompressionError, - TrackingError, - PathError + FileCleanupError, + + # Video operations + VideoDownloadError, + VideoProcessingError, + VideoVerificationError, + VideoUploadError, + VideoCleanupError, + + # Resource management + ResourceError, + ResourceExhaustedError, + + # Network and API + NetworkError, + DiscordAPIError, + + # Component operations + ComponentError, + ConfigurationError, + DatabaseError, + FFmpegError, + + # Queue operations + QueueError, + QueueHandlerError, + QueueProcessorError, + + # Processing operations + ProcessingError, + ProcessorError, + ValidationError, + DisplayError, + URLExtractionError, + MessageHandlerError, + + # Progress tracking + TrackingError, + + # Cleanup operations + CleanupError, + + # Health monitoring + HealthCheckError ) __all__ = [ @@ -41,16 +92,75 @@ __all__ = [ 'ProgressTracker', 'PathManager', - # Exceptions + # Progress Tracking Types + 'ProgressStatus', + 'DownloadProgress', + 'CompressionProgress', + 'CompressionParams', + + # Base Exceptions + 'VideoArchiverError', + 'ErrorSeverity', + 'ErrorContext', + + # File Operation Exceptions 'FileOperationError', 'DirectoryError', 'PermissionError', - 'DownloadError', - 'CompressionError', - 'TrackingError', - 'PathError' + 'FileCleanupError', + + # Video Operation Exceptions + 'VideoDownloadError', + 'VideoProcessingError', + 'VideoVerificationError', + 'VideoUploadError', + 'VideoCleanupError', + + # Resource Exceptions + 'ResourceError', + 'ResourceExhaustedError', + + # Network and API Exceptions + 'NetworkError', + 'DiscordAPIError', + + # Component Exceptions + 'ComponentError', + 'ConfigurationError', + 'DatabaseError', + 'FFmpegError', + + # Queue Exceptions +
'QueueError', + 'QueueHandlerError', + 'QueueProcessorError', + + # Processing Exceptions + 'ProcessingError', + 'ProcessorError', + 'ValidationError', + 'DisplayError', + 'URLExtractionError', + 'MessageHandlerError', + + # Cleanup Exceptions + 'CleanupError', + + # Health Monitoring Exceptions + 'HealthCheckError', + + # Helper Functions + 'get_download_progress', + 'get_compression_progress', + 'get_active_downloads', + 'get_active_compressions' ] +# Version information +__version__ = "1.0.0" +__author__ = "VideoArchiver Team" +__description__ = "Utility functions and classes for VideoArchiver" + # Initialize shared instances for module-level access directory_manager = DirectoryManager() permission_manager = PermissionManager() @@ -58,3 +168,93 @@ download_manager = DownloadManager() compression_manager = CompressionManager() progress_tracker = ProgressTracker() path_manager = PathManager() + +# Progress tracking helper functions +def get_download_progress(url: Optional[str] = None) -> Union[Dict[str, DownloadProgress], Optional[DownloadProgress]]: + """ + Get progress information for a download. + + Args: + url: Optional URL to get progress for. If None, returns all progress. + + Returns: + If url is provided, returns progress for that URL or None if not found. + If url is None, returns dictionary of all download progress. + + Raises: + TrackingError: If there's an error getting progress information + """ + try: + return progress_tracker.get_download_progress(url) + except Exception as e: + raise TrackingError(f"Failed to get download progress: {str(e)}") + +def get_compression_progress(input_file: Optional[str] = None) -> Union[Dict[str, CompressionProgress], Optional[CompressionProgress]]: + """ + Get progress information for a compression operation. + + Args: + input_file: Optional file to get progress for. If None, returns all progress. + + Returns: + If input_file is provided, returns progress for that file or None if not found. + If input_file is None, returns dictionary of all compression progress. + + Raises: + TrackingError: If there's an error getting progress information + """ + try: + return progress_tracker.get_compression_progress(input_file) + except Exception as e: + raise TrackingError(f"Failed to get compression progress: {str(e)}") + +def get_active_downloads() -> Dict[str, DownloadProgress]: + """ + Get all active downloads. + + Returns: + Dictionary mapping URLs to their download progress information + + Raises: + TrackingError: If there's an error getting active downloads + """ + try: + return progress_tracker.get_active_downloads() + except Exception as e: + raise TrackingError(f"Failed to get active downloads: {str(e)}") + +def get_active_compressions() -> Dict[str, CompressionProgress]: + """ + Get all active compression operations. + + Returns: + Dictionary mapping file paths to their compression progress information + + Raises: + TrackingError: If there's an error getting active compressions + """ + try: + return progress_tracker.get_active_compressions() + except Exception as e: + raise TrackingError(f"Failed to get active compressions: {str(e)}") + +# Error handling helper functions +def create_error_context( + component: str, + operation: str, + details: Optional[Dict[str, Any]] = None, + severity: ErrorSeverity = ErrorSeverity.MEDIUM +) -> ErrorContext: + """ + Create an error context object. 
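# Usage sketch for the create_error_context helper defined further down in
# this file; ErrorSeverity.MEDIUM is the default shown in its signature, and
# the keyword names follow that signature. The payload is illustrative.
from videoarchiver.utils import create_error_context, ErrorSeverity

ctx = create_error_context(
    component="queue_handler",
    operation="archive_video",
    details={"guild_id": 1234},
    severity=ErrorSeverity.MEDIUM,
)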
+ + Args: + component: Component where error occurred + operation: Operation that failed + details: Optional error details + severity: Error severity level + + Returns: + ErrorContext object + """ + return ErrorContext(component, operation, details, severity) diff --git a/videoarchiver/utils/compression_handler.py b/videoarchiver/utils/compression_handler.py new file mode 100644 index 0000000..d82debf --- /dev/null +++ b/videoarchiver/utils/compression_handler.py @@ -0,0 +1,212 @@ +"""Video compression handling utilities""" + +import os +import asyncio +import logging +import subprocess +from datetime import datetime +from typing import Dict, Optional, Callable, Set, Tuple + +from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager +from videoarchiver.ffmpeg.exceptions import CompressionError +from videoarchiver.utils.exceptions import VideoVerificationError +from videoarchiver.utils.file_operations import FileOperations +from videoarchiver.utils.progress_handler import ProgressHandler + +logger = logging.getLogger("VideoArchiver") + +class CompressionHandler: + """Handles video compression operations""" + + def __init__(self, ffmpeg_mgr: FFmpegManager, progress_handler: ProgressHandler, + file_ops: FileOperations): + self.ffmpeg_mgr = ffmpeg_mgr + self.progress_handler = progress_handler + self.file_ops = file_ops + self._active_processes: Set[asyncio.subprocess.Process] = set() + self._processes_lock = asyncio.Lock() + self._shutting_down = False + self.max_file_size = 0 # Will be set during compression + + async def cleanup(self) -> None: + """Clean up compression resources""" + self._shutting_down = True + try: + async with self._processes_lock: + for process in self._active_processes: + try: + process.terminate() + await asyncio.sleep(0.1) + if process.returncode is None: + process.kill() + except Exception as e: + logger.error(f"Error killing compression process: {e}") + self._active_processes.clear() + finally: + self._shutting_down = False + + async def compress_video( + self, + input_file: str, + output_file: str, + max_size_mb: int, + progress_callback: Optional[Callable[[float], None]] = None + ) -> Tuple[bool, str]: + """Compress video to target size""" + if self._shutting_down: + return False, "Compression handler is shutting down" + + self.max_file_size = max_size_mb + + try: + # Get optimal compression parameters + compression_params = self.ffmpeg_mgr.get_compression_params( + input_file, max_size_mb + ) + + # Try hardware acceleration first + success = await self._try_compression( + input_file, + output_file, + compression_params, + progress_callback, + use_hardware=True + ) + + # Fall back to CPU if hardware acceleration fails + if not success: + logger.warning("Hardware acceleration failed, falling back to CPU encoding") + success = await self._try_compression( + input_file, + output_file, + compression_params, + progress_callback, + use_hardware=False + ) + + if not success: + return False, "Failed to compress with both hardware and CPU encoding" + + # Verify compressed file + if not self.file_ops.verify_video_file(output_file, str(self.ffmpeg_mgr.get_ffprobe_path())): + return False, "Compressed file verification failed" + + # Check final size + within_limit, final_size = self.file_ops.check_file_size(output_file, max_size_mb) + if not within_limit: + return False, f"Failed to compress to target size: {final_size} bytes" + + return True, "" + + except Exception as e: + return False, str(e) + + async def _try_compression( + self, + input_file: str, + output_file: str, + params:
+    async def _try_compression(
+        self,
+        input_file: str,
+        output_file: str,
+        params: Dict[str, str],
+        progress_callback: Optional[Callable[[float], None]] = None,
+        use_hardware: bool = True,
+    ) -> bool:
+        """Attempt video compression with given parameters"""
+        if self._shutting_down:
+            return False
+
+        try:
+            # Build FFmpeg command
+            ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
+            cmd = [ffmpeg_path, "-y", "-i", input_file]
+
+            # Add progress monitoring
+            cmd.extend(["-progress", "pipe:1"])
+
+            # Modify parameters based on hardware acceleration preference
+            if use_hardware:
+                gpu_info = self.ffmpeg_mgr.gpu_info
+                if gpu_info["nvidia"] and params.get("c:v") == "libx264":
+                    params["c:v"] = "h264_nvenc"
+                elif gpu_info["amd"] and params.get("c:v") == "libx264":
+                    params["c:v"] = "h264_amf"
+                elif gpu_info["intel"] and params.get("c:v") == "libx264":
+                    params["c:v"] = "h264_qsv"
+            else:
+                params["c:v"] = "libx264"
+
+            # Add all parameters to command
+            for key, value in params.items():
+                cmd.extend([f"-{key}", str(value)])
+
+            # Add output file
+            cmd.append(output_file)
+
+            # Get video duration for progress calculation
+            duration = self.file_ops.get_video_duration(input_file, str(self.ffmpeg_mgr.get_ffprobe_path()))
+
+            # Initialize compression progress
+            self.progress_handler.update(input_file, {
+                "active": True,
+                "filename": os.path.basename(input_file),
+                "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
+                "percent": 0,
+                "elapsed_time": "0:00",
+                "input_size": os.path.getsize(input_file),
+                "current_size": 0,
+                "target_size": self.max_file_size * 1024 * 1024,
+                "codec": params.get("c:v", "unknown"),
+                "hardware_accel": use_hardware,
+                "preset": params.get("preset", "unknown"),
+                "crf": params.get("crf", "unknown"),
+                "duration": duration,
+                "bitrate": params.get("b:v", "unknown"),
+                "audio_codec": params.get("c:a", "unknown"),
+                "audio_bitrate": params.get("b:a", "unknown"),
+            })
+
+            # Run compression with progress monitoring
+            process = None
+            try:
+                process = await asyncio.create_subprocess_exec(
+                    *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+                )
+
+                # Track the process
+                async with self._processes_lock:
+                    self._active_processes.add(process)
+
+                start_time = datetime.utcnow()
+
+                while True:
+                    if self._shutting_down:
+                        process.terminate()
+                        return False
+
+                    line = await process.stdout.readline()
+                    if not line:
+                        break
+
+                    try:
+                        line = line.decode().strip()
+                        if line.startswith("out_time_ms="):
+                            current_time = int(line.split("=")[1]) / 1000000
+                            self.progress_handler.handle_compression_progress(
+                                input_file, current_time, duration,
+                                output_file, start_time, progress_callback
+                            )
+                    except Exception as e:
+                        logger.error(f"Error parsing FFmpeg progress: {e}")
+
+                await process.wait()
+                return os.path.exists(output_file)
+
+            except Exception as e:
+                logger.error(f"Error during compression process: {e}")
+                return False
+            finally:
+                # Remove process from tracking (guard against launch failure)
+                if process is not None:
+                    async with self._processes_lock:
+                        self._active_processes.discard(process)
+
+        except Exception as e:
+            logger.error(f"Compression attempt failed: {str(e)}")
+            return False
diff --git a/videoarchiver/utils/download_core.py b/videoarchiver/utils/download_core.py
new file mode 100644
index 0000000..b001b57
--- /dev/null
+++ b/videoarchiver/utils/download_core.py
@@ -0,0 +1,271 @@
+"""Core download functionality for video archiver"""
+
+import os
+import asyncio
+import logging
+import yt_dlp
+from typing import Dict, Optional, Callable, Tuple
+from pathlib import Path
+
+from videoarchiver.utils.url_validator import check_url_support
+from videoarchiver.utils.progress_handler 
import ProgressHandler, CancellableYTDLLogger +from videoarchiver.utils.file_operations import FileOperations +from videoarchiver.utils.compression_handler import CompressionHandler +from videoarchiver.utils.process_manager import ProcessManager +from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager + +logger = logging.getLogger("VideoArchiver") + +class DownloadCore: + """Core download functionality for video archiver""" + + def __init__( + self, + download_path: str, + video_format: str, + max_quality: int, + max_file_size: int, + enabled_sites: Optional[list[str]] = None, + concurrent_downloads: int = 2, + ffmpeg_mgr: Optional[FFmpegManager] = None, + ): + self.download_path = Path(download_path) + self.download_path.mkdir(parents=True, exist_ok=True) + os.chmod(str(self.download_path), 0o755) + + self.video_format = video_format + self.max_quality = max_quality + self.max_file_size = max_file_size + self.enabled_sites = enabled_sites + self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager() + + # Initialize components + self.process_manager = ProcessManager(concurrent_downloads) + self.progress_handler = ProgressHandler() + self.file_ops = FileOperations() + self.compression_handler = CompressionHandler( + self.ffmpeg_mgr, self.progress_handler, self.file_ops + ) + + # Create cancellable logger + self.ytdl_logger = CancellableYTDLLogger() + + # Configure yt-dlp options + self.ydl_opts = self._configure_ydl_options() + + def _configure_ydl_options(self) -> Dict: + """Configure yt-dlp options""" + return { + "format": f"bv*[height<={self.max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={self.max_quality}]/best", + "outtmpl": "%(title)s.%(ext)s", + "merge_output_format": self.video_format, + "quiet": True, + "no_warnings": True, + "extract_flat": True, + "concurrent_fragment_downloads": 1, + "retries": 5, + "fragment_retries": 5, + "file_access_retries": 3, + "extractor_retries": 5, + "postprocessor_hooks": [self._check_file_size], + "progress_hooks": [self._handle_progress], + "ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()), + "ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()), + "paths": {"home": str(self.download_path)}, + "logger": self.ytdl_logger, + "ignoreerrors": True, + "no_color": True, + "geo_bypass": True, + "socket_timeout": 60, + "http_chunk_size": 1048576, + "external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]}, + "max_sleep_interval": 5, + "sleep_interval": 1, + "max_filesize": self.max_file_size * 1024 * 1024, + } + + def _check_file_size(self, info: Dict) -> None: + """Check if file size is within limits""" + if info.get("filepath") and os.path.exists(info["filepath"]): + try: + size = os.path.getsize(info["filepath"]) + if size > (self.max_file_size * 1024 * 1024): + logger.info( + f"File exceeds size limit, will compress: {info['filepath']}" + ) + except OSError as e: + logger.error(f"Error checking file size: {str(e)}") + + def _handle_progress(self, d: Dict) -> None: + """Handle download progress updates""" + url = d.get("info_dict", {}).get("webpage_url", "unknown") + self.progress_handler.handle_download_progress(d, url) + + def is_supported_url(self, url: str) -> bool: + """Check if URL is supported""" + return check_url_support(url, self.ydl_opts, self.enabled_sites) + + async def download_video( + self, url: str, progress_callback: Optional[Callable[[float], None]] = None + ) -> Tuple[bool, str, str]: + """Download and process a video""" + if self.process_manager.is_shutting_down: + return False, "", "Download manager is shutting down" 
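+
+        # Flow note: the video is downloaded into download_path, then either
+        # compressed (when it exceeds max_file_size) or moved into its final
+        # location; every failure path below cleans up partial files.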
+ + # Initialize progress tracking + self.progress_handler.initialize_progress(url) + original_file = None + compressed_file = None + + try: + # Download the video + success, file_path, error = await self._safe_download( + url, str(self.download_path), progress_callback + ) + if not success: + return False, "", error + + original_file = file_path + await self.process_manager.track_download(url, original_file) + + # Check file size and compress if needed + within_limit, file_size = self.file_ops.check_file_size(original_file, self.max_file_size) + if not within_limit: + logger.info(f"Compressing video: {original_file}") + try: + compressed_file = os.path.join( + self.download_path, + f"compressed_{os.path.basename(original_file)}", + ) + + # Attempt compression + success, error = await self.compression_handler.compress_video( + original_file, + compressed_file, + self.max_file_size, + progress_callback + ) + + if not success: + await self._cleanup_files(original_file, compressed_file) + return False, "", error + + # Verify compressed file + if not self.file_ops.verify_video_file( + compressed_file, + str(self.ffmpeg_mgr.get_ffprobe_path()) + ): + await self._cleanup_files(original_file, compressed_file) + return False, "", "Compressed file verification failed" + + # Delete original and return compressed + await self.file_ops.safe_delete_file(original_file) + return True, compressed_file, "" + + except Exception as e: + error_msg = f"Compression failed: {str(e)}" + await self._cleanup_files(original_file, compressed_file) + return False, "", error_msg + else: + # Move file to final location if no compression needed + final_path = os.path.join( + self.download_path, + os.path.basename(original_file) + ) + success = await self.file_ops.safe_move_file(original_file, final_path) + if not success: + await self._cleanup_files(original_file) + return False, "", "Failed to move file to final location" + return True, final_path, "" + + except Exception as e: + logger.error(f"Download error: {str(e)}") + await self._cleanup_files(original_file, compressed_file) + return False, "", str(e) + + finally: + # Clean up tracking + await self.process_manager.untrack_download(url) + self.progress_handler.complete(url) + + async def _safe_download( + self, + url: str, + output_dir: str, + progress_callback: Optional[Callable[[float], None]] = None, + ) -> Tuple[bool, str, str]: + """Safely download video with retries""" + if self.process_manager.is_shutting_down: + return False, "", "Download manager is shutting down" + + last_error = None + for attempt in range(5): # Max retries + try: + ydl_opts = self.ydl_opts.copy() + ydl_opts["outtmpl"] = os.path.join(output_dir, ydl_opts["outtmpl"]) + + # Add progress callback + if progress_callback: + original_progress_hook = ydl_opts["progress_hooks"][0] + + def combined_progress_hook(d): + original_progress_hook(d) + if d["status"] == "downloading": + try: + percent = float( + d.get("_percent_str", "0").replace("%", "") + ) + progress_callback(percent) + except Exception as e: + logger.error(f"Error in progress callback: {e}") + + ydl_opts["progress_hooks"] = [combined_progress_hook] + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = await asyncio.get_event_loop().run_in_executor( + self.process_manager.download_pool, + lambda: ydl.extract_info(url, download=True) + ) + + if info is None: + raise Exception("Failed to extract video information") + + file_path = os.path.join(output_dir, ydl.prepare_filename(info)) + if not os.path.exists(file_path): + raise 
FileNotFoundError("Download completed but file not found") + + if not self.file_ops.verify_video_file( + file_path, + str(self.ffmpeg_mgr.get_ffprobe_path()) + ): + raise Exception("Downloaded file is not a valid video") + + return True, file_path, "" + + except Exception as e: + last_error = str(e) + logger.error(f"Download attempt {attempt + 1} failed: {str(e)}") + if attempt < 4: # Less than max retries + delay = 10 * (2**attempt) + (attempt * 2) # Exponential backoff + await asyncio.sleep(delay) + else: + return False, "", f"All download attempts failed: {last_error}" + + async def _cleanup_files(self, *files: str) -> None: + """Clean up multiple files""" + for file in files: + if file and os.path.exists(file): + await self.file_ops.safe_delete_file(file) + + async def cleanup(self) -> None: + """Clean up resources""" + await self.process_manager.cleanup() + await self.compression_handler.cleanup() + + async def force_cleanup(self) -> None: + """Force cleanup of all resources""" + self.ytdl_logger.cancelled = True + await self.process_m + self.ytdl_logger.cancelled = True + await self.process_manager.force_cleanup() + await self.compress \ No newline at end of file diff --git a/videoarchiver/utils/exceptions.py b/videoarchiver/utils/exceptions.py index 0e2e24a..a370b43 100644 --- a/videoarchiver/utils/exceptions.py +++ b/videoarchiver/utils/exceptions.py @@ -1,8 +1,44 @@ """Custom exceptions for VideoArchiver""" +from typing import Optional, Dict, Any +from enum import Enum, auto + +class ErrorSeverity(Enum): + """Severity levels for errors""" + LOW = auto() + MEDIUM = auto() + HIGH = auto() + CRITICAL = auto() + +class ErrorContext: + """Context information for errors""" + def __init__( + self, + component: str, + operation: str, + details: Optional[Dict[str, Any]] = None, + severity: ErrorSeverity = ErrorSeverity.MEDIUM + ) -> None: + self.component = component + self.operation = operation + self.details = details or {} + self.severity = severity + + def __str__(self) -> str: + return ( + f"[{self.severity.name}] {self.component}.{self.operation}: " + f"{', '.join(f'{k}={v}' for k, v in self.details.items())}" + ) + class VideoArchiverError(Exception): """Base exception for VideoArchiver errors""" - pass + def __init__( + self, + message: str, + context: Optional[ErrorContext] = None + ) -> None: + self.context = context + super().__init__(f"{context}: {message}" if context else message) class VideoDownloadError(VideoArchiverError): """Error downloading video""" @@ -38,7 +74,17 @@ class PermissionError(VideoArchiverError): class NetworkError(VideoArchiverError): """Error with network operations""" - pass + def __init__( + self, + message: str, + url: Optional[str] = None, + status_code: Optional[int] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.url = url + self.status_code = status_code + details = f" (URL: {url}" + (f", Status: {status_code})" if status_code else ")") + super().__init__(message + details, context) class ResourceError(VideoArchiverError): """Error with system resources""" @@ -54,15 +100,27 @@ class ComponentError(VideoArchiverError): class DiscordAPIError(VideoArchiverError): """Error with Discord API operations""" - def __init__(self, message: str, status_code: int = None): + def __init__( + self, + message: str, + status_code: Optional[int] = None, + context: Optional[ErrorContext] = None + ) -> None: self.status_code = status_code - super().__init__(f"Discord API Error: {message} (Status: {status_code if status_code else 'Unknown'})") + 
details = f" (Status: {status_code})" if status_code else "" + super().__init__(f"Discord API Error: {message}{details}", context) class ResourceExhaustedError(VideoArchiverError): """Error when system resources are exhausted""" - def __init__(self, message: str, resource_type: str = None): + def __init__( + self, + message: str, + resource_type: Optional[str] = None, + context: Optional[ErrorContext] = None + ) -> None: self.resource_type = resource_type - super().__init__(f"Resource exhausted: {message} (Type: {resource_type if resource_type else 'Unknown'})") + details = f" (Type: {resource_type})" if resource_type else "" + super().__init__(f"Resource exhausted: {message}{details}", context) class ProcessingError(VideoArchiverError): """Error during video processing""" @@ -74,4 +132,126 @@ class CleanupError(VideoArchiverError): class FileOperationError(VideoArchiverError): """Error during file operations""" + def __init__( + self, + message: str, + path: Optional[str] = None, + operation: Optional[str] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.path = path + self.operation = operation + details = [] + if path: + details.append(f"Path: {path}") + if operation: + details.append(f"Operation: {operation}") + details_str = f" ({', '.join(details)})" if details else "" + super().__init__(f"File operation error: {message}{details_str}", context) + +# New exceptions for processor components +class ProcessorError(VideoArchiverError): + """Error in video processor operations""" pass + +class ValidationError(VideoArchiverError): + """Error in message or content validation""" + pass + +class DisplayError(VideoArchiverError): + """Error in status display operations""" + pass + +class URLExtractionError(VideoArchiverError): + """Error extracting URLs from content""" + def __init__( + self, + message: str, + url: Optional[str] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.url = url + details = f" (URL: {url})" if url else "" + super().__init__(f"URL extraction error: {message}{details}", context) + +class MessageHandlerError(VideoArchiverError): + """Error in message handling operations""" + def __init__( + self, + message: str, + message_id: Optional[int] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.message_id = message_id + details = f" (Message ID: {message_id})" if message_id else "" + super().__init__(f"Message handler error: {message}{details}", context) + +class QueueHandlerError(VideoArchiverError): + """Error in queue handling operations""" + pass + +class QueueProcessorError(VideoArchiverError): + """Error in queue processing operations""" + pass + +class FFmpegError(VideoArchiverError): + """Error in FFmpeg operations""" + def __init__( + self, + message: str, + command: Optional[str] = None, + exit_code: Optional[int] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.command = command + self.exit_code = exit_code + details = [] + if command: + details.append(f"Command: {command}") + if exit_code is not None: + details.append(f"Exit Code: {exit_code}") + details_str = f" ({', '.join(details)})" if details else "" + super().__init__(f"FFmpeg error: {message}{details_str}", context) + +class DatabaseError(VideoArchiverError): + """Error in database operations""" + def __init__( + self, + message: str, + query: Optional[str] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.query = query + details = f" (Query: {query})" if query else "" + super().__init__(f"Database error: 
{message}{details}", context) + +class HealthCheckError(VideoArchiverError): + """Error in health check operations""" + def __init__( + self, + message: str, + component: Optional[str] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.component = component + details = f" (Component: {component})" if component else "" + super().__init__(f"Health check error: {message}{details}", context) + +class TrackingError(VideoArchiverError): + """Error in progress tracking operations""" + def __init__( + self, + message: str, + operation: Optional[str] = None, + item_id: Optional[str] = None, + context: Optional[ErrorContext] = None + ) -> None: + self.operation = operation + self.item_id = item_id + details = [] + if operation: + details.append(f"Operation: {operation}") + if item_id: + details.append(f"Item ID: {item_id}") + details_str = f" ({', '.join(details)})" if details else "" + super().__init__(f"Progress tracking error: {message}{details_str}", context) diff --git a/videoarchiver/utils/file_operations.py b/videoarchiver/utils/file_operations.py new file mode 100644 index 0000000..1aa8349 --- /dev/null +++ b/videoarchiver/utils/file_operations.py @@ -0,0 +1,138 @@ +"""Safe file operations with retry logic""" + +import os +import shutil +import asyncio +import logging +import json +import subprocess +from typing import Tuple +from pathlib import Path + +from videoarchiver.utils.exceptions import VideoVerificationError +from videoarchiver.utils.file_deletion import secure_delete_file + +logger = logging.getLogger("VideoArchiver") + +class FileOperations: + """Handles safe file operations with retries""" + + def __init__(self, max_retries: int = 3, retry_delay: int = 1): + self.max_retries = max_retries + self.retry_delay = retry_delay + + async def safe_delete_file(self, file_path: str) -> bool: + """Safely delete a file with retries""" + for attempt in range(self.max_retries): + try: + if os.path.exists(file_path): + await secure_delete_file(file_path) + return True + except Exception as e: + logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}") + if attempt == self.max_retries - 1: + return False + await asyncio.sleep(self.retry_delay * (attempt + 1)) + return False + + async def safe_move_file(self, src: str, dst: str) -> bool: + """Safely move a file with retries""" + for attempt in range(self.max_retries): + try: + os.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.move(src, dst) + return True + except Exception as e: + logger.error(f"Move attempt {attempt + 1} failed: {str(e)}") + if attempt == self.max_retries - 1: + return False + await asyncio.sleep(self.retry_delay * (attempt + 1)) + return False + + def verify_video_file(self, file_path: str, ffprobe_path: str) -> bool: + """Verify video file integrity""" + try: + cmd = [ + ffprobe_path, + "-v", + "quiet", + "-print_format", + "json", + "-show_format", + "-show_streams", + file_path, + ] + + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + timeout=30, + ) + + if result.returncode != 0: + raise VideoVerificationError(f"FFprobe failed: {result.stderr}") + + probe = json.loads(result.stdout) + + # Verify video stream + video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"] + if not video_streams: + raise VideoVerificationError("No video streams found") + + # Verify duration + duration = float(probe["format"].get("duration", 0)) + if duration <= 0: + raise VideoVerificationError("Invalid video duration") + + # Verify file is 
+
+            # Verify file is readable
+            try:
+                with open(file_path, "rb") as f:
+                    f.seek(0, 2)
+                    if f.tell() == 0:
+                        raise VideoVerificationError("Empty file")
+            except Exception as e:
+                raise VideoVerificationError(f"File read error: {str(e)}")
+
+            return True
+
+        except subprocess.TimeoutExpired:
+            logger.error(f"FFprobe timed out for {file_path}")
+            return False
+        except json.JSONDecodeError:
+            logger.error(f"Invalid FFprobe output for {file_path}")
+            return False
+        except Exception as e:
+            logger.error(f"Error verifying video file {file_path}: {e}")
+            return False
+
+    def get_video_duration(self, file_path: str, ffprobe_path: str) -> float:
+        """Get video duration in seconds"""
+        try:
+            cmd = [
+                ffprobe_path,
+                "-v",
+                "quiet",
+                "-print_format",
+                "json",
+                "-show_format",
+                file_path,
+            ]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+            if result.returncode != 0:
+                raise Exception(f"FFprobe failed: {result.stderr}")
+
+            data = json.loads(result.stdout)
+            return float(data["format"]["duration"])
+        except Exception as e:
+            logger.error(f"Error getting video duration: {e}")
+            return 0
+
+    def check_file_size(self, file_path: str, max_size_mb: int) -> Tuple[bool, int]:
+        """Check if file size is within limits"""
+        try:
+            if os.path.exists(file_path):
+                size = os.path.getsize(file_path)
+                max_size = max_size_mb * 1024 * 1024
+                # Report whether the file fits the limit along with its size
+                return size <= max_size, size
+            return False, 0
+        except OSError as e:
+            logger.error(f"Error checking file size: {e}")
+            return False, 0
diff --git a/videoarchiver/utils/process_manager.py b/videoarchiver/utils/process_manager.py
new file mode 100644
index 0000000..6c7b8e6
--- /dev/null
+++ b/videoarchiver/utils/process_manager.py
@@ -0,0 +1,111 @@
+"""Process management and cleanup utilities"""
+
+import asyncio
+import logging
+import subprocess
+from typing import Set, Dict, Any
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor
+
+logger = logging.getLogger("VideoArchiver")
+
+class ProcessManager:
+    """Manages processes and resources for video operations"""
+
+    def __init__(self, concurrent_downloads: int = 2):
+        self._active_processes: Set[subprocess.Popen] = set()
+        self._processes_lock = asyncio.Lock()
+        self._shutting_down = False
+
+        # Create thread pool with proper naming
+        self.download_pool = ThreadPoolExecutor(
+            max_workers=max(1, min(3, concurrent_downloads)),
+            thread_name_prefix="videoarchiver_download"
+        )
+
+        # Track active downloads
+        self.active_downloads: Dict[str, Dict[str, Any]] = {}
+        self._downloads_lock = asyncio.Lock()
+
+    async def cleanup(self) -> None:
+        """Clean up resources with proper shutdown"""
+        self._shutting_down = True
+
+        try:
+            # Kill any active processes
+            async with self._processes_lock:
+                for process in self._active_processes:
+                    try:
+                        process.terminate()
+                        await asyncio.sleep(0.1)  # Give process time to terminate
+                        if process.poll() is None:
+                            process.kill()  # Force kill if still running
+                    except Exception as e:
+                        logger.error(f"Error killing process: {e}")
+                self._active_processes.clear()
+
+            # Clean up thread pool
+            self.download_pool.shutdown(wait=False, cancel_futures=True)
+
+            # Clean up active downloads
+            async with self._downloads_lock:
+                self.active_downloads.clear()
+
+        except Exception as e:
+            logger.error(f"Error during process manager cleanup: {e}")
+        finally:
+            self._shutting_down = False
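+
+    # Teardown contract: cleanup() terminates children politely and gives them
+    # a moment to exit, while force_cleanup() below kills immediately; both
+    # shut the thread pool down with cancel_futures=True.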
+    async def force_cleanup(self) -> None:
+        """Force cleanup of all resources"""
+        try:
+            # Kill all processes immediately
+            async with self._processes_lock:
+                for process in self._active_processes:
+                    try:
+                        process.kill()
+                    except Exception as e:
+                        logger.error(f"Error force killing process: {e}")
+                self._active_processes.clear()
+
+            # Force shutdown thread pool
+            self.download_pool.shutdown(wait=False, cancel_futures=True)
+
+            # Clear all tracking
+            async with self._downloads_lock:
+                self.active_downloads.clear()
+
+        except Exception as e:
+            logger.error(f"Error during force cleanup: {e}")
+
+    async def track_download(self, url: str, file_path: str) -> None:
+        """Track a new download"""
+        async with self._downloads_lock:
+            self.active_downloads[url] = {
+                "file_path": file_path,
+                "start_time": datetime.utcnow(),
+            }
+
+    async def untrack_download(self, url: str) -> None:
+        """Remove download from tracking"""
+        async with self._downloads_lock:
+            self.active_downloads.pop(url, None)
+
+    async def track_process(self, process: subprocess.Popen) -> None:
+        """Track a new process"""
+        async with self._processes_lock:
+            self._active_processes.add(process)
+
+    async def untrack_process(self, process: subprocess.Popen) -> None:
+        """Remove process from tracking"""
+        async with self._processes_lock:
+            self._active_processes.discard(process)
+
+    @property
+    def is_shutting_down(self) -> bool:
+        """Check if manager is shutting down"""
+        return self._shutting_down
+
+    def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
+        """Get current active downloads"""
+        return self.active_downloads
diff --git a/videoarchiver/utils/progress_handler.py b/videoarchiver/utils/progress_handler.py
new file mode 100644
index 0000000..273e18c
--- /dev/null
+++ b/videoarchiver/utils/progress_handler.py
@@ -0,0 +1,126 @@
+"""Progress tracking and logging utilities for video downloads"""
+
+import os
+import logging
+import yt_dlp
+from datetime import datetime
+from typing import Dict, Any, Optional, Callable
+
+logger = logging.getLogger("VideoArchiver")
+
+class CancellableYTDLLogger:
+    """Custom yt-dlp logger that can handle cancellation"""
+    def __init__(self):
+        self.cancelled = False
+
+    def debug(self, msg):
+        if self.cancelled:
+            raise yt_dlp.utils.DownloadError("Download cancelled")
+        logger.debug(msg)
+
+    def warning(self, msg):
+        if self.cancelled:
+            raise yt_dlp.utils.DownloadError("Download cancelled")
+        logger.warning(msg)
+
+    def error(self, msg):
+        if self.cancelled:
+            raise yt_dlp.utils.DownloadError("Download cancelled")
+        logger.error(msg)
+
+class ProgressHandler:
+    """Handles progress tracking and callbacks for video operations"""
+    def __init__(self):
+        self.progress_data: Dict[str, Dict[str, Any]] = {}
+
+    def initialize_progress(self, url: str) -> None:
+        """Initialize progress tracking for a URL"""
+        self.progress_data[url] = {
+            "active": True,
+            "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
+            "percent": 0,
+            "speed": "N/A",
+            "eta": "N/A",
+            "downloaded_bytes": 0,
+            "total_bytes": 0,
+            "retries": 0,
+            "fragment_count": 0,
+            "fragment_index": 0,
+            "video_title": "Unknown",
+            "extractor": "Unknown",
+            "format": "Unknown",
+            "resolution": "Unknown",
+            "fps": "Unknown",
+        }
+
+    def update(self, key: str, data: Dict[str, Any]) -> None:
+        """Update progress data for a key, creating the entry if needed"""
+        # Compression jobs call update() without initialize_progress(), so the
+        # entry must be created lazily rather than silently dropped
+        self.progress_data.setdefault(key, {}).update(data)
+
+    def complete(self, key: str) -> None:
+        """Mark progress as complete for a key"""
+        if key in self.progress_data:
+            self.progress_data[key]["active"] = False
+            self.progress_data[key]["percent"] = 100
+
+    def get_progress(self, key: str) -> Optional[Dict[str, Any]]:
+        """Get progress data for a key"""
+        return self.progress_data.get(key)
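+
+    # The two handlers below consume raw yt-dlp progress-hook payloads and
+    # FFmpeg "-progress pipe:1" timestamps respectively; each swallows its own
+    # errors so a progress glitch can never abort a download or compression.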
+    def handle_download_progress(self, d: Dict[str, Any], url: str,
+                                 progress_callback: Optional[Callable[[float], None]] = None) -> None:
+        """Handle download progress updates"""
+        try:
+            if d["status"] == "downloading":
+                progress_data = {
+                    "active": True,
+                    "percent": float(d.get("_percent_str", "0").replace("%", "")),
+                    "speed": d.get("_speed_str", "N/A"),
+                    "eta": d.get("_eta_str", "N/A"),
+                    "downloaded_bytes": d.get("downloaded_bytes", 0),
+                    "total_bytes": d.get("total_bytes", 0) or d.get("total_bytes_estimate", 0),
+                    "retries": d.get("retry_count", 0),
+                    "fragment_count": d.get("fragment_count", 0),
+                    "fragment_index": d.get("fragment_index", 0),
+                    "video_title": d.get("info_dict", {}).get("title", "Unknown"),
+                    "extractor": d.get("info_dict", {}).get("extractor", "Unknown"),
+                    "format": d.get("info_dict", {}).get("format", "Unknown"),
+                    "resolution": d.get("info_dict", {}).get("resolution", "Unknown"),
+                    "fps": d.get("info_dict", {}).get("fps", "Unknown"),
+                }
+                self.update(url, progress_data)
+
+                if progress_callback:
+                    progress_callback(progress_data["percent"])
+
+                logger.debug(
+                    f"Download progress: {progress_data['percent']}% at {progress_data['speed']}, "
+                    f"ETA: {progress_data['eta']}, Downloaded: {progress_data['downloaded_bytes']}/"
+                    f"{progress_data['total_bytes']} bytes"
+                )
+            elif d["status"] == "finished":
+                logger.info(f"Download completed: {d.get('filename', 'unknown')}")
+
+        except Exception as e:
+            logger.error(f"Error in progress handler: {str(e)}")
+
+    def handle_compression_progress(self, input_file: str, current_time: float, duration: float,
+                                    output_file: str, start_time: datetime,
+                                    progress_callback: Optional[Callable[[float], None]] = None) -> None:
+        """Handle compression progress updates"""
+        try:
+            if duration > 0:
+                progress = min(100, (current_time / duration) * 100)
+                elapsed = datetime.utcnow() - start_time
+
+                self.update(input_file, {
+                    "percent": progress,
+                    "elapsed_time": str(elapsed).split(".")[0],
+                    "current_size": os.path.getsize(output_file) if os.path.exists(output_file) else 0,
+                    "current_time": current_time,
+                })
+
+                if progress_callback:
+                    progress_callback(progress)
+
+        except Exception as e:
+            logger.error(f"Error updating compression progress: {str(e)}")
diff --git a/videoarchiver/utils/progress_tracker.py b/videoarchiver/utils/progress_tracker.py
index 2a889b0..d725cb1 100644
--- a/videoarchiver/utils/progress_tracker.py
+++ b/videoarchiver/utils/progress_tracker.py
@@ -1,109 +1,205 @@
-"""Module for tracking download and compression progress"""
+"""Progress tracking module."""
 
 import logging
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, Union
+from dataclasses import dataclass
+from enum import Enum
 from datetime import datetime
 
-logger = logging.getLogger("ProgressTracker")
+logger = logging.getLogger(__name__)
+
+class ProgressStatus(Enum):
+    """Final status values for tracked operations (minimal definition; the
+    methods below only rely on COMPLETED and .value)"""
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+# Progress payloads are plain dictionaries
+DownloadProgress = Dict[str, Any]
+CompressionProgress = Dict[str, Any]
+
+@dataclass
+class CompressionParams:
+    """Parameters describing a compression job (minimal stub matching the
+    attributes accessed in start_compression)"""
+    input_file: str
+    input_size: int
+    target_size: int
+    duration: float
+    use_hardware: bool
+    codec_params: Dict[str, str]
 
 class ProgressTracker:
-    """Tracks progress of downloads and compression operations"""
+    """Progress tracker singleton."""
+    _instance = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
 
     def __init__(self):
-        self._download_progress: Dict[str, Dict[str, Any]] = {}
-        self._compression_progress: Dict[str, Dict[str, Any]] = {}
+        # __new__ always sets _initialized, so test its value rather than hasattr()
+        if not self._initialized:
+            self._data: Dict[str, Dict[str, Any]] = {}
+            self._download_progress: Dict[str, DownloadProgress] = {}
+            self._compression_progress: Dict[str, CompressionProgress] = {}
+            self._initialized = True
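+
+    # Singleton mechanics: __new__ returns the one shared instance and the
+    # _initialized flag keeps repeated construction from wiping progress data.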
"fragment_index": 0, - "video_title": "Unknown", - "extractor": "Unknown", - "format": "Unknown", - "resolution": "Unknown", - "fps": "Unknown", - "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - } + def update(self, key: str, data: Dict[str, Any]) -> None: + """Update progress for a key.""" + if key not in self._data: + self._data[key] = { + 'active': True, + 'start_time': datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), + 'percent': 0 + } + self._data[key].update(data) + self._data[key]['last_update'] = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + logger.debug(f"Progress for {key}: {self._data[key].get('percent', 0)}%") - def update_download_progress(self, data: Dict[str, Any]) -> None: - """Update download progress information""" + def get(self, key: Optional[str] = None) -> Dict[str, Any]: + """Get progress for a key.""" + if key is None: + return self._data + return self._data.get(key, {}) + + def complete(self, key: str) -> None: + """Mark progress as complete.""" + if key in self._data: + self._data[key]['active'] = False + logger.info(f"Operation completed for {key}") + + def clear(self) -> None: + """Clear all progress data.""" + self._data.clear() + logger.info("Progress data cleared") + +_tracker = ProgressTracker() + + def get_compression(self, file_path: Optional[str] = None) -> Dict[str, Any]: + """Get compression progress.""" + if file_path is None: + return self._compressions + return self._compressions.get(file_path, {}) + + def complete_download(self, url: str) -> None: + """Mark download as complete.""" + if url in self._downloads: + self._downloads[url]['active'] = False + logger.info(f"Download completed for {url}") + + def complete_compression(self, file_path: str) -> None: + """Mark compression as complete.""" + if file_path in self._compressions: + self._compressions[file_path]['active'] = False + logger.info(f"Compression completed for {file_path}") + + def clear(self) -> None: + """Clear all progress data.""" + self._downloads.clear() + self._compressions.clear() + logger.info("Progress data cleared") + +# Global instance +_tracker = ProgressTrack + +# Global instance +_tracker = ProgressTracker() + +def get_tracker() -> Progre + """Clear all progress tracking""" + self._download_progress.clear() + self._compression_progress.clear() + logger.info("Cleared all progress tracking data") + +# Create singleton instance +progress_tracker = ProgressTracker() + +def get_progress_tracker() -> ProgressTracker: + + def mark_compression_complete(self, file_path: str) -> None: + """Mark a compression operation as complete""" + if file_path in self._compression_progress: + self._compression_progress[file_path]['active'] = False + logger.info(f"Compression completed for {file_path}") + + def clear_progress(self) -> None: + """Clear all progress tracking""" + self._download_progress.clear() + self._compression_progress.clear() + logger.info("Cleared all progress tracking data") + +# Create singleton instance +progress_tracker = ProgressTracker() + +# Export the singleton instance +def get_progress_tracker() -> ProgressTracker: + + + Args: + data: Dictionary containing download progress data + """ try: - # Get URL from info dict - url = data.get("info_dict", {}).get("webpage_url", "unknown") - if url not in self._download_progress: + info_dict = data.get("info_dict", {}) + url = info_dict.get("webpage_url") + if not url or url not in self._download_progress: return - if data["status"] == "downloading": + if data.get("status") == "downloading": + percent_str = 
data.get("_percent_str", "0").replace("%", "") + try: + percent = float(percent_str) + except ValueError: + percent = 0.0 + + total_bytes = ( + data.get("total_bytes", 0) or + data.get("total_bytes_estimate", 0) + ) + self._download_progress[url].update({ "active": True, - "percent": float(data.get("_percent_str", "0").replace("%", "")), + "percent": percent, "speed": data.get("_speed_str", "N/A"), "eta": data.get("_eta_str", "N/A"), "downloaded_bytes": data.get("downloaded_bytes", 0), - "total_bytes": data.get("total_bytes", 0) or data.get("total_bytes_estimate", 0), + "total_bytes": total_bytes, "retries": data.get("retry_count", 0), "fragment_count": data.get("fragment_count", 0), "fragment_index": data.get("fragment_index", 0), - "video_title": data.get("info_dict", {}).get("title", "Unknown"), - "extractor": data.get("info_dict", {}).get("extractor", "Unknown"), - "format": data.get("info_dict", {}).get("format", "Unknown"), - "resolution": data.get("info_dict", {}).get("resolution", "Unknown"), - "fps": data.get("info_dict", {}).get("fps", "Unknown"), - "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), + "video_title": info_dict.get("title", "Unknown"), + "extractor": info_dict.get("extractor", "Unknown"), + "format": info_dict.get("format", "Unknown"), + "resolution": info_dict.get("resolution", "Unknown"), + "fps": info_dict.get("fps", "Unknown"), + "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") }) logger.debug( f"Download progress for {url}: " - f"{self._download_progress[url]['percent']}% at {self._download_progress[url]['speed']}, " + f"{percent:.1f}% at {self._download_progress[url]['speed']}, " f"ETA: {self._download_progress[url]['eta']}" ) except Exception as e: - logger.error(f"Error updating download progress: {e}") + logger.error(f"Error updating download progress: {e}", exc_info=True) - def end_download(self, url: str) -> None: - """Mark a download as completed""" + def end_download(self, url: str, status: ProgressStatus = ProgressStatus.COMPLETED) -> None: + """ + Mark a download as completed. + + Args: + url: The URL being downloaded + status: The final status of the download + """ if url in self._download_progress: self._download_progress[url]["active"] = False + logger.info(f"Download {status.value} for {url}") - def start_compression( - self, - input_file: str, - params: Dict[str, str], - use_hardware: bool, - duration: float, - input_size: int, - target_size: int - ) -> None: - """Initialize progress tracking for compression""" - self._compression_progress[input_file] = { - "active": True, - "filename": input_file, - "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - "percent": 0, - "elapsed_time": "0:00", - "input_size": input_size, - "current_size": 0, - "target_size": target_size, - "codec": params.get("c:v", "unknown"), - "hardware_accel": use_hardware, - "preset": params.get("preset", "unknown"), - "crf": params.get("crf", "unknown"), - "duration": duration, - "bitrate": params.get("b:v", "unknown"), - "audio_codec": params.get("c:a", "unknown"), - "audio_bitrate": params.get("b:a", "unknown"), - "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - } + def start_compression(self, params: CompressionParams) -> None: + """ + Initialize progress tracking for compression. 
+
+        Args:
+            params: Compression parameters
+        """
+        current_time = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+        self._compression_progress[params.input_file] = CompressionProgress(
+            active=True,
+            filename=params.input_file,
+            start_time=current_time,
+            percent=0.0,
+            elapsed_time="0:00",
+            input_size=params.input_size,
+            current_size=0,
+            target_size=params.target_size,
+            codec=params.codec_params.get("c:v", "unknown"),
+            hardware_accel=params.use_hardware,
+            preset=params.codec_params.get("preset", "unknown"),
+            crf=params.codec_params.get("crf", "unknown"),
+            duration=params.duration,
+            bitrate=params.codec_params.get("b:v", "unknown"),
+            audio_codec=params.codec_params.get("c:a", "unknown"),
+            audio_bitrate=params.codec_params.get("b:a", "unknown"),
+            last_update=current_time,
+            current_time=None
+        )
 
     def update_compression_progress(
         self,
@@ -113,14 +209,23 @@ class ProgressTracker:
         current_size: int,
         current_time: float
     ) -> None:
-        """Update compression progress information"""
+        """
+        Update compression progress information.
+
+        Args:
+            input_file: The input file being compressed
+            progress: Current progress percentage (0-100)
+            elapsed_time: Time elapsed as string
+            current_size: Current file size in bytes
+            current_time: Current position in the source video, in seconds
+        """
         if input_file in self._compression_progress:
             self._compression_progress[input_file].update({
-                "percent": progress,
+                "percent": max(0.0, min(100.0, progress)),
                 "elapsed_time": elapsed_time,
                 "current_size": current_size,
                 "current_time": current_time,
-                "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
+                "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
             })
 
             logger.debug(
@@ -128,29 +233,73 @@ class ProgressTracker:
                 f"{progress:.1f}%, Size: {current_size}/{self._compression_progress[input_file]['target_size']} bytes"
             )
 
-    def end_compression(self, input_file: str) -> None:
-        """Mark a compression operation as completed"""
+    def end_compression(
+        self,
+        input_file: str,
+        status: ProgressStatus = ProgressStatus.COMPLETED
+    ) -> None:
+        """
+        Mark a compression operation as completed.
+
+        Args:
+            input_file: The input file being compressed
+            status: The final status of the compression
+        """
         if input_file in self._compression_progress:
             self._compression_progress[input_file]["active"] = False
+            logger.info(f"Compression {status.value} for {input_file}")
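+
+    # Illustrative start_compression call (sketch; all values are hypothetical):
+    #
+    #     tracker.start_compression(CompressionParams(
+    #         input_file="in.mp4", input_size=50_000_000, target_size=8_388_608,
+    #         duration=120.0, use_hardware=True,
+    #         codec_params={"c:v": "libx264", "preset": "medium", "crf": "23"},
+    #     ))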
 
-    def get_download_progress(self, url: str) -> Optional[Dict[str, Any]]:
-        """Get progress information for a download"""
+    def get_download_progress(
+        self,
+        url: Optional[str] = None
+    ) -> Union[Dict[str, DownloadProgress], Optional[DownloadProgress]]:
+        """
+        Get progress information for a download.
+
+        Args:
+            url: Optional URL to get progress for. If None, returns all progress.
+
+        Returns:
+            Progress information for the specified download or None if not found
+        """
+        if url is None:
+            return self._download_progress
         return self._download_progress.get(url)
 
-    def get_compression_progress(self, input_file: str) -> Optional[Dict[str, Any]]:
-        """Get progress information for a compression operation"""
+    def get_compression_progress(
+        self,
+        input_file: Optional[str] = None
+    ) -> Union[Dict[str, CompressionProgress], Optional[CompressionProgress]]:
+        """
+        Get progress information for a compression operation.
+
+        Args:
+            input_file: Optional file to get progress for. If None, returns all progress.
+
+        Returns:
+            Progress information for the specified compression or None if not found
+        """
+        if input_file is None:
+            return self._compression_progress
         return self._compression_progress.get(input_file)
 
-    def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
-        """Get all active downloads"""
+    def get_active_downloads(self) -> Dict[str, DownloadProgress]:
+        """
+        Get all active downloads.
+
+        Returns:
+            Dictionary of active downloads and their progress
+        """
         return {
             url: progress
             for url, progress in self._download_progress.items()
             if progress.get("active", False)
         }
 
-    def get_active_compressions(self) -> Dict[str, Dict[str, Any]]:
-        """Get all active compression operations"""
+    def get_active_compressions(self) -> Dict[str, CompressionProgress]:
+        """
+        Get all active compression operations.
+
+        Returns:
+            Dictionary of active compressions and their progress
+        """
         return {
             input_file: progress
             for input_file, progress in self._compression_progress.items()
@@ -161,3 +310,4 @@ class ProgressTracker:
         """Clear all progress tracking"""
         self._download_progress.clear()
         self._compression_progress.clear()
+        logger.info("Cleared all progress tracking data")
+
+# Create singleton instance
+progress_tracker = ProgressTracker()
+
+def get_progress_tracker() -> ProgressTracker:
+    """Return the shared progress tracker instance"""
+    return progress_tracker
diff --git a/videoarchiver/utils/url_validator.py b/videoarchiver/utils/url_validator.py
new file mode 100644
index 0000000..64fb231
--- /dev/null
+++ b/videoarchiver/utils/url_validator.py
@@ -0,0 +1,76 @@
+"""URL validation utilities for video downloads"""
+
+import re
+import logging
+import yt_dlp
+from typing import List, Optional
+
+logger = logging.getLogger("VideoArchiver")
+
+def is_video_url_pattern(url: str) -> bool:
+    """Check if URL matches common video platform patterns"""
+    video_patterns = [
+        r"youtube\.com/watch\?v=",
+        r"youtu\.be/",
+        r"vimeo\.com/",
+        r"tiktok\.com/",
+        r"twitter\.com/.*/video/",
+        r"x\.com/.*/video/",
+        r"bsky\.app/",
+        r"facebook\.com/.*/videos/",
+        r"instagram\.com/.*/(tv|reel|p)/",
+        r"twitch\.tv/.*/clip/",
+        r"streamable\.com/",
+        r"v\.redd\.it/",
+        r"clips\.twitch\.tv/",
+        r"dailymotion\.com/video/",
+        r"\.mp4$",
+        r"\.webm$",
+        r"\.mov$",
+    ]
+    return any(re.search(pattern, url, re.IGNORECASE) for pattern in video_patterns)
+
+def check_url_support(url: str, ydl_opts: dict, enabled_sites: Optional[List[str]] = None) -> bool:
+    """Check if URL is supported by attempting a simulated download"""
+    if not is_video_url_pattern(url):
+        return False
+
+    try:
+        simulate_opts = {
+            **ydl_opts,
+            "simulate": True,
+            "quiet": True,
+            "no_warnings": True,
+            "extract_flat": True,
+            "skip_download": True,
+            "format": "best",
+        }
+
+        with yt_dlp.YoutubeDL(simulate_opts) as ydl:
+            try:
+                info = ydl.extract_info(url, download=False)
+                if info is None:
+                    return False
+
+                if enabled_sites:
+                    extractor = info.get("extractor", "").lower()
+                    if not any(
+                        site.lower() in extractor for site in enabled_sites
+                    ):
+                        logger.info(f"Site {extractor} not in enabled sites list")
+                        return False
+
+                logger.info(
+                    f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})"
+                )
+                return True
+
+            except yt_dlp.utils.UnsupportedError:
+                return False
+            except Exception as e:
+                if "Unsupported URL" not in str(e):
+                    logger.error(f"Error checking URL {url}: {str(e)}")
+                return False
+
+    except Exception as e:
+        logger.error(f"Error during URL check: {str(e)}")
+        return False
diff --git a/videoarchiver/utils/video_downloader.py b/videoarchiver/utils/video_downloader.py
deleted file mode 100644
index 3440122..0000000
--- a/videoarchiver/utils/video_downloader.py
+++ /dev/null
@@ -1,809 +0,0 @@
-"""Video download and processing utilities"""
-
-import os -import re -import logging -import asyncio -import ffmpeg -import yt_dlp -import shutil -import subprocess -import json -import signal -from concurrent.futures import ThreadPoolExecutor -from typing import Dict, List, Optional, Tuple, Callable, Set -from pathlib import Path -from datetime import datetime - -from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager -from videoarchiver.ffmpeg.exceptions import ( - FFmpegError, - CompressionError, - VerificationError, - FFprobeError, - TimeoutError, - handle_ffmpeg_error, -) -from videoarchiver.utils.exceptions import VideoVerificationError -from videoarchiver.utils.file_ops import secure_delete_file -from videoarchiver.utils.path_manager import temp_path_context - -logger = logging.getLogger("VideoArchiver") - - -# Add a custom yt-dlp logger to handle cancellation -class CancellableYTDLLogger: - def __init__(self): - self.cancelled = False - - def debug(self, msg): - if self.cancelled: - raise Exception("Download cancelled") - logger.debug(msg) - - def warning(self, msg): - if self.cancelled: - raise Exception("Download cancelled") - logger.warning(msg) - - def error(self, msg): - if self.cancelled: - raise Exception("Download cancelled") - logger.error(msg) - - -def is_video_url_pattern(url: str) -> bool: - """Check if URL matches common video platform patterns""" - video_patterns = [ - r"youtube\.com/watch\?v=", - r"youtu\.be/", - r"vimeo\.com/", - r"tiktok\.com/", - r"twitter\.com/.*/video/", - r"x\.com/.*/video/", - r"bsky\.app/", - r"facebook\.com/.*/videos/", - r"instagram\.com/.*/(tv|reel|p)/", - r"twitch\.tv/.*/clip/", - r"streamable\.com/", - r"v\.redd\.it/", - r"clips\.twitch\.tv/", - r"dailymotion\.com/video/", - r"\.mp4$", - r"\.webm$", - r"\.mov$", - ] - return any(re.search(pattern, url, re.IGNORECASE) for pattern in video_patterns) - - -class VideoDownloader: - MAX_RETRIES = 5 - RETRY_DELAY = 10 - FILE_OP_RETRIES = 3 - FILE_OP_RETRY_DELAY = 1 - SHUTDOWN_TIMEOUT = 15 # seconds - - def __init__( - self, - download_path: str, - video_format: str, - max_quality: int, - max_file_size: int, - enabled_sites: Optional[List[str]] = None, - concurrent_downloads: int = 2, - ffmpeg_mgr: Optional[FFmpegManager] = None, - ): - self.download_path = Path(download_path) - self.download_path.mkdir(parents=True, exist_ok=True) - os.chmod(str(self.download_path), 0o755) - - self.video_format = video_format - self.max_quality = max_quality - self.max_file_size = max_file_size - self.enabled_sites = enabled_sites - self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager() - - # Create thread pool with proper naming - self.download_pool = ThreadPoolExecutor( - max_workers=max(1, min(3, concurrent_downloads)), - thread_name_prefix="videoarchiver_download", - ) - - # Track active downloads and processes - self.active_downloads: Dict[str, Dict[str, Any]] = {} - self._downloads_lock = asyncio.Lock() - self._active_processes: Set[subprocess.Popen] = set() - self._processes_lock = asyncio.Lock() - self._shutting_down = False - - # Create cancellable logger - self.ytdl_logger = CancellableYTDLLogger() - - # Configure yt-dlp options - self.ydl_opts = { - "format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best", - "outtmpl": "%(title)s.%(ext)s", - "merge_output_format": video_format, - "quiet": True, - "no_warnings": True, - "extract_flat": True, - "concurrent_fragment_downloads": 1, - "retries": self.MAX_RETRIES, - "fragment_retries": self.MAX_RETRIES, - "file_access_retries": self.FILE_OP_RETRIES, - 
"extractor_retries": self.MAX_RETRIES, - "postprocessor_hooks": [self._check_file_size], - "progress_hooks": [self._progress_hook, self._detailed_progress_hook], - "ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()), - "ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()), - "paths": {"home": str(self.download_path)}, - "logger": self.ytdl_logger, - "ignoreerrors": True, - "no_color": True, - "geo_bypass": True, - "socket_timeout": 60, - "http_chunk_size": 1048576, - "external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]}, - "max_sleep_interval": 5, - "sleep_interval": 1, - "max_filesize": max_file_size * 1024 * 1024, - } - - async def cleanup(self) -> None: - """Clean up resources with proper shutdown""" - self._shutting_down = True - - try: - # Cancel active downloads - self.ytdl_logger.cancelled = True - - # Kill any active FFmpeg processes - async with self._processes_lock: - for process in self._active_processes: - try: - process.terminate() - await asyncio.sleep(0.1) # Give process time to terminate - if process.poll() is None: - process.kill() # Force kill if still running - except Exception as e: - logger.error(f"Error killing process: {e}") - self._active_processes.clear() - - # Clean up thread pool - self.download_pool.shutdown(wait=False, cancel_futures=True) - - # Clean up active downloads - async with self._downloads_lock: - self.active_downloads.clear() - - except Exception as e: - logger.error(f"Error during downloader cleanup: {e}") - finally: - self._shutting_down = False - - async def force_cleanup(self) -> None: - """Force cleanup of all resources""" - try: - # Force cancel all downloads - self.ytdl_logger.cancelled = True - - # Kill all processes immediately - async with self._processes_lock: - for process in self._active_processes: - try: - process.kill() - except Exception as e: - logger.error(f"Error force killing process: {e}") - self._active_processes.clear() - - # Force shutdown thread pool - self.download_pool.shutdown(wait=False, cancel_futures=True) - - # Clear all tracking - async with self._downloads_lock: - self.active_downloads.clear() - - except Exception as e: - logger.error(f"Error during force cleanup: {e}") - - def _detailed_progress_hook(self, d): - """Handle detailed download progress tracking""" - try: - if d["status"] == "downloading": - # Get URL from info dict - url = d.get("info_dict", {}).get("webpage_url", "unknown") - - # Update global progress tracking - from videoarchiver.processor import _download_progress - - if url in _download_progress: - _download_progress[url].update( - { - "active": True, - "percent": float( - d.get("_percent_str", "0").replace("%", "") - ), - "speed": d.get("_speed_str", "N/A"), - "eta": d.get("_eta_str", "N/A"), - "downloaded_bytes": d.get("downloaded_bytes", 0), - "total_bytes": d.get("total_bytes", 0) - or d.get("total_bytes_estimate", 0), - "retries": d.get("retry_count", 0), - "fragment_count": d.get("fragment_count", 0), - "fragment_index": d.get("fragment_index", 0), - "video_title": d.get("info_dict", {}).get( - "title", "Unknown" - ), - "extractor": d.get("info_dict", {}).get( - "extractor", "Unknown" - ), - "format": d.get("info_dict", {}).get("format", "Unknown"), - "resolution": d.get("info_dict", {}).get( - "resolution", "Unknown" - ), - "fps": d.get("info_dict", {}).get("fps", "Unknown"), - "last_update": datetime.utcnow().strftime( - "%Y-%m-%d %H:%M:%S" - ), - } - ) - - logger.debug( - f"Detailed progress for {url}: " - f"{_download_progress[url]['percent']}% at 
{_download_progress[url]['speed']}, " - f"ETA: {_download_progress[url]['eta']}" - ) - except Exception as e: - logger.error(f"Error in detailed progress hook: {str(e)}") - - def _progress_hook(self, d): - """Handle download progress""" - if d["status"] == "finished": - logger.info(f"Download completed: {d['filename']}") - elif d["status"] == "downloading": - try: - percent = float(d.get("_percent_str", "0").replace("%", "")) - speed = d.get("_speed_str", "N/A") - eta = d.get("_eta_str", "N/A") - downloaded = d.get("downloaded_bytes", 0) - total = d.get("total_bytes", 0) or d.get("total_bytes_estimate", 0) - - logger.debug( - f"Download progress: {percent}% at {speed}, " - f"ETA: {eta}, Downloaded: {downloaded}/{total} bytes" - ) - except Exception as e: - logger.debug(f"Error logging progress: {str(e)}") - - def is_supported_url(self, url: str) -> bool: - """Check if URL is supported by attempting a simulated download""" - if not is_video_url_pattern(url): - return False - - try: - simulate_opts = { - **self.ydl_opts, - "simulate": True, - "quiet": True, - "no_warnings": True, - "extract_flat": True, - "skip_download": True, - "format": "best", - } - - with yt_dlp.YoutubeDL(simulate_opts) as ydl: - try: - info = ydl.extract_info(url, download=False) - if info is None: - return False - - if self.enabled_sites: - extractor = info.get("extractor", "").lower() - if not any( - site.lower() in extractor for site in self.enabled_sites - ): - logger.info(f"Site {extractor} not in enabled sites list") - return False - - logger.info( - f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})" - ) - return True - - except yt_dlp.utils.UnsupportedError: - return False - except Exception as e: - if "Unsupported URL" not in str(e): - logger.error(f"Error checking URL {url}: {str(e)}") - return False - - except Exception as e: - logger.error(f"Error during URL check: {str(e)}") - return False - - async def download_video( - self, url: str, progress_callback: Optional[Callable[[float], None]] = None - ) -> Tuple[bool, str, str]: - """Download and process a video with improved error handling""" - if self._shutting_down: - return False, "", "Downloader is shutting down" - - # Initialize progress tracking for this URL - from videoarchiver.processor import _download_progress - - _download_progress[url] = { - "active": True, - "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - "percent": 0, - "speed": "N/A", - "eta": "N/A", - "downloaded_bytes": 0, - "total_bytes": 0, - "retries": 0, - "fragment_count": 0, - "fragment_index": 0, - "video_title": "Unknown", - "extractor": "Unknown", - "format": "Unknown", - "resolution": "Unknown", - "fps": "Unknown", - "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - } - - original_file = None - compressed_file = None - temp_dir = None - hardware_accel_failed = False - compression_params = None - - try: - with temp_path_context() as temp_dir: - # Download the video - success, file_path, error = await self._safe_download( - url, temp_dir, progress_callback - ) - if not success: - return False, "", error - - original_file = file_path - - async with self._downloads_lock: - self.active_downloads[url] = { - "file_path": original_file, - "start_time": datetime.utcnow(), - } - - # Check file size and compress if needed - file_size = os.path.getsize(original_file) - if file_size > (self.max_file_size * 1024 * 1024): - logger.info(f"Compressing video: {original_file}") - try: - # Get optimal compression parameters - compression_params = 
self.ffmpeg_mgr.get_compression_params( - original_file, self.max_file_size - ) - compressed_file = os.path.join( - self.download_path, - f"compressed_{os.path.basename(original_file)}", - ) - - # Try hardware acceleration first - success = await self._try_compression( - original_file, - compressed_file, - compression_params, - progress_callback, - use_hardware=True, - ) - - # If hardware acceleration fails, fall back to CPU - if not success: - hardware_accel_failed = True - logger.warning( - "Hardware acceleration failed, falling back to CPU encoding" - ) - success = await self._try_compression( - original_file, - compressed_file, - compression_params, - progress_callback, - use_hardware=False, - ) - - if not success: - raise CompressionError( - "Failed to compress with both hardware and CPU encoding", - file_size, - self.max_file_size * 1024 * 1024, - ) - - # Verify compressed file - if not self._verify_video_file(compressed_file): - raise VideoVerificationError( - "Compressed file verification failed" - ) - - compressed_size = os.path.getsize(compressed_file) - if compressed_size <= (self.max_file_size * 1024 * 1024): - await self._safe_delete_file(original_file) - return True, compressed_file, "" - else: - await self._safe_delete_file(compressed_file) - raise CompressionError( - "Failed to compress to target size", - file_size, - self.max_file_size * 1024 * 1024, - ) - - except Exception as e: - error_msg = str(e) - if hardware_accel_failed: - error_msg = f"Hardware acceleration failed, CPU fallback error: {error_msg}" - if compressed_file and os.path.exists(compressed_file): - await self._safe_delete_file(compressed_file) - return False, "", error_msg - - else: - # Move file to final location - final_path = os.path.join( - self.download_path, os.path.basename(original_file) - ) - success = await self._safe_move_file(original_file, final_path) - if not success: - return False, "", "Failed to move file to final location" - return True, final_path, "" - - except Exception as e: - logger.error(f"Download error: {str(e)}") - return False, "", str(e) - - finally: - # Clean up - async with self._downloads_lock: - self.active_downloads.pop(url, None) - if url in _download_progress: - _download_progress[url]["active"] = False - - try: - if original_file and os.path.exists(original_file): - await self._safe_delete_file(original_file) - if ( - compressed_file - and os.path.exists(compressed_file) - and not compressed_file.startswith(self.download_path) - ): - await self._safe_delete_file(compressed_file) - except Exception as e: - logger.error(f"Error during file cleanup: {str(e)}") - - async def _try_compression( - self, - input_file: str, - output_file: str, - params: Dict[str, str], - progress_callback: Optional[Callable[[float], None]] = None, - use_hardware: bool = True, - ) -> bool: - """Attempt video compression with given parameters""" - if self._shutting_down: - return False - - try: - # Build FFmpeg command - ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path()) - cmd = [ffmpeg_path, "-y", "-i", input_file] - - # Add progress monitoring - cmd.extend(["-progress", "pipe:1"]) - - # Modify parameters based on hardware acceleration preference - if use_hardware: - gpu_info = self.ffmpeg_mgr.gpu_info - if gpu_info["nvidia"] and params.get("c:v") == "libx264": - params["c:v"] = "h264_nvenc" - elif gpu_info["amd"] and params.get("c:v") == "libx264": - params["c:v"] = "h264_amf" - elif gpu_info["intel"] and params.get("c:v") == "libx264": - params["c:v"] = "h264_qsv" - else: - params["c:v"] = 
"libx264" - - # Add all parameters to command - for key, value in params.items(): - cmd.extend([f"-{key}", str(value)]) - - # Add output file - cmd.append(output_file) - - # Get video duration for progress calculation - duration = self._get_video_duration(input_file) - - # Update compression progress tracking - from videoarchiver.processor import _compression_progress - - # Get input file size - input_size = os.path.getsize(input_file) - - # Initialize compression progress - _compression_progress[input_file] = { - "active": True, - "filename": os.path.basename(input_file), - "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - "percent": 0, - "elapsed_time": "0:00", - "input_size": input_size, - "current_size": 0, - "target_size": self.max_file_size * 1024 * 1024, - "codec": params.get("c:v", "unknown"), - "hardware_accel": use_hardware, - "preset": params.get("preset", "unknown"), - "crf": params.get("crf", "unknown"), - "duration": duration, - "bitrate": params.get("b:v", "unknown"), - "audio_codec": params.get("c:a", "unknown"), - "audio_bitrate": params.get("b:a", "unknown"), - "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), - } - - # Run compression with progress monitoring - process = await asyncio.create_subprocess_exec( - *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE - ) - - # Track the process - async with self._processes_lock: - self._active_processes.add(process) - - start_time = datetime.utcnow() - loop = asyncio.get_running_loop() - - try: - while True: - if self._shutting_down: - process.terminate() - return False - - line = await process.stdout.readline() - if not line: - break - - try: - line = line.decode().strip() - if line.startswith("out_time_ms="): - current_time = ( - int(line.split("=")[1]) / 1000000 - ) # Convert microseconds to seconds - if duration > 0: - progress = min(100, (current_time / duration) * 100) - - # Update compression progress - elapsed = datetime.utcnow() - start_time - _compression_progress[input_file].update( - { - "percent": progress, - "elapsed_time": str(elapsed).split(".")[ - 0 - ], # Remove microseconds - "current_size": ( - os.path.getsize(output_file) - if os.path.exists(output_file) - else 0 - ), - "current_time": current_time, - "last_update": datetime.utcnow().strftime( - "%Y-%m-%d %H:%M:%S" - ), - } - ) - - if progress_callback: - # Call the callback directly since it now handles task creation - progress_callback(progress) - - except Exception as e: - logger.error(f"Error parsing FFmpeg progress: {e}") - - await process.wait() - success = os.path.exists(output_file) - - # Update final status - if success and input_file in _compression_progress: - _compression_progress[input_file].update( - { - "active": False, - "percent": 100, - "current_size": os.path.getsize(output_file), - "last_update": datetime.utcnow().strftime( - "%Y-%m-%d %H:%M:%S" - ), - } - ) - - return success - - finally: - # Remove process from tracking - async with self._processes_lock: - self._active_processes.discard(process) - - except subprocess.CalledProcessError as e: - logger.error(f"FFmpeg compression failed: {e.stderr.decode()}") - return False - except Exception as e: - logger.error(f"Compression attempt failed: {str(e)}") - return False - finally: - # Ensure compression progress is marked as inactive - if input_file in _compression_progress: - _compression_progress[input_file]["active"] = False - - def _get_video_duration(self, file_path: str) -> float: - """Get video duration in seconds""" - try: - 
-    def _get_video_duration(self, file_path: str) -> float:
-        """Get video duration in seconds"""
-        try:
-            ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
-            cmd = [
-                ffprobe_path,
-                "-v",
-                "quiet",
-                "-print_format",
-                "json",
-                "-show_format",
-                file_path,
-            ]
-            result = subprocess.run(cmd, capture_output=True, text=True)
-            data = json.loads(result.stdout)
-            return float(data["format"]["duration"])
-        except Exception as e:
-            logger.error(f"Error getting video duration: {e}")
-            return 0
-
-    def _check_file_size(self, info):
-        """Check if file size is within limits"""
-        if info.get("filepath") and os.path.exists(info["filepath"]):
-            try:
-                size = os.path.getsize(info["filepath"])
-                if size > (self.max_file_size * 1024 * 1024):
-                    logger.info(
-                        f"File exceeds size limit, will compress: {info['filepath']}"
-                    )
-            except OSError as e:
-                logger.error(f"Error checking file size: {str(e)}")
-
-    def _verify_video_file(self, file_path: str) -> bool:
-        """Verify video file integrity"""
-        try:
-            ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
-            cmd = [
-                ffprobe_path,
-                "-v",
-                "quiet",
-                "-print_format",
-                "json",
-                "-show_format",
-                "-show_streams",
-                file_path,
-            ]
-
-            result = subprocess.run(
-                cmd,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                timeout=30,
-            )
-
-            if result.returncode != 0:
-                raise VideoVerificationError(f"FFprobe failed: {result.stderr}")
-
-            probe = json.loads(result.stdout)
-
-            # Verify video stream
-            video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
-            if not video_streams:
-                raise VideoVerificationError("No video streams found")
-
-            # Verify duration
-            duration = float(probe["format"].get("duration", 0))
-            if duration <= 0:
-                raise VideoVerificationError("Invalid video duration")
-
-            # Verify file is readable
-            with open(file_path, "rb") as f:
-                f.seek(0, 2)
-                if f.tell() == 0:
-                    raise VideoVerificationError("Empty file")
-
-            return True
-
-        except Exception as e:
-            logger.error(f"Error verifying video file {file_path}: {e}")
-            return False
-
-    async def _safe_download(
-        self,
-        url: str,
-        temp_dir: str,
-        progress_callback: Optional[Callable[[float], None]] = None,
-    ) -> Tuple[bool, str, str]:
-        """Safely download video with retries"""
-        if self._shutting_down:
-            return False, "", "Downloader is shutting down"
-
-        last_error = None
-        for attempt in range(self.MAX_RETRIES):
-            try:
-                ydl_opts = self.ydl_opts.copy()
-                ydl_opts["outtmpl"] = os.path.join(temp_dir, ydl_opts["outtmpl"])
-
-                # Add progress callback
-                if progress_callback:
-                    original_progress_hook = ydl_opts["progress_hooks"][0]
-
-                    def combined_progress_hook(d):
-                        original_progress_hook(d)
-                        if d["status"] == "downloading":
-                            try:
-                                percent = float(
-                                    d.get("_percent_str", "0").replace("%", "")
-                                )
-                                # Call the callback directly since it now handles task creation
-                                progress_callback(percent)
-                            except Exception as e:
-                                logger.error(f"Error in progress callback: {e}")
-
-                    ydl_opts["progress_hooks"] = [combined_progress_hook]
-
-                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                    info = await asyncio.get_event_loop().run_in_executor(
-                        self.download_pool, lambda: ydl.extract_info(url, download=True)
-                    )
-
-                    if info is None:
-                        raise Exception("Failed to extract video information")
-
-                    file_path = os.path.join(temp_dir, ydl.prepare_filename(info))
-                    if not os.path.exists(file_path):
-                        raise FileNotFoundError("Download completed but file not found")
-
-                    if not self._verify_video_file(file_path):
-                        raise VideoVerificationError("Downloaded file is not a valid video")
-
-                    return True, file_path, ""
-
-            except Exception as e:
-                last_error = str(e)
-                logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
-                if attempt < self.MAX_RETRIES - 1:
-                    # Exponential backoff with jitter
-                    delay = self.RETRY_DELAY * (2**attempt) + (attempt * 2)
-                    await asyncio.sleep(delay)
-                else:
-                    return False, "", f"All download attempts failed: {last_error}"
-
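-    # Retry delays above grow as RETRY_DELAY * 2**attempt plus an additive
-    # attempt * 2 term, so waits lengthen exponentially across MAX_RETRIES
-    # attempts (the additive term is deterministic rather than random).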
-    async def _safe_delete_file(self, file_path: str) -> bool:
-        """Safely delete a file with retries"""
-        for attempt in range(self.FILE_OP_RETRIES):
-            try:
-                if await secure_delete_file(file_path):
-                    return True
-                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
-            except Exception as e:
-                logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
-                if attempt == self.FILE_OP_RETRIES - 1:
-                    return False
-                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
-        return False
-
-    async def _safe_move_file(self, src: str, dst: str) -> bool:
-        """Safely move a file with retries"""
-        for attempt in range(self.FILE_OP_RETRIES):
-            try:
-                os.makedirs(os.path.dirname(dst), exist_ok=True)
-                shutil.move(src, dst)
-                return True
-            except Exception as e:
-                logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
-                if attempt == self.FILE_OP_RETRIES - 1:
-                    return False
-                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
-        return False
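-    # Both helpers above share the same retry pattern: FILE_OP_RETRIES
-    # attempts with a linearly growing FILE_OP_RETRY_DELAY between them.
-    # secure_delete_file comes from the cog's utils package (its defining
-    # module is not shown in this hunk).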