Core Systems:

Component-based architecture with lifecycle management (ComponentManager, LifecycleManager)
Centralized error handling with categorization, tracking, and recovery (ErrorManager)
Comprehensive state and status tracking (CogStatus, ComponentTracker)
Event-driven architecture with health monitoring
Queue Management:

Multiple processing strategies for different scenarios
Advanced state management with recovery
Comprehensive metrics and health monitoring
Sophisticated cleanup system with multiple strategies
Processing Pipeline:

Enhanced message handling with validation
Improved URL extraction and processing
Better queue management and monitoring
Advanced cleanup mechanisms
Overall Benefits:

Clearer code organization and easier maintenance
More consistent error handling and recovery
Richer monitoring and reporting
A more robust and reliable system
pacnpal
2024-11-16 05:01:29 +00:00
parent 537a325807
commit a4ca6e8ea6
47 changed files with 11085 additions and 2110 deletions


@@ -0,0 +1,225 @@
"""Module for managing Discord channel configurations"""
import logging
from typing import Dict, List, Optional, Tuple
import discord
from .exceptions import ConfigurationError as ConfigError, DiscordAPIError
logger = logging.getLogger("ChannelManager")
class ChannelManager:
"""Manages Discord channel configurations"""
def __init__(self, config_manager):
self.config_manager = config_manager
async def get_channel(
self,
guild: discord.Guild,
channel_type: str
) -> Optional[discord.TextChannel]:
"""Get a channel by type
Args:
guild: Discord guild
channel_type: Type of channel (archive, notification, log)
Returns:
Optional[discord.TextChannel]: Channel if found and valid
Raises:
ConfigError: If channel type is invalid
DiscordAPIError: If channel exists but is invalid type
"""
try:
if channel_type not in ["archive", "notification", "log"]:
raise ConfigError(f"Invalid channel type: {channel_type}")
settings = await self.config_manager.get_guild_settings(guild.id)
channel_id = settings.get(f"{channel_type}_channel")
if channel_id is None:
return None
channel = guild.get_channel(channel_id)
if channel is None:
logger.warning(f"Channel {channel_id} not found in guild {guild.id}")
return None
if not isinstance(channel, discord.TextChannel):
raise DiscordAPIError(f"Channel {channel_id} is not a text channel")
return channel
        except (ConfigError, DiscordAPIError):
            # Propagate known errors as documented instead of re-wrapping them
            raise
        except Exception as e:
            logger.error(f"Failed to get {channel_type} channel for guild {guild.id}: {e}")
            raise ConfigError(f"Failed to get channel: {str(e)}")
async def get_monitored_channels(
self,
guild: discord.Guild
) -> List[discord.TextChannel]:
"""Get all monitored channels for a guild
Args:
guild: Discord guild
Returns:
List[discord.TextChannel]: List of monitored channels
Raises:
ConfigError: If channel retrieval fails
"""
try:
settings = await self.config_manager.get_guild_settings(guild.id)
monitored_channel_ids = settings["monitored_channels"]
# If no channels are set to be monitored, return all text channels
if not monitored_channel_ids:
return [
channel for channel in guild.channels
if isinstance(channel, discord.TextChannel)
]
# Otherwise, return only the specified channels
channels: List[discord.TextChannel] = []
invalid_channels: List[int] = []
for channel_id in monitored_channel_ids:
channel = guild.get_channel(channel_id)
if channel and isinstance(channel, discord.TextChannel):
channels.append(channel)
else:
invalid_channels.append(channel_id)
logger.warning(f"Invalid monitored channel {channel_id} in guild {guild.id}")
# Clean up invalid channels if found
if invalid_channels:
await self._remove_invalid_channels(guild.id, invalid_channels)
return channels
except Exception as e:
logger.error(f"Failed to get monitored channels for guild {guild.id}: {e}")
raise ConfigError(f"Failed to get monitored channels: {str(e)}")
async def verify_channel_permissions(
self,
channel: discord.TextChannel,
required_permissions: List[str]
) -> Tuple[bool, List[str]]:
"""Verify bot has required permissions in a channel
Args:
channel: Channel to check
required_permissions: List of required permission names
Returns:
Tuple[bool, List[str]]: (Has all permissions, List of missing permissions)
"""
try:
bot_member = channel.guild.me
channel_perms = channel.permissions_for(bot_member)
missing_perms = [
perm for perm in required_permissions
if not getattr(channel_perms, perm, False)
]
return not bool(missing_perms), missing_perms
except Exception as e:
logger.error(f"Error checking channel permissions: {e}")
return False, ["Failed to check permissions"]
async def add_monitored_channel(
self,
guild_id: int,
channel_id: int
) -> None:
"""Add a channel to monitored channels
Args:
guild_id: Guild ID
channel_id: Channel ID to add
Raises:
ConfigError: If channel cannot be added
"""
try:
await self.config_manager.add_to_list(
guild_id,
"monitored_channels",
channel_id
)
except Exception as e:
logger.error(f"Failed to add monitored channel {channel_id}: {e}")
raise ConfigError(f"Failed to add monitored channel: {str(e)}")
async def remove_monitored_channel(
self,
guild_id: int,
channel_id: int
) -> None:
"""Remove a channel from monitored channels
Args:
guild_id: Guild ID
channel_id: Channel ID to remove
Raises:
ConfigError: If channel cannot be removed
"""
try:
await self.config_manager.remove_from_list(
guild_id,
"monitored_channels",
channel_id
)
except Exception as e:
logger.error(f"Failed to remove monitored channel {channel_id}: {e}")
raise ConfigError(f"Failed to remove monitored channel: {str(e)}")
async def _remove_invalid_channels(
self,
guild_id: int,
channel_ids: List[int]
) -> None:
"""Remove invalid channels from monitored channels
Args:
guild_id: Guild ID
channel_ids: List of invalid channel IDs to remove
"""
try:
for channel_id in channel_ids:
await self.remove_monitored_channel(guild_id, channel_id)
except Exception as e:
logger.error(f"Error removing invalid channels: {e}")
async def get_channel_info(
self,
guild: discord.Guild
) -> Dict[str, Optional[discord.TextChannel]]:
"""Get all configured channels for a guild
Args:
guild: Discord guild
Returns:
Dict[str, Optional[discord.TextChannel]]: Dictionary of channel types to channels
"""
try:
return {
'archive': await self.get_channel(guild, "archive"),
'notification': await self.get_channel(guild, "notification"),
'log': await self.get_channel(guild, "log")
}
except Exception as e:
logger.error(f"Error getting channel info: {e}")
return {
'archive': None,
'notification': None,
'log': None
}
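
For illustration, a minimal usage sketch of ChannelManager (not part of the diff). The `cog` object and its `config_manager` attribute are assumptions standing in for the real cog wiring:

async def post_to_archive(cog, guild: discord.Guild, content: str) -> None:
    """Hypothetical command helper built on ChannelManager."""
    manager = ChannelManager(cog.config_manager)
    channel = await manager.get_channel(guild, "archive")
    if channel is None:
        return  # No archive channel configured for this guild
    ok, missing = await manager.verify_channel_permissions(
        channel, ["send_messages", "embed_links"]
    )
    if not ok:
        logger.warning(f"Missing permissions in {channel.id}: {missing}")
        return
    await channel.send(content)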


@@ -0,0 +1,242 @@
"""Module for managing Discord role configurations"""
import logging
from typing import Any, Dict, List, Optional, Set, Tuple
import discord
from .exceptions import ConfigurationError as ConfigError
logger = logging.getLogger("RoleManager")
class RoleManager:
"""Manages Discord role configurations"""
def __init__(self, config_manager):
self.config_manager = config_manager
async def check_user_roles(
self,
member: discord.Member
) -> Tuple[bool, Optional[str]]:
"""Check if user has permission based on allowed roles
Args:
member: Discord member to check
Returns:
Tuple[bool, Optional[str]]: (Has permission, Reason if denied)
Raises:
ConfigError: If role check fails
"""
try:
allowed_roles = await self.config_manager.get_setting(
member.guild.id,
"allowed_roles"
)
# If no roles are set, allow all users
if not allowed_roles:
return True, None
# Check user roles
user_roles = {role.id for role in member.roles}
allowed_role_set = set(allowed_roles)
if user_roles & allowed_role_set: # Intersection
return True, None
            # Get allowed role names for the error message
            required_role_names = await self._get_role_names(
                member.guild,
                allowed_role_set
            )
            return False, f"Missing required roles: {', '.join(required_role_names)}"
except Exception as e:
logger.error(f"Failed to check roles for user {member.id} in guild {member.guild.id}: {e}")
raise ConfigError(f"Failed to check user roles: {str(e)}")
async def add_allowed_role(
self,
guild_id: int,
role_id: int
) -> None:
"""Add a role to allowed roles
Args:
guild_id: Guild ID
role_id: Role ID to add
Raises:
ConfigError: If role cannot be added
"""
try:
await self.config_manager.add_to_list(
guild_id,
"allowed_roles",
role_id
)
except Exception as e:
logger.error(f"Failed to add allowed role {role_id}: {e}")
raise ConfigError(f"Failed to add allowed role: {str(e)}")
async def remove_allowed_role(
self,
guild_id: int,
role_id: int
) -> None:
"""Remove a role from allowed roles
Args:
guild_id: Guild ID
role_id: Role ID to remove
Raises:
ConfigError: If role cannot be removed
"""
try:
await self.config_manager.remove_from_list(
guild_id,
"allowed_roles",
role_id
)
except Exception as e:
logger.error(f"Failed to remove allowed role {role_id}: {e}")
raise ConfigError(f"Failed to remove allowed role: {str(e)}")
async def get_allowed_roles(
self,
guild: discord.Guild
) -> List[discord.Role]:
"""Get all allowed roles for a guild
Args:
guild: Discord guild
Returns:
List[discord.Role]: List of allowed roles
Raises:
ConfigError: If roles cannot be retrieved
"""
try:
settings = await self.config_manager.get_guild_settings(guild.id)
role_ids = settings["allowed_roles"]
roles = []
invalid_roles = []
for role_id in role_ids:
role = guild.get_role(role_id)
if role:
roles.append(role)
else:
invalid_roles.append(role_id)
logger.warning(f"Invalid role {role_id} in guild {guild.id}")
# Clean up invalid roles if found
if invalid_roles:
await self._remove_invalid_roles(guild.id, invalid_roles)
return roles
except Exception as e:
logger.error(f"Failed to get allowed roles for guild {guild.id}: {e}")
raise ConfigError(f"Failed to get allowed roles: {str(e)}")
async def verify_role_hierarchy(
self,
guild: discord.Guild,
role: discord.Role
) -> Tuple[bool, Optional[str]]:
"""Verify bot's role hierarchy position for managing a role
Args:
guild: Discord guild
role: Role to check
Returns:
Tuple[bool, Optional[str]]: (Can manage role, Reason if not)
"""
try:
bot_member = guild.me
bot_top_role = bot_member.top_role
if role >= bot_top_role:
return False, f"Role {role.name} is higher than or equal to bot's highest role"
return True, None
except Exception as e:
logger.error(f"Error checking role hierarchy: {e}")
return False, "Failed to check role hierarchy"
async def _get_role_names(
self,
guild: discord.Guild,
role_ids: Set[int]
) -> List[str]:
"""Get role names from role IDs
Args:
guild: Discord guild
role_ids: Set of role IDs
Returns:
List[str]: List of role names
"""
role_names = []
for role_id in role_ids:
role = guild.get_role(role_id)
if role:
role_names.append(role.name)
return role_names
async def _remove_invalid_roles(
self,
guild_id: int,
role_ids: List[int]
) -> None:
"""Remove invalid roles from allowed roles
Args:
guild_id: Guild ID
role_ids: List of invalid role IDs to remove
"""
try:
for role_id in role_ids:
await self.remove_allowed_role(guild_id, role_id)
except Exception as e:
logger.error(f"Error removing invalid roles: {e}")
async def get_role_info(
self,
guild: discord.Guild
) -> Dict[str, Any]:
"""Get role configuration information
Args:
guild: Discord guild
Returns:
Dict[str, Any]: Dictionary containing role information
"""
try:
allowed_roles = await self.get_allowed_roles(guild)
bot_member = guild.me
return {
'allowed_roles': allowed_roles,
'bot_top_role': bot_member.top_role,
'bot_permissions': bot_member.guild_permissions,
'role_count': len(allowed_roles)
}
except Exception as e:
logger.error(f"Error getting role info: {e}")
return {
'allowed_roles': [],
'bot_top_role': None,
'bot_permissions': None,
'role_count': 0
}
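
For illustration, a minimal sketch of gating a command on RoleManager (not part of the diff); the `cog` object is an assumed stand-in for the real cog wiring:

async def require_allowed_role(cog, member: discord.Member) -> bool:
    """Hypothetical permission gate built on RoleManager."""
    manager = RoleManager(cog.config_manager)
    allowed, reason = await manager.check_user_roles(member)
    if not allowed:
        logger.info(f"Denied {member.display_name}: {reason}")
    return allowed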


@@ -0,0 +1,211 @@
"""Module for formatting configuration settings"""
import logging
from typing import Dict, Any, List
from datetime import datetime
import discord
from .exceptions import ConfigurationError as ConfigError
logger = logging.getLogger("SettingsFormatter")
class SettingsFormatter:
"""Formats configuration settings for display"""
def __init__(self):
self.embed_color = discord.Color.blue()
async def format_settings_embed(
self,
guild: discord.Guild,
settings: Dict[str, Any]
) -> discord.Embed:
"""Format guild settings into a Discord embed
Args:
guild: Discord guild
settings: Guild settings dictionary
Returns:
discord.Embed: Formatted settings embed
Raises:
ConfigError: If formatting fails
"""
try:
embed = discord.Embed(
title="Video Archiver Settings",
color=self.embed_color,
timestamp=datetime.utcnow()
)
# Add sections
await self._add_core_settings(embed, guild, settings)
await self._add_channel_settings(embed, guild, settings)
await self._add_permission_settings(embed, guild, settings)
await self._add_video_settings(embed, settings)
await self._add_operation_settings(embed, settings)
await self._add_site_settings(embed, settings)
embed.set_footer(text="Last updated")
return embed
except Exception as e:
logger.error(f"Failed to format settings embed: {e}")
raise ConfigError(f"Failed to format settings: {str(e)}")
async def _add_core_settings(
self,
embed: discord.Embed,
guild: discord.Guild,
settings: Dict[str, Any]
) -> None:
"""Add core settings to embed"""
embed.add_field(
name="Core Settings",
value="\n".join([
f"**Enabled:** {settings['enabled']}",
f"**Database Enabled:** {settings['use_database']}",
f"**Update Check Disabled:** {settings['disable_update_check']}"
]),
inline=False
)
async def _add_channel_settings(
self,
embed: discord.Embed,
guild: discord.Guild,
settings: Dict[str, Any]
) -> None:
"""Add channel settings to embed"""
# Get channels with error handling
channels = await self._get_channel_mentions(guild, settings)
embed.add_field(
name="Channel Settings",
value="\n".join([
f"**Archive Channel:** {channels['archive']}",
f"**Notification Channel:** {channels['notification']}",
f"**Log Channel:** {channels['log']}",
f"**Monitored Channels:**\n{channels['monitored']}"
]),
inline=False
)
async def _add_permission_settings(
self,
embed: discord.Embed,
guild: discord.Guild,
settings: Dict[str, Any]
) -> None:
"""Add permission settings to embed"""
allowed_roles = await self._get_role_names(guild, settings["allowed_roles"])
embed.add_field(
name="Permission Settings",
value=f"**Allowed Roles:**\n{allowed_roles}",
inline=False
)
async def _add_video_settings(
self,
embed: discord.Embed,
settings: Dict[str, Any]
) -> None:
"""Add video settings to embed"""
embed.add_field(
name="Video Settings",
value="\n".join([
f"**Format:** {settings['video_format']}",
f"**Max Quality:** {settings['video_quality']}p",
f"**Max File Size:** {settings['max_file_size']}MB"
]),
inline=False
)
async def _add_operation_settings(
self,
embed: discord.Embed,
settings: Dict[str, Any]
) -> None:
"""Add operation settings to embed"""
embed.add_field(
name="Operation Settings",
value="\n".join([
f"**Delete After Repost:** {settings['delete_after_repost']}",
f"**Message Duration:** {settings['message_duration']} hours",
f"**Concurrent Downloads:** {settings['concurrent_downloads']}",
f"**Max Retries:** {settings['max_retries']}",
f"**Retry Delay:** {settings['retry_delay']}s"
]),
inline=False
)
async def _add_site_settings(
self,
embed: discord.Embed,
settings: Dict[str, Any]
) -> None:
"""Add site settings to embed"""
enabled_sites = settings["enabled_sites"]
sites_text = ", ".join(enabled_sites) if enabled_sites else "All sites"
embed.add_field(
name="Enabled Sites",
value=sites_text,
inline=False
)
async def _get_channel_mentions(
self,
guild: discord.Guild,
settings: Dict[str, Any]
) -> Dict[str, str]:
"""Get channel mentions with error handling"""
try:
# Get channel objects
archive_channel = guild.get_channel(settings["archive_channel"])
notification_channel = guild.get_channel(settings["notification_channel"])
log_channel = guild.get_channel(settings["log_channel"])
# Get monitored channels
monitored_channels = []
for channel_id in settings["monitored_channels"]:
channel = guild.get_channel(channel_id)
if channel and isinstance(channel, discord.TextChannel):
monitored_channels.append(channel.mention)
return {
"archive": archive_channel.mention if archive_channel else "Not set",
"notification": notification_channel.mention if notification_channel else "Same as archive",
"log": log_channel.mention if log_channel else "Not set",
"monitored": "\n".join(monitored_channels) if monitored_channels else "All channels"
}
except Exception as e:
logger.error(f"Error getting channel mentions: {e}")
return {
"archive": "Error",
"notification": "Error",
"log": "Error",
"monitored": "Error getting channels"
}
async def _get_role_names(
self,
guild: discord.Guild,
role_ids: List[int]
) -> str:
"""Get role names with error handling"""
try:
role_names = []
for role_id in role_ids:
role = guild.get_role(role_id)
if role:
role_names.append(role.name)
return ", ".join(role_names) if role_names else "All roles (no restrictions)"
except Exception as e:
logger.error(f"Error getting role names: {e}")
return "Error getting roles"


@@ -0,0 +1,135 @@
"""Module for validating configuration settings"""
import logging
from typing import Any, Dict, List, Union
from .exceptions import ConfigurationError as ConfigError
logger = logging.getLogger("ConfigValidation")
class ValidationManager:
"""Manages validation of configuration settings"""
# Valid settings constraints
VALID_VIDEO_FORMATS = ["mp4", "webm", "mkv"]
MAX_QUALITY_RANGE = (144, 4320) # 144p to 4K
MAX_FILE_SIZE_RANGE = (1, 100) # 1MB to 100MB
MAX_CONCURRENT_DOWNLOADS = 5
MAX_MESSAGE_DURATION = 168 # 1 week in hours
MAX_RETRIES = 10
MAX_RETRY_DELAY = 30
def validate_setting(self, setting: str, value: Any) -> None:
"""Validate a setting value against constraints
Args:
setting: Name of the setting to validate
value: Value to validate
Raises:
ConfigError: If validation fails
"""
try:
validator = getattr(self, f"_validate_{setting}", None)
if validator:
validator(value)
else:
self._validate_generic(setting, value)
        except ConfigError:
            # Validators already raise specific messages; avoid double-wrapping
            raise
        except Exception as e:
            logger.error(f"Validation error for {setting}: {e}")
            raise ConfigError(f"Validation error for {setting}: {str(e)}")
def _validate_video_format(self, value: str) -> None:
"""Validate video format setting"""
if value not in self.VALID_VIDEO_FORMATS:
raise ConfigError(
f"Invalid video format. Must be one of: {', '.join(self.VALID_VIDEO_FORMATS)}"
)
def _validate_video_quality(self, value: int) -> None:
"""Validate video quality setting"""
if not isinstance(value, int) or not (
self.MAX_QUALITY_RANGE[0] <= value <= self.MAX_QUALITY_RANGE[1]
):
raise ConfigError(
f"Video quality must be between {self.MAX_QUALITY_RANGE[0]} and {self.MAX_QUALITY_RANGE[1]}"
)
def _validate_max_file_size(self, value: Union[int, float]) -> None:
"""Validate max file size setting"""
if not isinstance(value, (int, float)) or not (
self.MAX_FILE_SIZE_RANGE[0] <= value <= self.MAX_FILE_SIZE_RANGE[1]
):
raise ConfigError(
f"Max file size must be between {self.MAX_FILE_SIZE_RANGE[0]} and {self.MAX_FILE_SIZE_RANGE[1]} MB"
)
def _validate_concurrent_downloads(self, value: int) -> None:
"""Validate concurrent downloads setting"""
if not isinstance(value, int) or not (1 <= value <= self.MAX_CONCURRENT_DOWNLOADS):
raise ConfigError(
f"Concurrent downloads must be between 1 and {self.MAX_CONCURRENT_DOWNLOADS}"
)
def _validate_message_duration(self, value: int) -> None:
"""Validate message duration setting"""
if not isinstance(value, int) or not (0 <= value <= self.MAX_MESSAGE_DURATION):
raise ConfigError(
f"Message duration must be between 0 and {self.MAX_MESSAGE_DURATION} hours"
)
def _validate_max_retries(self, value: int) -> None:
"""Validate max retries setting"""
if not isinstance(value, int) or not (0 <= value <= self.MAX_RETRIES):
raise ConfigError(
f"Max retries must be between 0 and {self.MAX_RETRIES}"
)
def _validate_retry_delay(self, value: int) -> None:
"""Validate retry delay setting"""
if not isinstance(value, int) or not (1 <= value <= self.MAX_RETRY_DELAY):
raise ConfigError(
f"Retry delay must be between 1 and {self.MAX_RETRY_DELAY} seconds"
)
def _validate_message_template(self, value: str) -> None:
"""Validate message template setting"""
if not isinstance(value, str):
raise ConfigError("Message template must be a string")
# Check for required placeholders
required_placeholders = ["{username}", "{channel}"]
for placeholder in required_placeholders:
if placeholder not in value:
raise ConfigError(f"Message template must contain {placeholder}")
def _validate_boolean(self, value: bool) -> None:
"""Validate boolean settings"""
if not isinstance(value, bool):
raise ConfigError("Value must be a boolean")
def _validate_list(self, value: List[Any]) -> None:
"""Validate list settings"""
if not isinstance(value, list):
raise ConfigError("Value must be a list")
def _validate_generic(self, setting: str, value: Any) -> None:
"""Generic validation for settings without specific validators"""
if setting.endswith("_channel") and value is not None:
if not isinstance(value, int):
raise ConfigError(f"{setting} must be a channel ID (int) or None")
elif setting in ["enabled", "delete_after_repost", "disable_update_check", "use_database"]:
self._validate_boolean(value)
elif setting in ["monitored_channels", "allowed_roles", "enabled_sites"]:
self._validate_list(value)
def validate_all_settings(self, settings: Dict[str, Any]) -> None:
"""Validate all settings in a configuration dictionary
Args:
settings: Dictionary of settings to validate
Raises:
ConfigError: If any validation fails
"""
for setting, value in settings.items():
self.validate_setting(setting, value)
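
For illustration, a minimal sketch of the validator in use (not part of the diff):

def demo_validation() -> None:
    """Hypothetical demonstration of ValidationManager."""
    validator = ValidationManager()
    validator.validate_setting("video_quality", 1080)  # passes
    validator.validate_setting("video_format", "mp4")  # passes
    try:
        validator.validate_setting("max_retries", 99)  # out of range
    except ConfigError as e:
        logger.warning(f"Rejected setting: {e}")
    # Whole-dictionary validation, e.g. before persisting imported settings
    validator.validate_all_settings({"enabled": True, "retry_delay": 5})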


@@ -1,20 +1,24 @@
"""Configuration management for VideoArchiver"""
from redbot.core import Config
from redbot.core import commands # Added for exception types
from typing import Dict, Any, Optional, List, Union, cast
import discord
import logging
from datetime import datetime
import asyncio
from .utils.exceptions import ConfigurationError as ConfigError, DiscordAPIError
logger = logging.getLogger('VideoArchiver')
import logging
import asyncio
from typing import Dict, Any, Optional, List, Union
import discord
from redbot.core import Config
from .config.validation_manager import ValidationManager
from .config.settings_formatter import SettingsFormatter
from .config.channel_manager import ChannelManager
from .config.role_manager import RoleManager
from .utils.exceptions import ConfigurationError as ConfigError
logger = logging.getLogger("VideoArchiver")
class ConfigManager:
"""Manages guild configurations for VideoArchiver"""
default_guild = {
"enabled": False, # Added the enabled setting
"enabled": False,
"archive_channel": None,
"notification_channel": None,
"log_channel": None,
@@ -34,21 +38,21 @@ class ConfigManager:
"retry_delay": 5,
"discord_retry_attempts": 3,
"discord_retry_delay": 5,
"use_database": False, # Added the missing use_database setting
"use_database": False,
}
# Valid settings constraints
VALID_VIDEO_FORMATS = ["mp4", "webm", "mkv"]
MAX_QUALITY_RANGE = (144, 4320) # 144p to 4K
MAX_FILE_SIZE_RANGE = (1, 100) # 1MB to 100MB
MAX_CONCURRENT_DOWNLOADS = 5
MAX_MESSAGE_DURATION = 168 # 1 week in hours
MAX_RETRIES = 10
MAX_RETRY_DELAY = 30
def __init__(self, bot_config: Config):
"""Initialize configuration managers"""
self.config = bot_config
self.config.register_guild(**self.default_guild)
# Initialize managers
self.validation_manager = ValidationManager()
self.settings_formatter = SettingsFormatter()
self.channel_manager = ChannelManager(self)
self.role_manager = RoleManager(self)
# Thread safety
self._config_locks: Dict[int, asyncio.Lock] = {}
async def _get_guild_lock(self, guild_id: int) -> asyncio.Lock:
@@ -57,71 +61,42 @@ class ConfigManager:
self._config_locks[guild_id] = asyncio.Lock()
return self._config_locks[guild_id]
def _validate_setting(self, setting: str, value: Any) -> None:
"""Validate setting value against constraints"""
try:
if setting == "video_format" and value not in self.VALID_VIDEO_FORMATS:
raise ConfigError(f"Invalid video format. Must be one of: {', '.join(self.VALID_VIDEO_FORMATS)}")
elif setting == "video_quality":
if not isinstance(value, int) or not (self.MAX_QUALITY_RANGE[0] <= value <= self.MAX_QUALITY_RANGE[1]):
raise ConfigError(f"Video quality must be between {self.MAX_QUALITY_RANGE[0]} and {self.MAX_QUALITY_RANGE[1]}")
elif setting == "max_file_size":
if not isinstance(value, (int, float)) or not (self.MAX_FILE_SIZE_RANGE[0] <= value <= self.MAX_FILE_SIZE_RANGE[1]):
raise ConfigError(f"Max file size must be between {self.MAX_FILE_SIZE_RANGE[0]} and {self.MAX_FILE_SIZE_RANGE[1]} MB")
elif setting == "concurrent_downloads":
if not isinstance(value, int) or not (1 <= value <= self.MAX_CONCURRENT_DOWNLOADS):
raise ConfigError(f"Concurrent downloads must be between 1 and {self.MAX_CONCURRENT_DOWNLOADS}")
elif setting == "message_duration":
if not isinstance(value, int) or not (0 <= value <= self.MAX_MESSAGE_DURATION):
raise ConfigError(f"Message duration must be between 0 and {self.MAX_MESSAGE_DURATION} hours")
elif setting == "max_retries":
if not isinstance(value, int) or not (0 <= value <= self.MAX_RETRIES):
raise ConfigError(f"Max retries must be between 0 and {self.MAX_RETRIES}")
elif setting == "retry_delay":
if not isinstance(value, int) or not (1 <= value <= self.MAX_RETRY_DELAY):
raise ConfigError(f"Retry delay must be between 1 and {self.MAX_RETRY_DELAY} seconds")
elif setting in ["message_template"] and not isinstance(value, str):
raise ConfigError("Message template must be a string")
elif setting in ["enabled", "delete_after_repost", "disable_update_check", "use_database"] and not isinstance(value, bool):
raise ConfigError(f"{setting} must be a boolean")
except Exception as e:
raise ConfigError(f"Validation error for {setting}: {str(e)}")
async def get_guild_settings(self, guild_id: int) -> Dict[str, Any]:
"""Get all settings for a guild with error handling"""
"""Get all settings for a guild"""
try:
async with await self._get_guild_lock(guild_id):
return await self.config.guild_from_id(guild_id).all()
except Exception as e:
logger.error(f"Failed to get guild settings for {guild_id}: {str(e)}")
logger.error(f"Failed to get guild settings for {guild_id}: {e}")
raise ConfigError(f"Failed to get guild settings: {str(e)}")
async def update_setting(self, guild_id: int, setting: str, value: Any) -> None:
"""Update a specific setting for a guild with validation"""
async def update_setting(
self,
guild_id: int,
setting: str,
value: Any
) -> None:
"""Update a specific setting for a guild"""
try:
if setting not in self.default_guild:
raise ConfigError(f"Invalid setting: {setting}")
self._validate_setting(setting, value)
# Validate setting
self.validation_manager.validate_setting(setting, value)
async with await self._get_guild_lock(guild_id):
await self.config.guild_from_id(guild_id).set_raw(setting, value=value)
except Exception as e:
logger.error(f"Failed to update setting {setting} for guild {guild_id}: {str(e)}")
logger.error(f"Failed to update setting {setting} for guild {guild_id}: {e}")
raise ConfigError(f"Failed to update setting: {str(e)}")
async def get_setting(self, guild_id: int, setting: str) -> Any:
"""Get a specific setting for a guild with error handling"""
async def get_setting(
self,
guild_id: int,
setting: str
) -> Any:
"""Get a specific setting for a guild"""
try:
if setting not in self.default_guild:
raise ConfigError(f"Invalid setting: {setting}")
@@ -130,11 +105,15 @@ class ConfigManager:
return await self.config.guild_from_id(guild_id).get_raw(setting)
except Exception as e:
logger.error(f"Failed to get setting {setting} for guild {guild_id}: {str(e)}")
logger.error(f"Failed to get setting {setting} for guild {guild_id}: {e}")
raise ConfigError(f"Failed to get setting: {str(e)}")
async def toggle_setting(self, guild_id: int, setting: str) -> bool:
"""Toggle a boolean setting for a guild with validation"""
async def toggle_setting(
self,
guild_id: int,
setting: str
) -> bool:
"""Toggle a boolean setting for a guild"""
try:
if setting not in self.default_guild:
raise ConfigError(f"Invalid setting: {setting}")
@@ -148,11 +127,16 @@ class ConfigManager:
return not current
except Exception as e:
logger.error(f"Failed to toggle setting {setting} for guild {guild_id}: {str(e)}")
logger.error(f"Failed to toggle setting {setting} for guild {guild_id}: {e}")
raise ConfigError(f"Failed to toggle setting: {str(e)}")
async def add_to_list(self, guild_id: int, setting: str, value: Any) -> None:
"""Add a value to a list setting with validation"""
async def add_to_list(
self,
guild_id: int,
setting: str,
value: Any
) -> None:
"""Add a value to a list setting"""
try:
if setting not in self.default_guild:
raise ConfigError(f"Invalid setting: {setting}")
@@ -165,11 +149,16 @@ class ConfigManager:
items.append(value)
except Exception as e:
logger.error(f"Failed to add to list {setting} for guild {guild_id}: {str(e)}")
logger.error(f"Failed to add to list {setting} for guild {guild_id}: {e}")
raise ConfigError(f"Failed to add to list: {str(e)}")
async def remove_from_list(self, guild_id: int, setting: str, value: Any) -> None:
"""Remove a value from a list setting with validation"""
async def remove_from_list(
self,
guild_id: int,
setting: str,
value: Any
) -> None:
"""Remove a value from a list setting"""
try:
if setting not in self.default_guild:
raise ConfigError(f"Invalid setting: {setting}")
@@ -182,187 +171,29 @@ class ConfigManager:
items.remove(value)
except Exception as e:
logger.error(f"Failed to remove from list {setting} for guild {guild_id}: {str(e)}")
logger.error(f"Failed to remove from list {setting} for guild {guild_id}: {e}")
raise ConfigError(f"Failed to remove from list: {str(e)}")
async def get_channel(self, guild: discord.Guild, channel_type: str) -> Optional[discord.TextChannel]:
"""Get a channel by type with error handling and validation"""
async def format_settings_embed(self, guild: discord.Guild) -> discord.Embed:
"""Format guild settings into a Discord embed"""
try:
if channel_type not in ["archive", "notification", "log"]:
raise ConfigError(f"Invalid channel type: {channel_type}")
settings = await self.get_guild_settings(guild.id)
channel_id = settings.get(f"{channel_type}_channel")
if channel_id is None:
return None
channel = guild.get_channel(channel_id)
if channel is None:
logger.warning(f"Channel {channel_id} not found in guild {guild.id}")
return None
if not isinstance(channel, discord.TextChannel):
raise DiscordAPIError(f"Channel {channel_id} is not a text channel")
return channel
return await self.settings_formatter.format_settings_embed(guild, settings)
except Exception as e:
logger.error(f"Failed to get {channel_type} channel for guild {guild.id}: {str(e)}")
raise ConfigError(f"Failed to get channel: {str(e)}")
logger.error(f"Failed to format settings embed for guild {guild.id}: {e}")
raise ConfigError(f"Failed to format settings: {str(e)}")
async def check_user_roles(self, member: discord.Member) -> bool:
"""Check if user has permission based on allowed roles with error handling"""
try:
allowed_roles = await self.get_setting(member.guild.id, "allowed_roles")
# If no roles are set, allow all users
if not allowed_roles:
return True
return any(role.id in allowed_roles for role in member.roles)
except Exception as e:
logger.error(f"Failed to check roles for user {member.id} in guild {member.guild.id}: {str(e)}")
raise ConfigError(f"Failed to check user roles: {str(e)}")
# Channel management delegated to channel_manager
async def get_channel(self, guild: discord.Guild, channel_type: str) -> Optional[discord.TextChannel]:
"""Get a channel by type"""
return await self.channel_manager.get_channel(guild, channel_type)
async def get_monitored_channels(self, guild: discord.Guild) -> List[discord.TextChannel]:
"""Get all monitored channels for a guild with validation"""
try:
settings = await self.get_guild_settings(guild.id)
monitored_channel_ids = settings["monitored_channels"]
"""Get all monitored channels for a guild"""
return await self.channel_manager.get_monitored_channels(guild)
# If no channels are set to be monitored, return all text channels
if not monitored_channel_ids:
return [channel for channel in guild.channels if isinstance(channel, discord.TextChannel)]
# Otherwise, return only the specified channels
channels: List[discord.TextChannel] = []
for channel_id in monitored_channel_ids:
channel = guild.get_channel(channel_id)
if channel and isinstance(channel, discord.TextChannel):
channels.append(channel)
else:
logger.warning(f"Invalid monitored channel {channel_id} in guild {guild.id}")
return channels
except Exception as e:
logger.error(f"Failed to get monitored channels for guild {guild.id}: {str(e)}")
raise ConfigError(f"Failed to get monitored channels: {str(e)}")
async def format_settings_embed(self, guild: discord.Guild) -> discord.Embed:
"""Format guild settings into a Discord embed with error handling"""
try:
settings = await self.get_guild_settings(guild.id)
embed = discord.Embed(
title="Video Archiver Settings",
color=discord.Color.blue(),
timestamp=datetime.utcnow()
)
# Get channels with error handling
archive_channel = guild.get_channel(settings["archive_channel"]) if settings["archive_channel"] else None
notification_channel = guild.get_channel(settings["notification_channel"]) if settings["notification_channel"] else None
log_channel = guild.get_channel(settings["log_channel"]) if settings["log_channel"] else None
# Get monitored channels and roles with validation
monitored_channels = []
for channel_id in settings["monitored_channels"]:
channel = guild.get_channel(channel_id)
if channel and isinstance(channel, discord.TextChannel):
monitored_channels.append(channel.mention)
allowed_roles = []
for role_id in settings["allowed_roles"]:
role = guild.get_role(role_id)
if role:
allowed_roles.append(role.name)
# Add fields with proper formatting
embed.add_field(
name="Enabled",
value=str(settings["enabled"]),
inline=False
)
embed.add_field(
name="Archive Channel",
value=archive_channel.mention if archive_channel else "Not set",
inline=False
)
embed.add_field(
name="Notification Channel",
value=notification_channel.mention if notification_channel else "Same as archive",
inline=False
)
embed.add_field(
name="Log Channel",
value=log_channel.mention if log_channel else "Not set",
inline=False
)
embed.add_field(
name="Monitored Channels",
value="\n".join(monitored_channels) if monitored_channels else "All channels",
inline=False
)
embed.add_field(
name="Allowed Roles",
value=", ".join(allowed_roles) if allowed_roles else "All roles (no restrictions)",
inline=False
)
# Add other settings with validation
embed.add_field(
name="Video Format",
value=settings["video_format"],
inline=True
)
embed.add_field(
name="Max Quality",
value=f"{settings['video_quality']}p",
inline=True
)
embed.add_field(
name="Max File Size",
value=f"{settings['max_file_size']}MB",
inline=True
)
embed.add_field(
name="Delete After Repost",
value=str(settings["delete_after_repost"]),
inline=True
)
embed.add_field(
name="Message Duration",
value=f"{settings['message_duration']} hours",
inline=True
)
embed.add_field(
name="Concurrent Downloads",
value=str(settings["concurrent_downloads"]),
inline=True
)
embed.add_field(
name="Update Check Disabled",
value=str(settings["disable_update_check"]),
inline=True
)
embed.add_field(
name="Database Enabled",
value=str(settings["use_database"]),
inline=True
)
# Add enabled sites with validation
embed.add_field(
name="Enabled Sites",
value=", ".join(settings["enabled_sites"]) if settings["enabled_sites"] else "All sites",
inline=False
)
# Add footer with last update time
embed.set_footer(text="Last updated")
return embed
except Exception as e:
logger.error(f"Failed to format settings embed for guild {guild.id}: {str(e)}")
raise ConfigError(f"Failed to format settings: {str(e)}")
# Role management delegated to role_manager
async def check_user_roles(self, member: discord.Member) -> bool:
"""Check if user has permission based on allowed roles"""
has_permission, _ = await self.role_manager.check_user_roles(member)
return has_permission
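
The refactored ConfigManager keeps its public API while delegating to the extracted managers. For illustration, a minimal sketch of the call sites (not part of the diff); `bot_config`, `guild`, and `member` are assumed inputs:

async def example(bot_config, guild: discord.Guild, member: discord.Member):
    """Hypothetical walkthrough of the delegating facade."""
    config = ConfigManager(bot_config)
    await config.update_setting(guild.id, "video_quality", 720)  # ValidationManager
    archive = await config.get_channel(guild, "archive")         # ChannelManager
    monitored = await config.get_monitored_channels(guild)       # ChannelManager
    allowed = await config.check_user_roles(member)              # RoleManager
    return await config.format_settings_embed(guild)             # SettingsFormatter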


@@ -4,154 +4,216 @@ from __future__ import annotations
import asyncio
import logging
from pathlib import Path
from typing import Dict, Any, Optional
from datetime import datetime
from redbot.core.bot import Red
from redbot.core.commands import GroupCog
from .initialization import initialize_cog, init_callback
from .error_handler import handle_command_error
from .cleanup import cleanup_resources, force_cleanup_resources
from .settings import Settings
from .lifecycle import LifecycleManager
from .component_manager import ComponentManager, ComponentState
from .error_handler import error_manager, handle_command_error
from .response_handler import response_manager
from .commands import setup_archiver_commands, setup_database_commands, setup_settings_commands
from ..utils.exceptions import VideoArchiverError as ProcessingError
from .events import setup_events
logger = logging.getLogger("VideoArchiver")
# Constants for timeouts
UNLOAD_TIMEOUT = 30 # seconds
CLEANUP_TIMEOUT = 15 # seconds
class CogStatus:
"""Tracks cog status and health"""
class VideoArchiver(GroupCog):
"""Archive videos from Discord channels"""
def __init__(self):
self.start_time = datetime.utcnow()
self.last_error: Optional[str] = None
self.error_count = 0
self.command_count = 0
self.last_command_time: Optional[datetime] = None
self.health_checks: Dict[str, bool] = {}
default_guild_settings = {
"enabled": False,
"archive_channel": None,
"log_channel": None,
"enabled_channels": [], # Empty list means all channels
"allowed_roles": [], # Empty list means all roles
"video_format": "mp4",
"video_quality": "high",
"max_file_size": 8, # MB
"message_duration": 30, # seconds
"message_template": "{author} archived a video from {channel}",
"concurrent_downloads": 2,
"enabled_sites": None, # None means all sites
"use_database": False, # Database tracking is off by default
def record_error(self, error: str) -> None:
"""Record an error occurrence"""
self.last_error = error
self.error_count += 1
def record_command(self) -> None:
"""Record a command execution"""
self.command_count += 1
self.last_command_time = datetime.utcnow()
def update_health_check(self, check: str, status: bool) -> None:
"""Update health check status"""
self.health_checks[check] = status
def get_status(self) -> Dict[str, Any]:
"""Get current status"""
return {
"uptime": (datetime.utcnow() - self.start_time).total_seconds(),
"last_error": self.last_error,
"error_count": self.error_count,
"command_count": self.command_count,
"last_command": self.last_command_time.isoformat() if self.last_command_time else None,
"health_checks": self.health_checks.copy()
}
class ComponentAccessor:
"""Provides safe access to components"""
def __init__(self, component_manager: ComponentManager):
self._component_manager = component_manager
def get_component(self, name: str) -> Optional[Any]:
"""Get a component with state validation"""
        component = self._component_manager.get(name)
        if component and component.state == ComponentState.READY:
            # Return the wrapped object rather than the tracking shell
            return component.instance
        return None
def get_component_status(self, name: str) -> Dict[str, Any]:
"""Get component status"""
return self._component_manager.get_component_status().get(name, {})
class VideoArchiver(GroupCog, Settings):
"""Archive videos from Discord channels"""
def __init__(self, bot: Red) -> None:
"""Initialize the cog with minimal setup"""
super().__init__()
self.bot = bot
self.ready = asyncio.Event()
self._init_task = None
self._cleanup_task = None
self._queue_task = None
self._unloading = False
self.db = None
self.queue_manager = None
self.processor = None
self.components = {}
self.config_manager = None
self.update_checker = None
self.ffmpeg_mgr = None
self.data_path = None
self.download_path = None
# Initialize managers
self.lifecycle_manager = LifecycleManager(self)
self.component_manager = ComponentManager(self)
self.component_accessor = ComponentAccessor(self.component_manager)
self.status = CogStatus()
# Set up commands
setup_archiver_commands(self)
setup_database_commands(self)
setup_settings_commands(self)
# Set up events - non-blocking
from .events import setup_events
# Set up events
setup_events(self)
# Register cleanup handlers
self.lifecycle_manager.register_cleanup_handler(self._cleanup_handler)
async def cog_load(self) -> None:
"""Handle cog loading without blocking"""
"""Handle cog loading"""
try:
# Start initialization as background task without waiting
self._init_task = asyncio.create_task(initialize_cog(self))
self._init_task.add_done_callback(lambda t: init_callback(self, t))
logger.info("Initialization started in background")
await self.lifecycle_manager.handle_load()
await self._start_health_monitoring()
except Exception as e:
# Ensure cleanup on any error
try:
await asyncio.wait_for(
force_cleanup_resources(self), timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup during load error timed out")
raise ProcessingError(f"Error during cog load: {str(e)}")
self.status.record_error(str(e))
raise
async def cog_unload(self) -> None:
"""Clean up when cog is unloaded with proper timeout handling"""
self._unloading = True
"""Handle cog unloading"""
try:
# Cancel any pending tasks
if self._init_task and not self._init_task.done():
self._init_task.cancel()
if self._cleanup_task and not self._cleanup_task.done():
self._cleanup_task.cancel()
# Cancel queue processing task if it exists
if (
hasattr(self, "_queue_task")
and self._queue_task
and not self._queue_task.done()
):
self._queue_task.cancel()
try:
await self._queue_task
except asyncio.CancelledError:
pass
await self.lifecycle_manager.handle_unload()
except Exception as e:
logger.error(f"Error cancelling queue task: {e}")
# Try normal cleanup first
cleanup_task = asyncio.create_task(cleanup_resources(self))
try:
await asyncio.wait_for(cleanup_task, timeout=UNLOAD_TIMEOUT)
logger.info("Normal cleanup completed")
except (asyncio.TimeoutError, Exception) as e:
if isinstance(e, asyncio.TimeoutError):
logger.warning("Normal cleanup timed out, forcing cleanup")
else:
logger.error(f"Error during normal cleanup: {str(e)}")
# Cancel normal cleanup and force cleanup
cleanup_task.cancel()
try:
# Force cleanup with timeout
await asyncio.wait_for(
force_cleanup_resources(self), timeout=CLEANUP_TIMEOUT
)
logger.info("Force cleanup completed")
except asyncio.TimeoutError:
logger.error("Force cleanup timed out")
except Exception as e:
logger.error(f"Error during force cleanup: {str(e)}")
except Exception as e:
logger.error(f"Error during cog unload: {str(e)}")
finally:
self._unloading = False
# Ensure ready flag is cleared
self.ready.clear()
# Clear all references
self.bot = None
self.processor = None
self.queue_manager = None
self.update_checker = None
self.ffmpeg_mgr = None
self.components.clear()
self.db = None
self._init_task = None
self._cleanup_task = None
if hasattr(self, "_queue_task"):
self._queue_task = None
self.status.record_error(str(e))
raise
async def cog_command_error(self, ctx, error):
"""Handle command errors"""
self.status.record_error(str(error))
await handle_command_error(ctx, error)
async def cog_before_invoke(self, ctx) -> bool:
"""Pre-command hook"""
self.status.record_command()
return True
async def _start_health_monitoring(self) -> None:
"""Start health monitoring tasks"""
asyncio.create_task(self._monitor_component_health())
asyncio.create_task(self._monitor_system_health())
async def _monitor_component_health(self) -> None:
"""Monitor component health"""
while True:
try:
component_status = self.component_manager.get_component_status()
for name, status in component_status.items():
self.status.update_health_check(
f"component_{name}",
status["state"] == ComponentState.READY.value
)
except Exception as e:
logger.error(f"Error monitoring component health: {e}")
await asyncio.sleep(60) # Check every minute
async def _monitor_system_health(self) -> None:
"""Monitor system health metrics"""
while True:
try:
# Check queue health
queue_manager = self.queue_manager
if queue_manager:
queue_status = await queue_manager.get_queue_status()
self.status.update_health_check(
"queue_health",
queue_status["active"] and not queue_status["stalled"]
)
# Check processor health
processor = self.processor
if processor:
processor_status = await processor.get_status()
self.status.update_health_check(
"processor_health",
processor_status["active"]
)
except Exception as e:
logger.error(f"Error monitoring system health: {e}")
await asyncio.sleep(30) # Check every 30 seconds
async def _cleanup_handler(self) -> None:
"""Custom cleanup handler"""
try:
# Perform any custom cleanup
pass
except Exception as e:
logger.error(f"Error in cleanup handler: {e}")
def get_status(self) -> Dict[str, Any]:
"""Get comprehensive cog status"""
return {
"cog": self.status.get_status(),
"lifecycle": self.lifecycle_manager.get_status(),
"components": self.component_manager.get_component_status(),
"errors": error_manager.tracker.get_error_stats()
}
# Component property accessors
@property
def processor(self):
"""Get the processor component"""
return self.component_accessor.get_component("processor")
@property
def queue_manager(self):
"""Get the queue manager component"""
return self.component_accessor.get_component("queue_manager")
@property
def config_manager(self):
"""Get the config manager component"""
return self.component_accessor.get_component("config_manager")
@property
def ffmpeg_mgr(self):
"""Get the FFmpeg manager component"""
return self.component_accessor.get_component("ffmpeg_mgr")
@property
def data_path(self):
"""Get the data path"""
return self.component_accessor.get_component("data_path")
@property
def download_path(self):
"""Get the download path"""
return self.component_accessor.get_component("download_path")


@@ -0,0 +1,261 @@
"""Module for managing VideoArchiver components"""
import logging
import asyncio
from typing import Dict, Any, Optional, Set, List
from enum import Enum
from datetime import datetime
logger = logging.getLogger("VideoArchiver")
class ComponentState(Enum):
"""Possible states of a component"""
UNREGISTERED = "unregistered"
REGISTERED = "registered"
INITIALIZING = "initializing"
READY = "ready"
ERROR = "error"
SHUTDOWN = "shutdown"
class ComponentDependencyError(Exception):
"""Raised when component dependencies cannot be satisfied"""
pass
class ComponentLifecycleError(Exception):
"""Raised when component lifecycle operations fail"""
pass
class Component:
"""Base class for managed components"""
    def __init__(self, name: str):
        self.name = name
        self.state = ComponentState.UNREGISTERED
        self.dependencies: Set[str] = set()
        self.dependents: Set[str] = set()
        self.registration_time: Optional[datetime] = None
        self.initialization_time: Optional[datetime] = None
        self.error: Optional[str] = None
        # Underlying object this component wraps (itself for subclasses)
        self.instance: Any = self
async def initialize(self) -> None:
"""Initialize the component"""
pass
async def shutdown(self) -> None:
"""Shutdown the component"""
pass
class ComponentTracker:
"""Tracks component states and relationships"""
def __init__(self):
self.states: Dict[str, ComponentState] = {}
self.history: List[Dict[str, Any]] = []
def update_state(self, name: str, state: ComponentState, error: Optional[str] = None) -> None:
"""Update component state"""
self.states[name] = state
self.history.append({
"component": name,
"state": state.value,
"timestamp": datetime.utcnow(),
"error": error
})
def get_component_history(self, name: str) -> List[Dict[str, Any]]:
"""Get state history for a component"""
return [
entry for entry in self.history
if entry["component"] == name
]
class DependencyManager:
"""Manages component dependencies"""
def __init__(self):
self.dependencies: Dict[str, Set[str]] = {}
self.dependents: Dict[str, Set[str]] = {}
def add_dependency(self, component: str, dependency: str) -> None:
"""Add a dependency relationship"""
if component not in self.dependencies:
self.dependencies[component] = set()
self.dependencies[component].add(dependency)
if dependency not in self.dependents:
self.dependents[dependency] = set()
self.dependents[dependency].add(component)
def get_dependencies(self, component: str) -> Set[str]:
"""Get dependencies for a component"""
return self.dependencies.get(component, set())
def get_dependents(self, component: str) -> Set[str]:
"""Get components that depend on this component"""
return self.dependents.get(component, set())
def get_initialization_order(self) -> List[str]:
"""Get components in dependency order"""
visited = set()
order = []
def visit(component: str) -> None:
if component in visited:
return
visited.add(component)
for dep in self.dependencies.get(component, set()):
visit(dep)
order.append(component)
for component in self.dependencies:
visit(component)
return order
class ComponentManager:
"""Manages VideoArchiver components"""
def __init__(self, cog):
self.cog = cog
self._components: Dict[str, Component] = {}
self.tracker = ComponentTracker()
self.dependency_manager = DependencyManager()
def register(
self,
name: str,
component: Any,
dependencies: Optional[Set[str]] = None
) -> None:
"""Register a component with dependencies"""
try:
            # Wrap non-Component objects in a tracking shell, keeping a
            # reference to the underlying instance instead of discarding it
            if not isinstance(component, Component):
                wrapper = Component(name)
                wrapper.instance = component
                component = wrapper
# Register dependencies
if dependencies:
for dep in dependencies:
if dep not in self._components:
raise ComponentDependencyError(
f"Dependency {dep} not registered for {name}"
)
self.dependency_manager.add_dependency(name, dep)
# Register component
self._components[name] = component
component.registration_time = datetime.utcnow()
self.tracker.update_state(name, ComponentState.REGISTERED)
logger.debug(f"Registered component: {name}")
except Exception as e:
logger.error(f"Error registering component {name}: {e}")
self.tracker.update_state(name, ComponentState.ERROR, str(e))
raise ComponentLifecycleError(f"Failed to register component: {str(e)}")
async def initialize_components(self) -> None:
"""Initialize all components in dependency order"""
        try:
            # Register core components and paths first
            await self._initialize_core_components()
            # Compute the order only after everything is registered
            init_order = self.dependency_manager.get_initialization_order()
            # Include components with no recorded dependency relationships
            init_order += [
                name for name in self._components if name not in init_order
            ]
            # Initialize components in dependency order
            for name in init_order:
if name not in self._components:
continue
component = self._components[name]
try:
self.tracker.update_state(name, ComponentState.INITIALIZING)
await component.initialize()
component.initialization_time = datetime.utcnow()
self.tracker.update_state(name, ComponentState.READY)
except Exception as e:
logger.error(f"Error initializing component {name}: {e}")
self.tracker.update_state(name, ComponentState.ERROR, str(e))
raise ComponentLifecycleError(
f"Failed to initialize component {name}: {str(e)}"
)
except Exception as e:
logger.error(f"Error during component initialization: {e}")
raise ComponentLifecycleError(f"Component initialization failed: {str(e)}")
async def _initialize_core_components(self) -> None:
"""Initialize core system components"""
from ..config_manager import ConfigManager
from ..processor.core import Processor
from ..queue.manager import QueueManager
from ..ffmpeg.ffmpeg_manager import FFmpegManager
core_components = {
"config_manager": (ConfigManager(self.cog), set()),
"processor": (Processor(self.cog), {"config_manager"}),
"queue_manager": (QueueManager(self.cog), {"config_manager"}),
"ffmpeg_mgr": (FFmpegManager(self.cog), set())
}
for name, (component, deps) in core_components.items():
self.register(name, component, deps)
# Initialize paths
await self._initialize_paths()
async def _initialize_paths(self) -> None:
"""Initialize required paths"""
from pathlib import Path
from ..utils.path_manager import ensure_directory
data_dir = Path(self.cog.bot.data_path) / "VideoArchiver"
download_dir = data_dir / "downloads"
# Ensure directories exist
await ensure_directory(data_dir)
await ensure_directory(download_dir)
# Register paths
self.register("data_path", data_dir)
self.register("download_path", download_dir)
def get(self, name: str) -> Optional[Any]:
"""Get a registered component"""
component = self._components.get(name)
return component if isinstance(component, Component) else None
async def shutdown_components(self) -> None:
"""Shutdown components in reverse dependency order"""
        order = self.dependency_manager.get_initialization_order()
        # Include components with no recorded dependency relationships
        order += [name for name in self._components if name not in order]
        for name in reversed(order):
if name not in self._components:
continue
component = self._components[name]
try:
await component.shutdown()
self.tracker.update_state(name, ComponentState.SHUTDOWN)
except Exception as e:
logger.error(f"Error shutting down component {name}: {e}")
self.tracker.update_state(name, ComponentState.ERROR, str(e))
def clear(self) -> None:
"""Clear all registered components"""
self._components.clear()
logger.debug("Cleared all components")
def get_component_status(self) -> Dict[str, Any]:
"""Get status of all components"""
return {
name: {
"state": self.tracker.states.get(name, ComponentState.UNREGISTERED).value,
"registration_time": component.registration_time,
"initialization_time": component.initialization_time,
"dependencies": self.dependency_manager.get_dependencies(name),
"dependents": self.dependency_manager.get_dependents(name),
"error": component.error
}
for name, component in self._components.items()
}
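
For illustration, a minimal sketch of registering a custom component behind a dependency (not part of the diff); `cog` is an assumed stand-in and the object registered as "config_manager" is a placeholder:

class CacheComponent(Component):
    """Hypothetical component with real initialization work."""
    async def initialize(self) -> None:
        self.store: Dict[str, Any] = {}

def wire_up(cog) -> ComponentManager:
    manager = ComponentManager(cog)
    manager.register("config_manager", object())  # placeholder dependency
    manager.register("cache", CacheComponent("cache"), {"config_manager"})
    # "config_manager" precedes "cache" in the computed order
    print(manager.dependency_manager.get_initialization_order())
    return manager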


@@ -2,45 +2,201 @@
import logging
import traceback
from redbot.core.commands import Context, MissingPermissions, BotMissingPermissions, MissingRequiredArgument, BadArgument
from typing import Dict, Optional, Tuple, Type
import discord
from redbot.core.commands import (
Context,
MissingPermissions,
BotMissingPermissions,
MissingRequiredArgument,
BadArgument,
CommandError
)
from ..utils.exceptions import VideoArchiverError as ProcessingError, ConfigurationError as ConfigError
from .response_handler import handle_response
from .response_handler import response_manager
logger = logging.getLogger("VideoArchiver")
async def handle_command_error(ctx: Context, error: Exception) -> None:
"""Handle command errors"""
error_msg = None
try:
if isinstance(error, MissingPermissions):
error_msg = "❌ You don't have permission to use this command."
elif isinstance(error, BotMissingPermissions):
error_msg = "❌ I don't have the required permissions to do that."
elif isinstance(error, MissingRequiredArgument):
error_msg = f"❌ Missing required argument: {error.param.name}"
elif isinstance(error, BadArgument):
error_msg = f"❌ Invalid argument: {str(error)}"
elif isinstance(error, ConfigError):
error_msg = f"❌ Configuration error: {str(error)}"
elif isinstance(error, ProcessingError):
error_msg = f"❌ Processing error: {str(error)}"
else:
logger.error(
f"Command error in {ctx.command}: {traceback.format_exc()}"
)
error_msg = (
"❌ An unexpected error occurred. Check the logs for details."
)
class ErrorFormatter:
"""Formats error messages for display"""
if error_msg:
await handle_response(ctx, error_msg)
@staticmethod
def format_permission_error(error: Exception) -> str:
"""Format permission error messages"""
if isinstance(error, MissingPermissions):
return "You don't have permission to use this command."
elif isinstance(error, BotMissingPermissions):
return "I don't have the required permissions to do that."
return str(error)
@staticmethod
def format_argument_error(error: Exception) -> str:
"""Format argument error messages"""
if isinstance(error, MissingRequiredArgument):
return f"Missing required argument: {error.param.name}"
elif isinstance(error, BadArgument):
return f"Invalid argument: {str(error)}"
return str(error)
@staticmethod
def format_processing_error(error: ProcessingError) -> str:
"""Format processing error messages"""
return f"Processing error: {str(error)}"
@staticmethod
def format_config_error(error: ConfigError) -> str:
"""Format configuration error messages"""
return f"Configuration error: {str(error)}"
@staticmethod
def format_unexpected_error(error: Exception) -> str:
"""Format unexpected error messages"""
return "An unexpected error occurred. Check the logs for details."
class ErrorCategorizer:
"""Categorizes errors and determines handling strategy"""
ERROR_TYPES = {
MissingPermissions: ("permission", "error"),
BotMissingPermissions: ("permission", "error"),
MissingRequiredArgument: ("argument", "warning"),
BadArgument: ("argument", "warning"),
ConfigError: ("configuration", "error"),
ProcessingError: ("processing", "error"),
}
@classmethod
def categorize_error(cls, error: Exception) -> Tuple[str, str]:
"""Categorize an error and determine its severity
Returns:
Tuple[str, str]: (Error category, Severity level)
"""
for error_type, (category, severity) in cls.ERROR_TYPES.items():
if isinstance(error, error_type):
return category, severity
return "unexpected", "error"
class ErrorTracker:
"""Tracks error occurrences and patterns"""
def __init__(self):
self.error_counts: Dict[str, int] = {}
self.error_patterns: Dict[str, Dict[str, int]] = {}
def track_error(self, error: Exception, category: str) -> None:
"""Track an error occurrence"""
error_type = type(error).__name__
self.error_counts[error_type] = self.error_counts.get(error_type, 0) + 1
if category not in self.error_patterns:
self.error_patterns[category] = {}
self.error_patterns[category][error_type] = self.error_patterns[category].get(error_type, 0) + 1
def get_error_stats(self) -> Dict:
"""Get error statistics"""
return {
"counts": self.error_counts.copy(),
"patterns": self.error_patterns.copy()
}
class ErrorManager:
"""Manages error handling and reporting"""
def __init__(self):
self.formatter = ErrorFormatter()
self.categorizer = ErrorCategorizer()
self.tracker = ErrorTracker()
async def handle_error(
self,
ctx: Context,
error: Exception
) -> None:
"""Handle a command error
Args:
ctx: Command context
error: The error that occurred
"""
try:
# Categorize error
category, severity = self.categorizer.categorize_error(error)
# Track error
self.tracker.track_error(error, category)
# Format error message
error_msg = await self._format_error_message(error, category)
# Log error details
self._log_error(ctx, error, category, severity)
# Send response
await response_manager.send_response(
ctx,
content=error_msg,
response_type=severity
)
except Exception as e:
logger.error(f"Error handling command error: {str(e)}")
try:
                await response_manager.send_response(
                    ctx,
                    content="An error occurred while handling another error. Please check the logs.",
                    response_type="error"
                )
except Exception:
pass
async def _format_error_message(
self,
error: Exception,
category: str
) -> str:
"""Format error message based on category"""
try:
if category == "permission":
return self.formatter.format_permission_error(error)
elif category == "argument":
return self.formatter.format_argument_error(error)
elif category == "processing":
return self.formatter.format_processing_error(error)
elif category == "configuration":
return self.formatter.format_config_error(error)
else:
return self.formatter.format_unexpected_error(error)
except Exception as e:
logger.error(f"Error formatting error message: {e}")
return "An error occurred. Please check the logs."
def _log_error(
self,
ctx: Context,
error: Exception,
category: str,
severity: str
) -> None:
"""Log error details"""
try:
if severity == "error":
logger.error(
f"Command error in {ctx.command} (Category: {category}):\n"
f"{traceback.format_exc()}"
)
else:
logger.warning(
f"Command warning in {ctx.command} (Category: {category}):\n"
f"{str(error)}"
)
except Exception as e:
logger.error(f"Error logging error details: {e}")
# Global error manager instance
error_manager = ErrorManager()
async def handle_command_error(ctx: Context, error: Exception) -> None:
"""Helper function to handle command errors using the error manager"""
await error_manager.handle_error(ctx, error)
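
For reference, a minimal usage sketch; the cog hook below is illustrative wiring (not part of this module), and only handle_command_error and error_manager come from the code above:

from redbot.core import commands

class ArchiverCommands(commands.Cog):
    async def cog_command_error(self, ctx: commands.Context, error: Exception) -> None:
        # Delegates categorization, tracking, logging, and the user-facing reply
        await handle_command_error(ctx, error)

# Aggregated counts for health reporting:
stats = error_manager.tracker.get_error_stats()
# -> {"counts": {"BadArgument": 2, ...}, "patterns": {"argument": {"BadArgument": 2}}}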

View File

@@ -4,65 +4,165 @@ import logging
import discord
import asyncio
import traceback
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict, Any, Optional
from datetime import datetime
from ..processor.reactions import REACTIONS, handle_archived_reaction
from .guild import initialize_guild_components, cleanup_guild_components
from .error_handler import error_manager
from .response_handler import response_manager
if TYPE_CHECKING:
from .base import VideoArchiver
logger = logging.getLogger("VideoArchiver")
class EventTracker:
    """Tracks event occurrences and patterns"""

    def __init__(self):
self.event_counts: Dict[str, int] = {}
self.last_events: Dict[str, datetime] = {}
self.error_counts: Dict[str, int] = {}
def record_event(self, event_type: str) -> None:
"""Record an event occurrence"""
self.event_counts[event_type] = self.event_counts.get(event_type, 0) + 1
self.last_events[event_type] = datetime.utcnow()
def record_error(self, event_type: str) -> None:
"""Record an event error"""
self.error_counts[event_type] = self.error_counts.get(event_type, 0) + 1
def get_stats(self) -> Dict[str, Any]:
"""Get event statistics"""
return {
"counts": self.event_counts.copy(),
"last_events": {k: v.isoformat() for k, v in self.last_events.items()},
"errors": self.error_counts.copy()
}
class GuildEventHandler:
"""Handles guild-related events"""
def __init__(self, cog: "VideoArchiver", tracker: EventTracker):
self.cog = cog
self.tracker = tracker
async def handle_guild_join(self, guild: discord.Guild) -> None:
"""Handle bot joining a new guild"""
self.tracker.record_event("guild_join")
if not self.cog.ready.is_set():
return
try:
await initialize_guild_components(self.cog, guild.id)
logger.info(f"Initialized components for new guild {guild.id}")
except Exception as e:
self.tracker.record_error("guild_join")
logger.error(f"Failed to initialize new guild {guild.id}: {str(e)}")
async def handle_guild_remove(self, guild: discord.Guild) -> None:
"""Handle bot leaving a guild"""
self.tracker.record_event("guild_remove")
try:
await cleanup_guild_components(self.cog, guild.id)
except Exception as e:
self.tracker.record_error("guild_remove")
logger.error(f"Error cleaning up removed guild {guild.id}: {str(e)}")
class MessageEventHandler:
"""Handles message-related events"""
def __init__(self, cog: "VideoArchiver", tracker: EventTracker):
self.cog = cog
self.tracker = tracker
async def handle_message(self, message: discord.Message) -> None:
"""Handle new messages for video processing"""
self.tracker.record_event("message")
# Skip if not ready or if message is from DM/bot
if not self.cog.ready.is_set() or message.guild is None or message.author.bot:
return
# Skip if message is a command
ctx = await self.cog.bot.get_context(message)
if ctx.valid:
return
# Process message in background task
asyncio.create_task(self._process_message_background(message))
async def _process_message_background(self, message: discord.Message) -> None:
"""Process message in background to avoid blocking"""
try:
await self.cog.processor.process_message(message)
except Exception as e:
self.tracker.record_error("message_processing")
await self._handle_processing_error(message, e)
async def _handle_processing_error(
self,
message: discord.Message,
error: Exception
) -> None:
"""Handle message processing errors"""
logger.error(
f"Error processing message {message.id}: {traceback.format_exc()}"
)
try:
log_channel = await self.cog.config_manager.get_channel(
message.guild, "log"
)
if log_channel:
await response_manager.send_response(
log_channel,
content=(
f"Error processing message: {str(error)}\n"
f"Message ID: {message.id}\n"
f"Channel: {message.channel.mention}"
),
response_type="error"
)
except Exception as log_error:
logger.error(f"Failed to log error to guild: {str(log_error)}")
class ReactionEventHandler:
"""Handles reaction-related events"""
def __init__(self, cog: "VideoArchiver", tracker: EventTracker):
self.cog = cog
self.tracker = tracker
async def handle_reaction_add(
self,
payload: discord.RawReactionActionEvent
) -> None:
"""Handle reactions to messages"""
self.tracker.record_event("reaction_add")
if payload.user_id == self.cog.bot.user.id:
return
try:
await self._process_reaction(payload)
except Exception as e:
self.tracker.record_error("reaction_processing")
logger.error(f"Error handling reaction: {e}")
async def _process_reaction(
self,
payload: discord.RawReactionActionEvent
) -> None:
"""Process a reaction event"""
# Get the channel and message
channel = self.cog.bot.get_channel(payload.channel_id)
if not channel:
return
message = await channel.fetch_message(payload.message_id)
if not message:
return
@@ -70,31 +170,41 @@ def setup_events(cog: "VideoArchiver") -> None:
# Check if it's the archived reaction
if str(payload.emoji) == REACTIONS["archived"]:
# Only process if database is enabled
if self.cog.db:
user = self.cog.bot.get_user(payload.user_id)
asyncio.create_task(
handle_archived_reaction(message, user, self.cog.db)
)
class EventManager:
"""Manages Discord event handling"""
def __init__(self, cog: "VideoArchiver"):
self.tracker = EventTracker()
self.guild_handler = GuildEventHandler(cog, self.tracker)
self.message_handler = MessageEventHandler(cog, self.tracker)
self.reaction_handler = ReactionEventHandler(cog, self.tracker)
def get_stats(self) -> Dict[str, Any]:
"""Get event statistics"""
return self.tracker.get_stats()
def setup_events(cog: "VideoArchiver") -> None:
"""Set up event handlers for the cog"""
event_manager = EventManager(cog)
@cog.listener()
async def on_guild_join(guild: discord.Guild) -> None:
await event_manager.guild_handler.handle_guild_join(guild)
@cog.listener()
async def on_guild_remove(guild: discord.Guild) -> None:
await event_manager.guild_handler.handle_guild_remove(guild)
@cog.listener()
async def on_message(message: discord.Message) -> None:
await event_manager.message_handler.handle_message(message)
@cog.listener()
async def on_raw_reaction_add(payload: discord.RawReactionActionEvent) -> None:
await event_manager.reaction_handler.handle_reaction_add(payload)
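
A sketch of the intended wiring (cog is assumed to be an initialized VideoArchiver; setup_events keeps its EventManager internal, so the standalone instance below only illustrates the stats shape):

setup_events(cog)  # registers on_guild_join, on_guild_remove, on_message, on_raw_reaction_add

manager = EventManager(cog)
print(manager.get_stats())
# -> {"counts": {...}, "last_events": {...}, "errors": {...}}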

View File

@@ -4,6 +4,7 @@ import logging
import asyncio
import traceback
from pathlib import Path
from typing import Dict, Any, Optional
from redbot.core import Config, data_manager
from ..config_manager import ConfigManager
@@ -17,39 +18,82 @@ from ..utils.exceptions import VideoArchiverError as ProcessingError
logger = logging.getLogger("VideoArchiver")
# Constants for timeouts
INIT_TIMEOUT = 60 # seconds
COMPONENT_INIT_TIMEOUT = 30 # seconds
CLEANUP_TIMEOUT = 15 # seconds
class InitializationTracker:
"""Tracks initialization progress"""
def __init__(self):
self.total_steps = 8 # Total number of initialization steps
self.current_step = 0
self.current_component = ""
self.errors: Dict[str, str] = {}
def start_step(self, component: str) -> None:
"""Start a new initialization step"""
self.current_step += 1
self.current_component = component
logger.info(f"Initializing {component} ({self.current_step}/{self.total_steps})")
def record_error(self, component: str, error: str) -> None:
"""Record an initialization error"""
self.errors[component] = error
logger.error(f"Error initializing {component}: {error}")
def get_progress(self) -> Dict[str, Any]:
"""Get current initialization progress"""
return {
"progress": (self.current_step / self.total_steps) * 100,
"current_component": self.current_component,
"errors": self.errors.copy()
}
class ComponentInitializer:
"""Handles initialization of individual components"""
def __init__(self, cog, tracker: InitializationTracker):
self.cog = cog
self.tracker = tracker
async def init_config(self) -> None:
"""Initialize configuration manager"""
self.tracker.start_step("Config Manager")
try:
# Initialize config first as other components depend on it
            config = Config.get_conf(self.cog, identifier=855847, force_registration=True)
            config.register_guild(**self.cog.default_guild_settings)
            self.cog.config_manager = ConfigManager(config)
            logger.info("Config manager initialized")
        except Exception as e:
            self.tracker.record_error("Config Manager", str(e))
            raise
async def init_paths(self) -> None:
"""Initialize data paths"""
self.tracker.start_step("Paths")
try:
self.cog.data_path = Path(data_manager.cog_data_path(self.cog))
self.cog.download_path = self.cog.data_path / "downloads"
self.cog.download_path.mkdir(parents=True, exist_ok=True)
logger.info("Paths initialized")
except Exception as e:
self.tracker.record_error("Paths", str(e))
raise
async def init_ffmpeg(self) -> None:
"""Initialize FFmpeg manager"""
self.tracker.start_step("FFmpeg Manager")
try:
self.cog.ffmpeg_mgr = FFmpegManager()
logger.info("FFmpeg manager initialized")
except Exception as e:
self.tracker.record_error("FFmpeg Manager", str(e))
raise
async def init_queue(self) -> None:
"""Initialize queue manager"""
self.tracker.start_step("Queue Manager")
try:
queue_path = self.cog.data_path / "queue_state.json"
queue_path.parent.mkdir(parents=True, exist_ok=True)
self.cog.queue_manager = EnhancedVideoQueueManager(
max_retries=3,
retry_delay=5,
max_queue_size=1000,
@@ -57,44 +101,115 @@ async def initialize_cog(cog) -> None:
max_history_age=86400,
persistence_path=str(queue_path),
)
            await self.cog.queue_manager.initialize()
            logger.info("Queue manager initialized")
        except Exception as e:
            self.tracker.record_error("Queue Manager", str(e))
            raise
async def init_processor(self) -> None:
"""Initialize video processor"""
self.tracker.start_step("Video Processor")
try:
self.cog.processor = VideoProcessor(
self.cog.bot,
self.cog.config_manager,
self.cog.components,
queue_manager=self.cog.queue_manager,
ffmpeg_mgr=self.cog.ffmpeg_mgr,
db=self.cog.db,
)
logger.info("Video processor initialized")
except Exception as e:
self.tracker.record_error("Video Processor", str(e))
raise
async def init_guilds(self) -> None:
"""Initialize guild components"""
self.tracker.start_step("Guild Components")
errors = []
for guild in self.cog.bot.guilds:
try:
await initialize_guild_components(self.cog, guild.id)
except Exception as e:
errors.append(f"Guild {guild.id}: {str(e)}")
logger.error(f"Failed to initialize guild {guild.id}: {str(e)}")
if errors:
self.tracker.record_error("Guild Components", "; ".join(errors))
async def init_update_checker(self) -> None:
"""Initialize update checker"""
self.tracker.start_step("Update Checker")
try:
self.cog.update_checker = UpdateChecker(self.cog.bot, self.cog.config_manager)
await self.cog.update_checker.start()
logger.info("Update checker initialized")
except Exception as e:
self.tracker.record_error("Update Checker", str(e))
raise
async def start_queue_processing(self) -> None:
"""Start queue processing"""
self.tracker.start_step("Queue Processing")
try:
self.cog._queue_task = asyncio.create_task(
self.cog.queue_manager.process_queue(self.cog.processor.process_video)
)
logger.info("Queue processing started")
except Exception as e:
self.tracker.record_error("Queue Processing", str(e))
raise
class InitializationManager:
"""Manages VideoArchiver initialization"""
def __init__(self, cog):
self.cog = cog
self.tracker = InitializationTracker()
self.component_initializer = ComponentInitializer(cog, self.tracker)
async def initialize(self) -> None:
"""Initialize all components"""
try:
# Initialize components in sequence
await self.component_initializer.init_config()
await self.component_initializer.init_paths()
# Clean existing downloads
try:
await cleanup_downloads(str(self.cog.download_path))
except Exception as e:
logger.warning(f"Download cleanup error: {e}")
await self.component_initializer.init_ffmpeg()
await self.component_initializer.init_queue()
await self.component_initializer.init_processor()
await self.component_initializer.init_guilds()
await self.component_initializer.init_update_checker()
await self.component_initializer.start_queue_processing()
# Set ready flag
self.cog.ready.set()
logger.info("VideoArchiver initialization completed successfully")
except Exception as e:
logger.error(f"Error during initialization: {str(e)}")
await cleanup_resources(self.cog)
raise
def get_progress(self) -> Dict[str, Any]:
"""Get initialization progress"""
return self.tracker.get_progress()
# Global initialization manager instance
init_manager: Optional[InitializationManager] = None
async def initialize_cog(cog) -> None:
"""Initialize all components with proper error handling"""
global init_manager
init_manager = InitializationManager(cog)
await init_manager.initialize()
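
A usage sketch, assuming an async setup path; init_manager is the module-level instance that initialize_cog sets:

await initialize_cog(cog)
if init_manager is not None:
    progress = init_manager.get_progress()
    # e.g. {"progress": 100.0, "current_component": "Queue Processing", "errors": {}}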
def init_callback(cog, task: asyncio.Task) -> None:
"""Handle initialization task completion"""
try:

View File

@@ -0,0 +1,239 @@
"""Module for managing VideoArchiver lifecycle"""
import asyncio
import logging
from typing import Optional, Dict, Any, Set, List, Callable
from enum import Enum
from datetime import datetime
from .cleanup import cleanup_resources, force_cleanup_resources
from ..utils.exceptions import VideoArchiverError
from .initialization import initialize_cog, init_callback
logger = logging.getLogger("VideoArchiver")
class LifecycleState(Enum):
"""Possible states in the cog lifecycle"""
UNINITIALIZED = "uninitialized"
INITIALIZING = "initializing"
READY = "ready"
UNLOADING = "unloading"
ERROR = "error"
class TaskManager:
"""Manages asyncio tasks"""
def __init__(self):
self._tasks: Dict[str, asyncio.Task] = {}
self._task_history: Dict[str, Dict[str, Any]] = {}
async def create_task(
self,
name: str,
coro,
callback=None
) -> asyncio.Task:
"""Create and track a task"""
task = asyncio.create_task(coro)
self._tasks[name] = task
self._task_history[name] = {
"start_time": datetime.utcnow(),
"status": "running"
}
if callback:
task.add_done_callback(lambda t: self._handle_completion(name, t, callback))
else:
task.add_done_callback(lambda t: self._handle_completion(name, t))
return task
def _handle_completion(
self,
name: str,
task: asyncio.Task,
callback=None
) -> None:
"""Handle task completion"""
try:
task.result() # Raises exception if task failed
status = "completed"
except asyncio.CancelledError:
status = "cancelled"
except Exception as e:
status = "failed"
logger.error(f"Task {name} failed: {e}")
self._task_history[name].update({
"end_time": datetime.utcnow(),
"status": status
})
if callback:
try:
callback(task)
except Exception as e:
logger.error(f"Task callback error for {name}: {e}")
self._tasks.pop(name, None)
async def cancel_task(self, name: str) -> None:
"""Cancel a specific task"""
if task := self._tasks.get(name):
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
except Exception as e:
logger.error(f"Error cancelling task {name}: {e}")
async def cancel_all_tasks(self) -> None:
"""Cancel all tracked tasks"""
for name in list(self._tasks.keys()):
await self.cancel_task(name)
def get_task_status(self) -> Dict[str, Any]:
"""Get status of all tasks"""
return {
"active_tasks": list(self._tasks.keys()),
"history": self._task_history.copy()
}
class StateTracker:
"""Tracks lifecycle state and transitions"""
def __init__(self):
self.state = LifecycleState.UNINITIALIZED
self.state_history: List[Dict[str, Any]] = []
self._record_state()
def set_state(self, state: LifecycleState) -> None:
"""Set current state"""
self.state = state
self._record_state()
def _record_state(self) -> None:
"""Record state transition"""
self.state_history.append({
"state": self.state.value,
"timestamp": datetime.utcnow()
})
def get_state_history(self) -> List[Dict[str, Any]]:
"""Get state transition history"""
return self.state_history.copy()
class LifecycleManager:
"""Manages the lifecycle of the VideoArchiver cog"""
def __init__(self, cog):
self.cog = cog
self.task_manager = TaskManager()
self.state_tracker = StateTracker()
        self._cleanup_handlers: Set[Callable] = set()
    def register_cleanup_handler(self, handler: Callable) -> None:
"""Register a cleanup handler"""
self._cleanup_handlers.add(handler)
async def handle_load(self) -> None:
"""Handle cog loading without blocking"""
try:
self.state_tracker.set_state(LifecycleState.INITIALIZING)
# Start initialization as background task
await self.task_manager.create_task(
"initialization",
initialize_cog(self.cog),
lambda t: init_callback(self.cog, t)
)
logger.info("Initialization started in background")
except Exception as e:
self.state_tracker.set_state(LifecycleState.ERROR)
# Ensure cleanup on any error
try:
await asyncio.wait_for(
force_cleanup_resources(self.cog),
timeout=15 # CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup during load error timed out")
raise VideoArchiverError(f"Error during cog load: {str(e)}")
async def handle_unload(self) -> None:
"""Clean up when cog is unloaded"""
self.state_tracker.set_state(LifecycleState.UNLOADING)
try:
# Cancel all tasks
await self.task_manager.cancel_all_tasks()
# Run cleanup handlers
await self._run_cleanup_handlers()
# Try normal cleanup
try:
cleanup_task = await self.task_manager.create_task(
"cleanup",
cleanup_resources(self.cog)
)
await asyncio.wait_for(cleanup_task, timeout=30) # UNLOAD_TIMEOUT
logger.info("Normal cleanup completed")
except (asyncio.TimeoutError, Exception) as e:
if isinstance(e, asyncio.TimeoutError):
logger.warning("Normal cleanup timed out, forcing cleanup")
else:
logger.error(f"Error during normal cleanup: {str(e)}")
# Force cleanup
try:
await asyncio.wait_for(
force_cleanup_resources(self.cog),
timeout=15 # CLEANUP_TIMEOUT
)
logger.info("Force cleanup completed")
except asyncio.TimeoutError:
logger.error("Force cleanup timed out")
except Exception as e:
logger.error(f"Error during force cleanup: {str(e)}")
except Exception as e:
logger.error(f"Error during cog unload: {str(e)}")
self.state_tracker.set_state(LifecycleState.ERROR)
finally:
# Clear all references
await self._cleanup_references()
async def _run_cleanup_handlers(self) -> None:
"""Run all registered cleanup handlers"""
for handler in self._cleanup_handlers:
try:
if asyncio.iscoroutinefunction(handler):
await handler()
else:
handler()
except Exception as e:
logger.error(f"Error in cleanup handler: {e}")
async def _cleanup_references(self) -> None:
"""Clean up all references"""
self.cog.ready.clear()
self.cog.bot = None
self.cog.processor = None
self.cog.queue_manager = None
self.cog.update_checker = None
self.cog.ffmpeg_mgr = None
self.cog.components.clear()
self.cog.db = None
def get_status(self) -> Dict[str, Any]:
"""Get current lifecycle status"""
return {
"state": self.state_tracker.state.value,
"state_history": self.state_tracker.get_state_history(),
"tasks": self.task_manager.get_task_status()
}
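
A minimal lifecycle sketch (the cog instance and the custom handler are assumptions for illustration):

manager = LifecycleManager(cog)
manager.register_cleanup_handler(lambda: logger.info("Custom cleanup ran"))

await manager.handle_load()    # starts initialization as a tracked background task
# ... cog runs ...
await manager.handle_unload()  # cancels tasks, runs handlers, cleans up with timeouts

print(manager.get_status())    # current state, state history, and per-task status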

View File

@@ -2,77 +2,197 @@
import logging
import discord
from typing import Optional, Union, Dict, Any
from redbot.core.commands import Context
logger = logging.getLogger("VideoArchiver")
class ResponseFormatter:
    """Formats responses for consistency"""

@staticmethod
def format_success(message: str) -> Dict[str, Any]:
"""Format a success message"""
        return {
            "content": f"✅ {message}",
            "color": discord.Color.green()
        }
@staticmethod
def format_error(message: str) -> Dict[str, Any]:
"""Format an error message"""
        return {
            "content": f"❌ {message}",
            "color": discord.Color.red()
        }
@staticmethod
def format_warning(message: str) -> Dict[str, Any]:
"""Format a warning message"""
return {
"content": f"⚠️ {message}",
"color": discord.Color.yellow()
}
@staticmethod
def format_info(message: str) -> Dict[str, Any]:
"""Format an info message"""
        return {
            "content": f"ℹ️ {message}",
            "color": discord.Color.blue()
        }
class InteractionHandler:
"""Handles slash command interactions"""
@staticmethod
async def send_initial_response(
interaction: discord.Interaction,
content: Optional[str] = None,
embed: Optional[discord.Embed] = None
) -> bool:
"""Send initial interaction response"""
try:
            # If not responded yet, send the initial response
            if not interaction.response.is_done():
                if embed:
                    await interaction.response.send_message(content=content, embed=embed)
                else:
                    await interaction.response.send_message(content=content)
                return True
            return False
except Exception as e:
logger.error(f"Error sending initial interaction response: {e}")
return False
@staticmethod
async def send_followup(
interaction: discord.Interaction,
content: Optional[str] = None,
embed: Optional[discord.Embed] = None
) -> bool:
"""Send interaction followup"""
try:
if embed:
await interaction.followup.send(content=content, embed=embed)
else:
await interaction.followup.send(content=content)
return True
except Exception as e:
logger.error(f"Error sending interaction followup: {e}")
return False
class ResponseManager:
"""Manages command responses"""
def __init__(self):
self.formatter = ResponseFormatter()
self.interaction_handler = InteractionHandler()
async def send_response(
self,
ctx: Context,
content: Optional[str] = None,
embed: Optional[discord.Embed] = None,
response_type: str = "normal"
) -> None:
"""Send a response to a command
Args:
ctx: Command context
content: Optional message content
embed: Optional embed
response_type: Type of response (normal, success, error, warning, info)
"""
try:
# Format response if type specified
if response_type != "normal":
format_method = getattr(self.formatter, f"format_{response_type}", None)
if format_method and content:
formatted = format_method(content)
content = formatted["content"]
if not embed:
embed = discord.Embed(color=formatted["color"])
# Handle response
if self._is_interaction(ctx):
await self._handle_interaction_response(ctx, content, embed)
else:
await self._handle_regular_response(ctx, content, embed)
except Exception as e:
logger.error(f"Error sending response: {e}")
# Final fallback attempt
await self._send_fallback_response(ctx, content, embed)
def _is_interaction(self, ctx: Context) -> bool:
"""Check if context is from an interaction"""
return hasattr(ctx, "interaction") and ctx.interaction is not None
async def _handle_interaction_response(
self,
ctx: Context,
content: Optional[str],
embed: Optional[discord.Embed]
) -> None:
"""Handle interaction response"""
try:
# Try initial response
if await self.interaction_handler.send_initial_response(
ctx.interaction, content, embed
):
return
# Try followup
if await self.interaction_handler.send_followup(
ctx.interaction, content, embed
):
return
# Fallback to regular message
await self._handle_regular_response(ctx, content, embed)
except Exception as e:
logger.error(f"Error handling interaction response: {e}")
await self._send_fallback_response(ctx, content, embed)
async def _handle_regular_response(
self,
ctx: Context,
content: Optional[str],
embed: Optional[discord.Embed]
) -> None:
"""Handle regular command response"""
try:
if embed:
await ctx.send(content=content, embed=embed)
else:
await ctx.send(content=content)
except Exception as e:
logger.error(f"Error sending regular response: {e}")
await self._send_fallback_response(ctx, content, embed)
async def _send_fallback_response(
self,
ctx: Context,
content: Optional[str],
embed: Optional[discord.Embed]
) -> None:
"""Send fallback response when other methods fail"""
try:
if embed:
await ctx.send(content=content, embed=embed)
else:
await ctx.send(content=content)
except Exception as e:
logger.error(f"Failed to send fallback response: {e}")
# Global response manager instance
response_manager = ResponseManager()
async def handle_response(
ctx: Context,
content: Optional[str] = None,
embed: Optional[discord.Embed] = None,
response_type: str = "normal"
) -> None:
"""Helper function to handle responses using the response manager"""
await response_manager.send_response(ctx, content, embed, response_type)
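
A usage sketch (ctx is any command Context; the same call works for prefix and slash invocations):

await handle_response(ctx, "Archive complete", response_type="success")
await handle_response(ctx, "Queue is nearly full", response_type="warning")

embed = discord.Embed(title="Archive Stats", description="42 videos archived")
await handle_response(ctx, embed=embed)  # "normal" response, no extra formatting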

View File

@@ -0,0 +1,228 @@
"""Module for managing VideoArchiver settings"""
from typing import Dict, Any, List, Optional
from dataclasses import dataclass
from enum import Enum
class VideoFormat(Enum):
"""Supported video formats"""
MP4 = "mp4"
WEBM = "webm"
MKV = "mkv"
class VideoQuality(Enum):
"""Video quality presets"""
LOW = "low" # 480p
MEDIUM = "medium" # 720p
HIGH = "high" # 1080p
ULTRA = "ultra" # 4K
@dataclass
class SettingDefinition:
"""Defines a setting's properties"""
name: str
category: str
default_value: Any
description: str
data_type: type
required: bool = True
min_value: Optional[int] = None
max_value: Optional[int] = None
choices: Optional[List[Any]] = None
depends_on: Optional[str] = None
class SettingCategory(Enum):
"""Setting categories"""
GENERAL = "general"
CHANNELS = "channels"
PERMISSIONS = "permissions"
VIDEO = "video"
MESSAGES = "messages"
PERFORMANCE = "performance"
FEATURES = "features"
class Settings:
"""Manages VideoArchiver settings"""
# Setting definitions
SETTINGS = {
"enabled": SettingDefinition(
name="enabled",
category=SettingCategory.GENERAL.value,
default_value=False,
description="Whether the archiver is enabled for this guild",
data_type=bool
),
"archive_channel": SettingDefinition(
name="archive_channel",
category=SettingCategory.CHANNELS.value,
default_value=None,
description="Channel where archived videos are posted",
data_type=int,
required=False
),
"log_channel": SettingDefinition(
name="log_channel",
category=SettingCategory.CHANNELS.value,
default_value=None,
description="Channel for logging archiver actions",
data_type=int,
required=False
),
"enabled_channels": SettingDefinition(
name="enabled_channels",
category=SettingCategory.CHANNELS.value,
default_value=[],
description="Channels to monitor (empty means all channels)",
data_type=list
),
"allowed_roles": SettingDefinition(
name="allowed_roles",
category=SettingCategory.PERMISSIONS.value,
default_value=[],
description="Roles allowed to use archiver (empty means all roles)",
data_type=list
),
"video_format": SettingDefinition(
name="video_format",
category=SettingCategory.VIDEO.value,
default_value=VideoFormat.MP4.value,
description="Format for archived videos",
data_type=str,
choices=[format.value for format in VideoFormat]
),
"video_quality": SettingDefinition(
name="video_quality",
category=SettingCategory.VIDEO.value,
default_value=VideoQuality.HIGH.value,
description="Quality preset for archived videos",
data_type=str,
choices=[quality.value for quality in VideoQuality]
),
"max_file_size": SettingDefinition(
name="max_file_size",
category=SettingCategory.VIDEO.value,
default_value=8,
description="Maximum file size in MB",
data_type=int,
min_value=1,
max_value=100
),
"message_duration": SettingDefinition(
name="message_duration",
category=SettingCategory.MESSAGES.value,
default_value=30,
description="Duration to show status messages (seconds)",
data_type=int,
min_value=5,
max_value=300
),
"message_template": SettingDefinition(
name="message_template",
category=SettingCategory.MESSAGES.value,
default_value="{author} archived a video from {channel}",
description="Template for archive messages",
data_type=str
),
"concurrent_downloads": SettingDefinition(
name="concurrent_downloads",
category=SettingCategory.PERFORMANCE.value,
default_value=2,
description="Maximum concurrent downloads",
data_type=int,
min_value=1,
max_value=5
),
"enabled_sites": SettingDefinition(
name="enabled_sites",
category=SettingCategory.FEATURES.value,
default_value=None,
description="Sites to enable archiving for (None means all sites)",
data_type=list,
required=False
),
"use_database": SettingDefinition(
name="use_database",
category=SettingCategory.FEATURES.value,
default_value=False,
description="Enable database tracking of archived videos",
data_type=bool
),
}
@classmethod
def get_setting_definition(cls, setting: str) -> Optional[SettingDefinition]:
"""Get definition for a setting"""
return cls.SETTINGS.get(setting)
@classmethod
def get_settings_by_category(cls, category: str) -> Dict[str, SettingDefinition]:
"""Get all settings in a category"""
return {
name: definition
for name, definition in cls.SETTINGS.items()
if definition.category == category
}
@classmethod
def validate_setting(cls, setting: str, value: Any) -> bool:
"""Validate a setting value"""
definition = cls.get_setting_definition(setting)
if not definition:
return False
# Check type
if not isinstance(value, definition.data_type):
return False
# Check required
if definition.required and value is None:
return False
# Check choices
if definition.choices and value not in definition.choices:
return False
# Check numeric bounds
if isinstance(value, (int, float)):
if definition.min_value is not None and value < definition.min_value:
return False
if definition.max_value is not None and value > definition.max_value:
return False
return True
@property
def default_guild_settings(self) -> Dict[str, Any]:
"""Default settings for guild configuration"""
return {
name: definition.default_value
for name, definition in self.SETTINGS.items()
}
@classmethod
def get_setting_help(cls, setting: str) -> Optional[str]:
"""Get help text for a setting"""
definition = cls.get_setting_definition(setting)
if not definition:
return None
help_text = [
f"Setting: {definition.name}",
f"Category: {definition.category}",
f"Description: {definition.description}",
f"Type: {definition.data_type.__name__}",
f"Required: {definition.required}",
f"Default: {definition.default_value}"
]
if definition.choices:
help_text.append(f"Choices: {', '.join(map(str, definition.choices))}")
if definition.min_value is not None:
help_text.append(f"Minimum: {definition.min_value}")
if definition.max_value is not None:
help_text.append(f"Maximum: {definition.max_value}")
if definition.depends_on:
help_text.append(f"Depends on: {definition.depends_on}")
return "\n".join(help_text)
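
Validation and help lookups against the definitions above (values are illustrative):

Settings.validate_setting("max_file_size", 25)    # True: int within 1-100
Settings.validate_setting("max_file_size", 500)   # False: exceeds max_value
Settings.validate_setting("video_format", "avi")  # False: not a VideoFormat choice

print(Settings.get_setting_help("video_quality"))
defaults = Settings().default_guild_settings  # instance needed: it is a property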

View File

@@ -0,0 +1,190 @@
"""Module for managing database connections"""
import logging
import sqlite3
from pathlib import Path
from contextlib import contextmanager
from typing import Generator, Optional
import threading
from queue import Queue, Empty
logger = logging.getLogger("DBConnectionManager")
class ConnectionManager:
"""Manages SQLite database connections and connection pooling"""
def __init__(self, db_path: Path, pool_size: int = 5):
"""Initialize the connection manager
Args:
db_path: Path to the SQLite database file
pool_size: Maximum number of connections in the pool
"""
self.db_path = db_path
self.pool_size = pool_size
self._connection_pool: Queue[sqlite3.Connection] = Queue(maxsize=pool_size)
self._local = threading.local()
self._lock = threading.Lock()
# Initialize connection pool
self._initialize_pool()
def _initialize_pool(self) -> None:
"""Initialize the connection pool"""
try:
for _ in range(self.pool_size):
conn = self._create_connection()
if conn:
self._connection_pool.put(conn)
except Exception as e:
logger.error(f"Error initializing connection pool: {e}")
raise
def _create_connection(self) -> Optional[sqlite3.Connection]:
"""Create a new database connection with proper settings"""
try:
conn = sqlite3.connect(
self.db_path,
detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES,
timeout=30.0 # 30 second timeout
)
# Enable foreign keys
conn.execute("PRAGMA foreign_keys = ON")
# Set journal mode to WAL for better concurrency
conn.execute("PRAGMA journal_mode = WAL")
# Set synchronous mode to NORMAL for better performance
conn.execute("PRAGMA synchronous = NORMAL")
# Enable extended result codes for better error handling
conn.execute("PRAGMA extended_result_codes = ON")
return conn
except sqlite3.Error as e:
logger.error(f"Error creating database connection: {e}")
return None
@contextmanager
def get_connection(self) -> Generator[sqlite3.Connection, None, None]:
"""Get a database connection from the pool
Yields:
sqlite3.Connection: A database connection
Raises:
sqlite3.Error: If unable to get a connection
"""
conn = None
try:
# Check if we have a transaction-bound connection
conn = getattr(self._local, 'transaction_connection', None)
if conn is not None:
yield conn
return
# Get connection from pool or create new one
try:
conn = self._connection_pool.get(timeout=5.0)
except Empty:
logger.warning("Connection pool exhausted, creating new connection")
conn = self._create_connection()
if not conn:
raise sqlite3.Error("Failed to create database connection")
yield conn
except Exception as e:
logger.error(f"Error getting database connection: {e}")
if conn:
try:
conn.rollback()
except Exception:
pass
raise
finally:
if conn and not hasattr(self._local, 'transaction_connection'):
try:
conn.rollback() # Reset connection state
self._connection_pool.put(conn)
except Exception as e:
logger.error(f"Error returning connection to pool: {e}")
try:
conn.close()
except Exception:
pass
@contextmanager
def transaction(self) -> Generator[sqlite3.Connection, None, None]:
"""Start a database transaction
Yields:
sqlite3.Connection: A database connection for the transaction
Raises:
sqlite3.Error: If unable to start transaction
"""
if hasattr(self._local, 'transaction_connection'):
raise sqlite3.Error("Nested transactions are not supported")
conn = None
try:
# Get connection from pool
try:
conn = self._connection_pool.get(timeout=5.0)
except Empty:
logger.warning("Connection pool exhausted, creating new connection")
conn = self._create_connection()
if not conn:
raise sqlite3.Error("Failed to create database connection")
# Bind connection to current thread
self._local.transaction_connection = conn
# Start transaction
conn.execute("BEGIN")
yield conn
# Commit transaction
conn.commit()
except Exception as e:
logger.error(f"Error in database transaction: {e}")
if conn:
try:
conn.rollback()
except Exception:
pass
raise
finally:
if conn:
try:
# Remove thread-local binding
delattr(self._local, 'transaction_connection')
# Return connection to pool
self._connection_pool.put(conn)
except Exception as e:
logger.error(f"Error cleaning up transaction: {e}")
try:
conn.close()
except Exception:
pass
def close_all(self) -> None:
"""Close all connections in the pool"""
with self._lock:
while not self._connection_pool.empty():
try:
conn = self._connection_pool.get_nowait()
try:
conn.close()
except Exception as e:
logger.error(f"Error closing connection: {e}")
except Empty:
break
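
A usage sketch (the database path and guild ID are placeholders):

from pathlib import Path

manager = ConnectionManager(Path("data/archived_videos.db"), pool_size=5)

# Pooled read; the connection is returned to the pool afterwards
with manager.get_connection() as conn:
    total = conn.execute("SELECT COUNT(*) FROM archived_videos").fetchone()[0]

# Transaction: committed on clean exit, rolled back on exception
with manager.transaction() as conn:
    conn.execute("DELETE FROM archived_videos WHERE guild_id = ?", (123456789,))

manager.close_all()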

View File

@@ -0,0 +1,197 @@
"""Module for managing database queries"""
import logging
import sqlite3
from typing import Optional, Tuple, List, Dict, Any
from datetime import datetime
logger = logging.getLogger("DBQueryManager")
class QueryManager:
"""Manages database queries and operations"""
def __init__(self, connection_manager):
self.connection_manager = connection_manager
async def add_archived_video(
self,
original_url: str,
discord_url: str,
message_id: int,
channel_id: int,
guild_id: int,
metadata: Optional[Dict[str, Any]] = None
) -> bool:
"""Add a newly archived video to the database"""
try:
with self.connection_manager.get_connection() as conn:
cursor = conn.cursor()
# Prepare query and parameters
query = """
INSERT OR REPLACE INTO archived_videos
(original_url, discord_url, message_id, channel_id, guild_id,
file_size, duration, format, resolution, bitrate)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
# Extract metadata values with defaults
metadata = metadata or {}
params = (
original_url,
discord_url,
message_id,
channel_id,
guild_id,
metadata.get('file_size'),
metadata.get('duration'),
metadata.get('format'),
metadata.get('resolution'),
metadata.get('bitrate')
)
cursor.execute(query, params)
conn.commit()
return True
except sqlite3.Error as e:
logger.error(f"Error adding archived video: {e}")
return False
async def get_archived_video(
self,
url: str
) -> Optional[Dict[str, Any]]:
"""Get archived video information by original URL"""
try:
with self.connection_manager.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("""
SELECT discord_url, message_id, channel_id, guild_id,
file_size, duration, format, resolution, bitrate,
archived_at
FROM archived_videos
WHERE original_url = ?
""", (url,))
result = cursor.fetchone()
if not result:
return None
return {
'discord_url': result[0],
'message_id': result[1],
'channel_id': result[2],
'guild_id': result[3],
'file_size': result[4],
'duration': result[5],
'format': result[6],
'resolution': result[7],
'bitrate': result[8],
'archived_at': result[9]
}
except sqlite3.Error as e:
logger.error(f"Error retrieving archived video: {e}")
return None
async def is_url_archived(self, url: str) -> bool:
"""Check if a URL has already been archived"""
try:
with self.connection_manager.get_connection() as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT 1 FROM archived_videos WHERE original_url = ?",
(url,)
)
return cursor.fetchone() is not None
except sqlite3.Error as e:
logger.error(f"Error checking archived status: {e}")
return False
async def get_guild_stats(self, guild_id: int) -> Dict[str, Any]:
"""Get archiving statistics for a guild"""
try:
with self.connection_manager.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("""
SELECT
COUNT(*) as total_videos,
SUM(file_size) as total_size,
AVG(duration) as avg_duration,
MAX(archived_at) as last_archived
FROM archived_videos
WHERE guild_id = ?
""", (guild_id,))
result = cursor.fetchone()
return {
'total_videos': result[0],
'total_size': result[1] or 0,
'avg_duration': result[2] or 0,
'last_archived': result[3]
}
except sqlite3.Error as e:
logger.error(f"Error getting guild stats: {e}")
return {
'total_videos': 0,
'total_size': 0,
'avg_duration': 0,
'last_archived': None
}
async def get_channel_videos(
self,
channel_id: int,
limit: int = 100,
offset: int = 0
) -> List[Dict[str, Any]]:
"""Get archived videos for a channel"""
try:
with self.connection_manager.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("""
SELECT original_url, discord_url, message_id,
file_size, duration, format, resolution,
archived_at
FROM archived_videos
WHERE channel_id = ?
ORDER BY archived_at DESC
LIMIT ? OFFSET ?
""", (channel_id, limit, offset))
results = cursor.fetchall()
return [{
'original_url': row[0],
'discord_url': row[1],
'message_id': row[2],
'file_size': row[3],
'duration': row[4],
'format': row[5],
'resolution': row[6],
'archived_at': row[7]
} for row in results]
except sqlite3.Error as e:
logger.error(f"Error getting channel videos: {e}")
return []
async def cleanup_old_records(self, days: int) -> int:
"""Clean up records older than specified days"""
try:
with self.connection_manager.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("""
DELETE FROM archived_videos
WHERE archived_at < datetime('now', ? || ' days')
""", (-days,))
deleted = cursor.rowcount
conn.commit()
return deleted
except sqlite3.Error as e:
logger.error(f"Error cleaning up old records: {e}")
return 0
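
A usage sketch (URLs and IDs are placeholders; connection_manager is a ConnectionManager instance from the previous module):

query_manager = QueryManager(connection_manager)

if not await query_manager.is_url_archived("https://example.com/video"):
    await query_manager.add_archived_video(
        original_url="https://example.com/video",
        discord_url="https://cdn.discordapp.com/attachments/example.mp4",
        message_id=111,
        channel_id=222,
        guild_id=333,
        metadata={"file_size": 1048576, "format": "mp4"},
    )

stats = await query_manager.get_guild_stats(333)
recent = await query_manager.get_channel_videos(222, limit=10)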

View File

@@ -0,0 +1,109 @@
"""Module for managing database schema"""
import logging
import sqlite3
from pathlib import Path
from typing import List
logger = logging.getLogger("DBSchemaManager")
class SchemaManager:
"""Manages database schema creation and updates"""
SCHEMA_VERSION = 1 # Increment when schema changes
def __init__(self, db_path: Path):
self.db_path = db_path
def initialize_schema(self) -> None:
"""Initialize or update the database schema"""
try:
self._create_schema_version_table()
current_version = self._get_schema_version()
if current_version < self.SCHEMA_VERSION:
self._apply_migrations(current_version)
self._update_schema_version()
except sqlite3.Error as e:
logger.error(f"Schema initialization error: {e}")
raise
def _create_schema_version_table(self) -> None:
"""Create schema version tracking table"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE IF NOT EXISTS schema_version (
version INTEGER PRIMARY KEY
)
""")
# Insert initial version if table is empty
cursor.execute("INSERT OR IGNORE INTO schema_version VALUES (0)")
conn.commit()
def _get_schema_version(self) -> int:
"""Get current schema version"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("SELECT version FROM schema_version LIMIT 1")
result = cursor.fetchone()
return result[0] if result else 0
def _update_schema_version(self) -> None:
"""Update schema version to current"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"UPDATE schema_version SET version = ?",
(self.SCHEMA_VERSION,)
)
conn.commit()
def _apply_migrations(self, current_version: int) -> None:
"""Apply necessary schema migrations"""
migrations = self._get_migrations(current_version)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
for migration in migrations:
try:
cursor.executescript(migration)
conn.commit()
except sqlite3.Error as e:
logger.error(f"Migration failed: {e}")
raise
def _get_migrations(self, current_version: int) -> List[str]:
"""Get list of migrations to apply"""
migrations = []
# Version 0 to 1: Initial schema
if current_version < 1:
migrations.append("""
CREATE TABLE IF NOT EXISTS archived_videos (
original_url TEXT PRIMARY KEY,
discord_url TEXT NOT NULL,
message_id INTEGER NOT NULL,
channel_id INTEGER NOT NULL,
guild_id INTEGER NOT NULL,
archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
file_size INTEGER,
duration INTEGER,
format TEXT,
resolution TEXT,
bitrate INTEGER
);
CREATE INDEX IF NOT EXISTS idx_guild_channel
ON archived_videos(guild_id, channel_id);
CREATE INDEX IF NOT EXISTS idx_archived_at
ON archived_videos(archived_at);
""")
# Add more migrations here as schema evolves
# if current_version < 2:
# migrations.append(...)
return migrations
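
A sketch of how a future migration would slot in (the version-2 column is hypothetical, shown only to illustrate the pattern):

# SCHEMA_VERSION = 2
# if current_version < 2:
#     migrations.append("""
#         ALTER TABLE archived_videos ADD COLUMN archived_by INTEGER;
#         CREATE INDEX IF NOT EXISTS idx_archived_by
#             ON archived_videos(archived_by);
#     """)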

View File

@@ -1,8 +1,12 @@
"""Database management for archived videos"""
import sqlite3
import logging
from pathlib import Path
from typing import Optional, Dict, Any, List
from .schema_manager import SchemaManager
from .query_manager import QueryManager
from .connection_manager import ConnectionManager
logger = logging.getLogger("VideoArchiverDB")
@@ -10,70 +14,84 @@ class VideoArchiveDB:
"""Manages the SQLite database for archived videos"""
    def __init__(self, data_path: Path):
        """Initialize the database and its components

        Args:
            data_path: Path to the data directory
        """
# Set up database path
self.db_path = data_path / "archived_videos.db"
self.db_path.parent.mkdir(parents=True, exist_ok=True)
# Initialize managers
self.connection_manager = ConnectionManager(self.db_path)
self.schema_manager = SchemaManager(self.db_path)
self.query_manager = QueryManager(self.connection_manager)
# Initialize database schema
self.schema_manager.initialize_schema()
logger.info("Video archive database initialized successfully")
async def add_archived_video(
self,
original_url: str,
discord_url: str,
message_id: int,
channel_id: int,
guild_id: int,
metadata: Optional[Dict[str, Any]] = None
) -> bool:
"""Add a newly archived video to the database"""
return await self.query_manager.add_archived_video(
original_url,
discord_url,
message_id,
channel_id,
guild_id,
metadata
)
async def get_archived_video(self, url: str) -> Optional[Dict[str, Any]]:
"""Get archived video information by original URL"""
return await self.query_manager.get_archived_video(url)
async def is_url_archived(self, url: str) -> bool:
"""Check if a URL has already been archived"""
return await self.query_manager.is_url_archived(url)
async def get_guild_stats(self, guild_id: int) -> Dict[str, Any]:
"""Get archiving statistics for a guild"""
return await self.query_manager.get_guild_stats(guild_id)
async def get_channel_videos(
self,
channel_id: int,
limit: int = 100,
offset: int = 0
) -> List[Dict[str, Any]]:
"""Get archived videos for a channel"""
return await self.query_manager.get_channel_videos(
channel_id,
limit,
offset
)
async def cleanup_old_records(self, days: int) -> int:
"""Clean up records older than specified days"""
return await self.query_manager.cleanup_old_records(days)
def close(self) -> None:
"""Close all database connections"""
try:
self.connection_manager.close_all()
logger.info("Database connections closed")
except Exception as e:
logger.error(f"Error closing database connections: {e}")
async def __aenter__(self):
"""Async context manager entry"""
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit"""
self.close()
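
A usage sketch (path and IDs are placeholders; metadata keys match the version-1 schema):

from pathlib import Path

async def archive_video(url: str, cdn_url: str) -> None:
    async with VideoArchiveDB(Path("data")) as db:
        if not await db.is_url_archived(url):
            await db.add_archived_video(
                url, cdn_url,
                message_id=111, channel_id=222, guild_id=333,
                metadata={"file_size": 1048576, "format": "mp4"},
            )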

View File

@@ -0,0 +1,163 @@
"""Module for managing FFmpeg binaries"""
import logging
import os
from pathlib import Path
from typing import Dict, Optional
from .exceptions import (
FFmpegError,
DownloadError,
VerificationError,
PermissionError,
FFmpegNotFoundError
)
from .ffmpeg_downloader import FFmpegDownloader
from .verification_manager import VerificationManager
logger = logging.getLogger("FFmpegBinaryManager")
class BinaryManager:
"""Manages FFmpeg binary files and their lifecycle"""
def __init__(
self,
base_dir: Path,
system: str,
machine: str,
verification_manager: VerificationManager
):
self.base_dir = base_dir
self.verification_manager = verification_manager
# Initialize downloader
self.downloader = FFmpegDownloader(
system=system,
machine=machine,
base_dir=base_dir
)
self._ffmpeg_path: Optional[Path] = None
self._ffprobe_path: Optional[Path] = None
def initialize_binaries(self, gpu_info: Dict[str, bool]) -> Dict[str, Path]:
"""Initialize FFmpeg and FFprobe binaries
Args:
gpu_info: Dictionary of GPU availability
Returns:
Dict[str, Path]: Paths to FFmpeg and FFprobe binaries
Raises:
FFmpegError: If initialization fails
"""
try:
# Verify existing binaries if they exist
if self._verify_existing_binaries(gpu_info):
return self._get_binary_paths()
# Download and verify binaries
logger.info("Downloading FFmpeg and FFprobe...")
try:
binaries = self.downloader.download()
self._ffmpeg_path = binaries["ffmpeg"]
self._ffprobe_path = binaries["ffprobe"]
except Exception as e:
raise DownloadError(f"Failed to download FFmpeg: {e}")
# Verify downloaded binaries
self._verify_binaries(gpu_info)
return self._get_binary_paths()
except Exception as e:
logger.error(f"Failed to initialize binaries: {e}")
if isinstance(e, (DownloadError, VerificationError, PermissionError)):
raise
raise FFmpegError(f"Failed to initialize binaries: {e}")
def _verify_existing_binaries(self, gpu_info: Dict[str, bool]) -> bool:
"""Verify existing binary files if they exist
Returns:
bool: True if existing binaries are valid
"""
if (self.downloader.ffmpeg_path.exists() and
self.downloader.ffprobe_path.exists()):
logger.info(f"Found existing FFmpeg: {self.downloader.ffmpeg_path}")
logger.info(f"Found existing FFprobe: {self.downloader.ffprobe_path}")
try:
self._ffmpeg_path = self.downloader.ffmpeg_path
self._ffprobe_path = self.downloader.ffprobe_path
self._verify_binaries(gpu_info)
return True
except Exception as e:
logger.warning(f"Existing binaries verification failed: {e}")
return False
return False
def _verify_binaries(self, gpu_info: Dict[str, bool]) -> None:
"""Verify binary files and set permissions"""
try:
# Set permissions
self.verification_manager.verify_binary_permissions(self._ffmpeg_path)
self.verification_manager.verify_binary_permissions(self._ffprobe_path)
# Verify functionality
self.verification_manager.verify_ffmpeg(
self._ffmpeg_path,
self._ffprobe_path,
gpu_info
)
except Exception as e:
self._ffmpeg_path = None
self._ffprobe_path = None
raise VerificationError(f"Binary verification failed: {e}")
def _get_binary_paths(self) -> Dict[str, Path]:
"""Get paths to FFmpeg binaries
Returns:
Dict[str, Path]: Paths to FFmpeg and FFprobe binaries
Raises:
FFmpegNotFoundError: If binaries are not available
"""
if not self._ffmpeg_path or not self._ffprobe_path:
raise FFmpegNotFoundError("FFmpeg binaries not initialized")
return {
"ffmpeg": self._ffmpeg_path,
"ffprobe": self._ffprobe_path
}
def force_download(self, gpu_info: Dict[str, bool]) -> bool:
"""Force re-download of FFmpeg binaries
Returns:
bool: True if download and verification successful
"""
try:
logger.info("Force downloading FFmpeg...")
binaries = self.downloader.download()
self._ffmpeg_path = binaries["ffmpeg"]
self._ffprobe_path = binaries["ffprobe"]
self._verify_binaries(gpu_info)
return True
except Exception as e:
logger.error(f"Failed to force download FFmpeg: {e}")
return False
def get_ffmpeg_path(self) -> str:
"""Get path to FFmpeg binary"""
if not self._ffmpeg_path or not self._ffmpeg_path.exists():
raise FFmpegNotFoundError("FFmpeg is not available")
return str(self._ffmpeg_path)
def get_ffprobe_path(self) -> str:
"""Get path to FFprobe binary"""
if not self._ffprobe_path or not self._ffprobe_path.exists():
raise FFmpegNotFoundError("FFprobe is not available")
return str(self._ffprobe_path)
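For reference, a minimal wiring sketch for the refactored binary management. The absolute module paths mirror the package's older `videoarchiver.ffmpeg.*` imports and are an assumption, as is the placeholder `gpu_info` dict, which would normally come from `GPUDetector.detect_gpu()`:

```python
import platform
from pathlib import Path

# Assumed module paths, following the package's old absolute import style.
from videoarchiver.ffmpeg.process_manager import ProcessManager
from videoarchiver.ffmpeg.verification_manager import VerificationManager
from videoarchiver.ffmpeg.binary_manager import BinaryManager

process_manager = ProcessManager()
verification_manager = VerificationManager(process_manager)
binary_manager = BinaryManager(
    base_dir=Path("bin"),          # any writable directory
    system=platform.system(),      # e.g. "Linux"
    machine=platform.machine(),    # e.g. "x86_64"
    verification_manager=verification_manager,
)

# Placeholder GPU flags; normally produced by GPUDetector.detect_gpu()
gpu_info = {"nvidia": False, "amd": False, "intel": False}
paths = binary_manager.initialize_binaries(gpu_info)
print(paths["ffmpeg"], paths["ffprobe"])
```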

View File

@@ -1,44 +1,28 @@
"""Main FFmpeg management module"""
import logging
import platform
import multiprocessing
from pathlib import Path
from typing import Dict, Any, Optional
from .exceptions import (
    FFmpegError,
    DownloadError,
    VerificationError,
    FFmpegNotFoundError
)
from .gpu_detector import GPUDetector
from .video_analyzer import VideoAnalyzer
from .encoder_params import EncoderParams
from .process_manager import ProcessManager
from .verification_manager import VerificationManager
from .binary_manager import BinaryManager
logger = logging.getLogger("VideoArchiver")
class FFmpegManager:
"""Manages FFmpeg operations and lifecycle"""
def __init__(self):
"""Initialize FFmpeg manager"""
# Set up base directory in videoarchiver/bin
@@ -46,228 +30,39 @@ class FFmpegManager:
self.base_dir = module_dir / "bin"
logger.info(f"FFmpeg base directory: {self.base_dir}")
        # Initialize managers
        self.process_manager = ProcessManager()
        self.verification_manager = VerificationManager(self.process_manager)
        self.binary_manager = BinaryManager(
            base_dir=self.base_dir,
            system=platform.system(),
            machine=platform.machine(),
            verification_manager=self.verification_manager
)
        # Initialize components
        self.gpu_detector = GPUDetector(self.get_ffmpeg_path)
        self.video_analyzer = VideoAnalyzer(self.get_ffmpeg_path)
self._gpu_info = self.gpu_detector.detect_gpu()
self._cpu_cores = multiprocessing.cpu_count()
# Initialize encoder params
self.encoder_params = EncoderParams(self._cpu_cores, self._gpu_info)
# Initialize binaries
binaries = self.binary_manager.initialize_binaries(self._gpu_info)
logger.info(f"Using FFmpeg from: {binaries['ffmpeg']}")
logger.info(f"Using FFprobe from: {binaries['ffprobe']}")
logger.info("FFmpeg manager initialized successfully")
def kill_all_processes(self) -> None:
"""Kill all active FFmpeg processes"""
self.process_manager.kill_all_processes()
def analyze_video(self, input_path: str) -> Dict[str, Any]:
"""Analyze video content for optimal encoding settings"""
try:
if not input_path or not Path(input_path).exists():
raise FileNotFoundError(f"Input file not found: {input_path}")
return self.video_analyzer.analyze_video(input_path)
except Exception as e:
@@ -307,27 +102,15 @@ class FFmpegManager:
def get_ffmpeg_path(self) -> str:
"""Get path to FFmpeg binary"""
return self.binary_manager.get_ffmpeg_path()
def get_ffprobe_path(self) -> str:
"""Get path to FFprobe binary"""
return self.binary_manager.get_ffprobe_path()
def force_download(self) -> bool:
"""Force re-download of FFmpeg binary"""
return self.binary_manager.force_download(self._gpu_info)
@property
def gpu_info(self) -> Dict[str, bool]:
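Taken together, the refactor turns FFmpegManager into a thin facade over the new sub-managers. A hedged usage sketch follows; the module path is assumed from the package's old absolute imports, and the input file name is illustrative:

```python
from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager

manager = FFmpegManager()                     # resolves and verifies binaries on init
print(manager.get_ffmpeg_path())              # delegated to BinaryManager
print(manager.gpu_info)                       # cached GPUDetector result
details = manager.analyze_video("input.mp4")  # delegated to VideoAnalyzer
manager.kill_all_processes()                  # delegated to ProcessManager
```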

View File

@@ -0,0 +1,127 @@
"""Module for managing FFmpeg processes"""
import logging
import psutil
import subprocess
import time
from typing import Set, Optional
logger = logging.getLogger("FFmpegProcessManager")
class ProcessManager:
"""Manages FFmpeg process execution and lifecycle"""
def __init__(self):
self._active_processes: Set[subprocess.Popen] = set()
def add_process(self, process: subprocess.Popen) -> None:
"""Add a process to track"""
self._active_processes.add(process)
def remove_process(self, process: subprocess.Popen) -> None:
"""Remove a process from tracking"""
self._active_processes.discard(process)
def kill_all_processes(self) -> None:
"""Kill all active FFmpeg processes"""
try:
# First try graceful termination
self._terminate_processes()
# Give processes a moment to terminate
time.sleep(0.5)
# Force kill any remaining processes
self._kill_remaining_processes()
# Find and kill any orphaned FFmpeg processes
self._kill_orphaned_processes()
self._active_processes.clear()
logger.info("All FFmpeg processes terminated")
except Exception as e:
logger.error(f"Error killing FFmpeg processes: {e}")
def _terminate_processes(self) -> None:
"""Attempt graceful termination of processes"""
for process in self._active_processes:
try:
if process.poll() is None: # Process is still running
process.terminate()
except Exception as e:
logger.error(f"Error terminating FFmpeg process: {e}")
def _kill_remaining_processes(self) -> None:
"""Force kill any remaining processes"""
for process in self._active_processes:
try:
if process.poll() is None: # Process is still running
process.kill()
except Exception as e:
logger.error(f"Error killing FFmpeg process: {e}")
def _kill_orphaned_processes(self) -> None:
"""Find and kill any orphaned FFmpeg processes"""
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
try:
if 'ffmpeg' in proc.info['name'].lower():
proc.kill()
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
except Exception as e:
logger.error(f"Error killing orphaned FFmpeg process: {e}")
def execute_command(
self,
command: list,
timeout: Optional[int] = None,
check: bool = False
) -> subprocess.CompletedProcess:
"""Execute an FFmpeg command with proper process management
Args:
command: Command list to execute
timeout: Optional timeout in seconds
check: Whether to check return code
Returns:
subprocess.CompletedProcess: Result of command execution
"""
process = None
try:
process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
self.add_process(process)
stdout, stderr = process.communicate(timeout=timeout)
result = subprocess.CompletedProcess(
args=command,
returncode=process.returncode,
stdout=stdout,
stderr=stderr
)
if check and process.returncode != 0:
raise subprocess.CalledProcessError(
returncode=process.returncode,
cmd=command,
output=stdout,
stderr=stderr
)
return result
except subprocess.TimeoutExpired:
if process:
process.kill()
_, stderr = process.communicate()
raise
finally:
if process:
self.remove_process(process)
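A short sketch of running a one-off command through ProcessManager so the subprocess is tracked (and therefore killable) for its whole lifetime; the `ffprobe` invocation is illustrative:

```python
import subprocess

pm = ProcessManager()
try:
    # The process is registered while it runs, so kill_all_processes()
    # could terminate it from another code path.
    result = pm.execute_command(["ffprobe", "-version"], timeout=10, check=True)
    print(result.stdout.splitlines()[0])
except subprocess.TimeoutExpired:
    print("command exceeded 10s and was killed")
except subprocess.CalledProcessError as e:
    print(f"command failed: {e.stderr}")
```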

View File

@@ -0,0 +1,160 @@
"""Module for verifying FFmpeg functionality"""
import logging
import os
import subprocess
from pathlib import Path
from typing import Dict, List, Optional
from .exceptions import (
TimeoutError,
VerificationError,
EncodingError,
handle_ffmpeg_error
)
logger = logging.getLogger("FFmpegVerification")
class VerificationManager:
"""Handles verification of FFmpeg functionality"""
def __init__(self, process_manager):
self.process_manager = process_manager
def verify_ffmpeg(
self,
ffmpeg_path: Path,
ffprobe_path: Path,
gpu_info: Dict[str, bool]
) -> None:
"""Verify FFmpeg functionality with comprehensive checks
Args:
ffmpeg_path: Path to FFmpeg binary
ffprobe_path: Path to FFprobe binary
gpu_info: Dictionary of GPU availability
Raises:
VerificationError: If verification fails
TimeoutError: If verification times out
EncodingError: If required encoders are missing
"""
try:
# Check FFmpeg version
self._verify_ffmpeg_version(ffmpeg_path)
# Check FFprobe version
self._verify_ffprobe_version(ffprobe_path)
# Check FFmpeg capabilities
self._verify_ffmpeg_capabilities(ffmpeg_path, gpu_info)
logger.info("FFmpeg verification completed successfully")
except Exception as e:
logger.error(f"FFmpeg verification failed: {e}")
if isinstance(e, (TimeoutError, EncodingError, VerificationError)):
raise
raise VerificationError(f"FFmpeg verification failed: {e}")
def _verify_ffmpeg_version(self, ffmpeg_path: Path) -> None:
"""Verify FFmpeg version"""
try:
result = self._execute_command(
[str(ffmpeg_path), "-version"],
"FFmpeg version check"
)
logger.info(f"FFmpeg version: {result.stdout.split()[2]}")
except Exception as e:
raise VerificationError(f"FFmpeg version check failed: {e}")
def _verify_ffprobe_version(self, ffprobe_path: Path) -> None:
"""Verify FFprobe version"""
try:
result = self._execute_command(
[str(ffprobe_path), "-version"],
"FFprobe version check"
)
logger.info(f"FFprobe version: {result.stdout.split()[2]}")
except Exception as e:
raise VerificationError(f"FFprobe version check failed: {e}")
def _verify_ffmpeg_capabilities(
self,
ffmpeg_path: Path,
gpu_info: Dict[str, bool]
) -> None:
"""Verify FFmpeg capabilities and encoders"""
try:
result = self._execute_command(
[str(ffmpeg_path), "-hide_banner", "-encoders"],
"FFmpeg capabilities check"
)
# Verify required encoders
required_encoders = self._get_required_encoders(gpu_info)
available_encoders = result.stdout.lower()
missing_encoders = [
encoder for encoder in required_encoders
if encoder not in available_encoders
]
if missing_encoders:
logger.warning(f"Missing encoders: {', '.join(missing_encoders)}")
if "libx264" in missing_encoders:
raise EncodingError("Required encoder libx264 not available")
except Exception as e:
if isinstance(e, EncodingError):
raise
raise VerificationError(f"FFmpeg capabilities check failed: {e}")
def _execute_command(
self,
command: List[str],
operation: str,
timeout: int = 10
) -> subprocess.CompletedProcess:
"""Execute a command with proper error handling"""
try:
result = self.process_manager.execute_command(
command,
timeout=timeout,
check=False
)
if result.returncode != 0:
error = handle_ffmpeg_error(result.stderr)
logger.error(f"{operation} failed: {result.stderr}")
raise error
return result
except subprocess.TimeoutExpired:
raise TimeoutError(f"{operation} timed out")
except Exception as e:
if isinstance(e, (TimeoutError, EncodingError)):
raise
raise VerificationError(f"{operation} failed: {e}")
def _get_required_encoders(self, gpu_info: Dict[str, bool]) -> List[str]:
"""Get list of required encoders based on GPU availability"""
required_encoders = ["libx264"]
if gpu_info["nvidia"]:
required_encoders.append("h264_nvenc")
elif gpu_info["amd"]:
required_encoders.append("h264_amf")
elif gpu_info["intel"]:
required_encoders.append("h264_qsv")
return required_encoders
def verify_binary_permissions(self, binary_path: Path) -> None:
"""Verify and set binary permissions"""
try:
if os.name != "nt": # Not Windows
os.chmod(str(binary_path), 0o755)
except Exception as e:
raise VerificationError(f"Failed to set binary permissions: {e}")

View File

@@ -0,0 +1,252 @@
"""Module for managing cleanup operations in the video processor"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, Set
from datetime import datetime
logger = logging.getLogger("VideoArchiver")
class CleanupStage(Enum):
"""Cleanup stages"""
QUEUE = "queue"
FFMPEG = "ffmpeg"
TASKS = "tasks"
RESOURCES = "resources"
class CleanupStrategy(Enum):
"""Cleanup strategies"""
NORMAL = "normal"
FORCE = "force"
GRACEFUL = "graceful"
@dataclass
class CleanupResult:
"""Result of a cleanup operation"""
success: bool
stage: CleanupStage
error: Optional[str] = None
duration: float = 0.0
class CleanupTracker:
"""Tracks cleanup operations"""
def __init__(self):
self.cleanup_history: List[Dict[str, Any]] = []
self.active_cleanups: Set[str] = set()
self.start_times: Dict[str, datetime] = {}
self.stage_results: Dict[str, List[CleanupResult]] = {}
def start_cleanup(self, cleanup_id: str) -> None:
"""Start tracking a cleanup operation"""
self.active_cleanups.add(cleanup_id)
self.start_times[cleanup_id] = datetime.utcnow()
self.stage_results[cleanup_id] = []
def record_stage_result(
self,
cleanup_id: str,
result: CleanupResult
) -> None:
"""Record result of a cleanup stage"""
if cleanup_id in self.stage_results:
self.stage_results[cleanup_id].append(result)
def end_cleanup(self, cleanup_id: str) -> None:
"""End tracking a cleanup operation"""
if cleanup_id in self.active_cleanups:
end_time = datetime.utcnow()
self.cleanup_history.append({
"id": cleanup_id,
"start_time": self.start_times[cleanup_id],
"end_time": end_time,
"duration": (end_time - self.start_times[cleanup_id]).total_seconds(),
"results": self.stage_results[cleanup_id]
})
self.active_cleanups.remove(cleanup_id)
self.start_times.pop(cleanup_id)
self.stage_results.pop(cleanup_id)
def get_cleanup_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
return {
"total_cleanups": len(self.cleanup_history),
"active_cleanups": len(self.active_cleanups),
"success_rate": self._calculate_success_rate(),
"average_duration": self._calculate_average_duration(),
"stage_success_rates": self._calculate_stage_success_rates()
}
def _calculate_success_rate(self) -> float:
"""Calculate overall cleanup success rate"""
if not self.cleanup_history:
return 1.0
successful = sum(
1 for cleanup in self.cleanup_history
if all(result.success for result in cleanup["results"])
)
return successful / len(self.cleanup_history)
def _calculate_average_duration(self) -> float:
"""Calculate average cleanup duration"""
if not self.cleanup_history:
return 0.0
total_duration = sum(cleanup["duration"] for cleanup in self.cleanup_history)
return total_duration / len(self.cleanup_history)
def _calculate_stage_success_rates(self) -> Dict[str, float]:
"""Calculate success rates by stage"""
stage_attempts: Dict[str, int] = {}
stage_successes: Dict[str, int] = {}
for cleanup in self.cleanup_history:
for result in cleanup["results"]:
stage = result.stage.value
stage_attempts[stage] = stage_attempts.get(stage, 0) + 1
if result.success:
stage_successes[stage] = stage_successes.get(stage, 0) + 1
return {
stage: stage_successes.get(stage, 0) / attempts
for stage, attempts in stage_attempts.items()
}
class CleanupManager:
"""Manages cleanup operations for the video processor"""
def __init__(
self,
queue_handler,
ffmpeg_mgr: Optional[object] = None,
strategy: CleanupStrategy = CleanupStrategy.NORMAL
):
self.queue_handler = queue_handler
self.ffmpeg_mgr = ffmpeg_mgr
self.strategy = strategy
self._queue_task: Optional[asyncio.Task] = None
self.tracker = CleanupTracker()
async def cleanup(self) -> None:
"""Perform normal cleanup of resources"""
cleanup_id = f"cleanup_{datetime.utcnow().timestamp()}"
self.tracker.start_cleanup(cleanup_id)
try:
logger.info("Starting normal cleanup...")
# Clean up in stages
stages = [
(CleanupStage.QUEUE, self._cleanup_queue),
(CleanupStage.FFMPEG, self._cleanup_ffmpeg),
(CleanupStage.TASKS, self._cleanup_tasks)
]
for stage, cleanup_func in stages:
try:
start_time = datetime.utcnow()
await cleanup_func()
duration = (datetime.utcnow() - start_time).total_seconds()
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(True, stage, duration=duration)
)
except Exception as e:
logger.error(f"Error in {stage.value} cleanup: {e}")
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(False, stage, str(e))
)
if self.strategy != CleanupStrategy.GRACEFUL:
raise
logger.info("Normal cleanup completed successfully")
except Exception as e:
logger.error(f"Error during normal cleanup: {str(e)}", exc_info=True)
raise
finally:
self.tracker.end_cleanup(cleanup_id)
async def force_cleanup(self) -> None:
"""Force cleanup of resources when normal cleanup fails"""
cleanup_id = f"force_cleanup_{datetime.utcnow().timestamp()}"
self.tracker.start_cleanup(cleanup_id)
try:
logger.info("Starting force cleanup...")
# Force cleanup in stages
stages = [
(CleanupStage.QUEUE, self._force_cleanup_queue),
(CleanupStage.FFMPEG, self._force_cleanup_ffmpeg),
(CleanupStage.TASKS, self._force_cleanup_tasks)
]
for stage, cleanup_func in stages:
try:
start_time = datetime.utcnow()
await cleanup_func()
duration = (datetime.utcnow() - start_time).total_seconds()
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(True, stage, duration=duration)
)
except Exception as e:
logger.error(f"Error in force {stage.value} cleanup: {e}")
self.tracker.record_stage_result(
cleanup_id,
CleanupResult(False, stage, str(e))
)
logger.info("Force cleanup completed")
except Exception as e:
logger.error(f"Error during force cleanup: {str(e)}", exc_info=True)
finally:
self.tracker.end_cleanup(cleanup_id)
async def _cleanup_queue(self) -> None:
"""Clean up queue handler"""
await self.queue_handler.cleanup()
async def _cleanup_ffmpeg(self) -> None:
"""Clean up FFmpeg manager"""
if self.ffmpeg_mgr:
self.ffmpeg_mgr.kill_all_processes()
async def _cleanup_tasks(self) -> None:
"""Clean up tasks"""
if self._queue_task and not self._queue_task.done():
self._queue_task.cancel()
try:
await self._queue_task
except asyncio.CancelledError:
pass
async def _force_cleanup_queue(self) -> None:
"""Force clean up queue handler"""
await self.queue_handler.force_cleanup()
async def _force_cleanup_ffmpeg(self) -> None:
"""Force clean up FFmpeg manager"""
if self.ffmpeg_mgr:
self.ffmpeg_mgr.kill_all_processes()
async def _force_cleanup_tasks(self) -> None:
"""Force clean up tasks"""
if self._queue_task and not self._queue_task.done():
self._queue_task.cancel()
def set_queue_task(self, task: asyncio.Task) -> None:
"""Set the queue processing task for cleanup purposes"""
self._queue_task = task
def get_cleanup_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
return {
"stats": self.tracker.get_cleanup_stats(),
"strategy": self.strategy.value,
"active_cleanups": len(self.tracker.active_cleanups)
}
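A minimal sketch of driving CleanupManager with a stub queue handler; the real handler is assumed to expose async `cleanup()` and `force_cleanup()` methods, and the GRACEFUL strategy lets later stages run even if an earlier stage fails:

```python
import asyncio

class StubQueueHandler:
    """Stand-in for the real queue handler, for demonstration only."""

    async def cleanup(self) -> None:
        print("queue cleaned")

    async def force_cleanup(self) -> None:
        print("queue force-cleaned")

async def main() -> None:
    manager = CleanupManager(StubQueueHandler(), strategy=CleanupStrategy.GRACEFUL)
    await manager.cleanup()   # runs the QUEUE -> FFMPEG -> TASKS stages
    print(manager.get_cleanup_stats())

asyncio.run(main())
```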

View File

@@ -2,19 +2,151 @@
import logging
import asyncio
from enum import Enum
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime
import discord
from discord.ext import commands
from discord import app_commands
from .message_handler import MessageHandler
from .queue_handler import QueueHandler
from .progress_tracker import ProgressTracker
from .status_display import StatusDisplay
from .cleanup_manager import CleanupManager
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class ProcessorState(Enum):
"""Possible states of the video processor"""
INITIALIZING = "initializing"
READY = "ready"
PROCESSING = "processing"
PAUSED = "paused"
ERROR = "error"
SHUTDOWN = "shutdown"
class OperationType(Enum):
"""Types of processor operations"""
MESSAGE_PROCESSING = "message_processing"
VIDEO_PROCESSING = "video_processing"
QUEUE_MANAGEMENT = "queue_management"
CLEANUP = "cleanup"
class OperationTracker:
"""Tracks processor operations"""
def __init__(self):
self.operations: Dict[str, Dict[str, Any]] = {}
self.operation_history: List[Dict[str, Any]] = []
self.error_count = 0
self.success_count = 0
def start_operation(
self,
op_type: OperationType,
details: Dict[str, Any]
) -> str:
"""Start tracking an operation"""
op_id = f"{op_type.value}_{datetime.utcnow().timestamp()}"
self.operations[op_id] = {
"type": op_type.value,
"start_time": datetime.utcnow(),
"status": "running",
"details": details
}
return op_id
def end_operation(
self,
op_id: str,
success: bool,
error: Optional[str] = None
) -> None:
"""End tracking an operation"""
if op_id in self.operations:
self.operations[op_id].update({
"end_time": datetime.utcnow(),
"status": "success" if success else "error",
"error": error
})
# Move to history
self.operation_history.append(self.operations.pop(op_id))
# Update counts
if success:
self.success_count += 1
else:
self.error_count += 1
def get_active_operations(self) -> Dict[str, Dict[str, Any]]:
"""Get currently active operations"""
return self.operations.copy()
def get_operation_stats(self) -> Dict[str, Any]:
"""Get operation statistics"""
return {
"total_operations": len(self.operation_history) + len(self.operations),
"active_operations": len(self.operations),
"success_count": self.success_count,
"error_count": self.error_count,
"success_rate": (
self.success_count / (self.success_count + self.error_count)
if (self.success_count + self.error_count) > 0
else 0
)
}
class HealthMonitor:
"""Monitors processor health"""
def __init__(self, processor: 'VideoProcessor'):
self.processor = processor
self.last_check: Optional[datetime] = None
self.health_status: Dict[str, bool] = {}
self._monitor_task: Optional[asyncio.Task] = None
async def start_monitoring(self) -> None:
"""Start health monitoring"""
self._monitor_task = asyncio.create_task(self._monitor_health())
async def stop_monitoring(self) -> None:
"""Stop health monitoring"""
if self._monitor_task:
self._monitor_task.cancel()
try:
await self._monitor_task
except asyncio.CancelledError:
pass
async def _monitor_health(self) -> None:
"""Monitor processor health"""
while True:
try:
self.last_check = datetime.utcnow()
# Check component health
self.health_status.update({
"queue_handler": self.processor.queue_handler.is_healthy(),
"message_handler": self.processor.message_handler.is_healthy(),
"progress_tracker": self.processor.progress_tracker.is_healthy()
})
# Check operation health
op_stats = self.processor.operation_tracker.get_operation_stats()
self.health_status["operations"] = (
op_stats["success_rate"] >= 0.9 # 90% success rate threshold
)
await asyncio.sleep(60) # Check every minute
except Exception as e:
logger.error(f"Health monitoring error: {e}")
await asyncio.sleep(30) # Shorter interval on error
def is_healthy(self) -> bool:
"""Check if processor is healthy"""
return all(self.health_status.values())
class VideoProcessor:
"""Handles video processing operations"""
@@ -34,91 +166,101 @@ class VideoProcessor:
self.db = db
self.queue_manager = queue_manager
# Initialize state
self.state = ProcessorState.INITIALIZING
self.operation_tracker = OperationTracker()
self.health_monitor = HealthMonitor(self)
# Initialize handlers
self.queue_handler = QueueHandler(bot, config_manager, components)
self.message_handler = MessageHandler(bot, config_manager, queue_manager)
self.progress_tracker = ProgressTracker()
self.cleanup_manager = CleanupManager(self.queue_handler, ffmpeg_mgr)
# Pass db to queue handler if it exists
if self.db:
self.queue_handler.db = self.db
# Store queue task reference
self._queue_task = None
# Mark as ready
self.state = ProcessorState.READY
logger.info("VideoProcessor initialized successfully")
async def start(self) -> None:
"""Start processor operations"""
await self.health_monitor.start_monitoring()
async def process_video(self, item) -> Tuple[bool, Optional[str]]:
"""Process a video from the queue by delegating to queue handler"""
return await self.queue_handler.process_video(item)
"""Process a video from the queue"""
op_id = self.operation_tracker.start_operation(
OperationType.VIDEO_PROCESSING,
{"item": str(item)}
)
try:
self.state = ProcessorState.PROCESSING
result = await self.queue_handler.process_video(item)
success = result[0]
error = None if success else result[1]
self.operation_tracker.end_operation(op_id, success, error)
return result
except Exception as e:
self.operation_tracker.end_operation(op_id, False, str(e))
raise
finally:
self.state = ProcessorState.READY
async def process_message(self, message: discord.Message) -> None:
"""Process a message for video content"""
op_id = self.operation_tracker.start_operation(
OperationType.MESSAGE_PROCESSING,
{"message_id": message.id}
)
try:
await self.message_handler.process_message(message)
            self.operation_tracker.end_operation(op_id, True)
        except Exception as e:
            self.operation_tracker.end_operation(op_id, False, str(e))
            raise
async def cleanup(self) -> None:
"""Clean up resources and stop processing"""
op_id = self.operation_tracker.start_operation(
OperationType.CLEANUP,
{"type": "normal"}
)
try:
logger.info("Starting force cleanup of VideoProcessor...")
self.state = ProcessorState.SHUTDOWN
await self.health_monitor.stop_monitoring()
await self.cleanup_manager.cleanup()
self.operation_tracker.end_operation(op_id, True)
except Exception as e:
self.operation_tracker.end_operation(op_id, False, str(e))
logger.error(f"Error during cleanup: {e}", exc_info=True)
raise
async def force_cleanup(self) -> None:
"""Force cleanup of resources"""
op_id = self.operation_tracker.start_operation(
OperationType.CLEANUP,
{"type": "force"}
)
        try:
self.state = ProcessorState.SHUTDOWN
await self.health_monitor.stop_monitoring()
await self.cleanup_manager.force_cleanup()
self.operation_tracker.end_operation(op_id, True)
except Exception as e:
logger.error(f"Error force cleaning queue handler: {e}")
self.operation_tracker.end_operation(op_id, False, str(e))
raise
async def show_queue_details(self, ctx: commands.Context) -> None:
"""Display detailed queue status"""
try:
if not self.queue_manager:
await ctx.send("Queue manager is not initialized.")
@@ -127,110 +269,36 @@ class VideoProcessor:
# Get queue status
queue_status = self.queue_manager.get_queue_status(ctx.guild.id)
            # Get active operations
            active_ops = self.operation_tracker.get_active_operations()
            # Create and send status embed
            embed = await StatusDisplay.create_queue_status_embed(
                queue_status,
                active_ops
            )
await ctx.send(embed=embed)
except Exception as e:
logger.error(f"Error showing queue details: {str(e)}", exc_info=True)
logger.error(f"Error showing queue details: {e}", exc_info=True)
await ctx.send(f"Error getting queue details: {str(e)}")
def set_queue_task(self, task: asyncio.Task) -> None:
"""Set the queue processing task"""
self._queue_task = task
self.cleanup_manager.set_queue_task(task)
def get_status(self) -> Dict[str, Any]:
"""Get processor status"""
return {
"state": self.state.value,
"health": self.health_monitor.is_healthy(),
"operations": self.operation_tracker.get_operation_stats(),
"active_operations": self.operation_tracker.get_active_operations(),
"last_health_check": (
self.health_monitor.last_check.isoformat()
if self.health_monitor.last_check
else None
),
"health_status": self.health_monitor.health_status
}
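A short sketch of how a caller, for example a status command, might consume this dictionary; `processor` is assumed to be an initialized VideoProcessor:

```python
status = processor.get_status()
if not status["health"]:
    failing = [name for name, ok in status["health_status"].items() if not ok]
    logger.warning(
        f"Processor unhealthy (state={status['state']}): {', '.join(failing)}"
    )
```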

View File

@@ -1,130 +1,256 @@
"""Message processing and URL extraction for VideoProcessor"""
import logging
import asyncio
from enum import Enum
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
import discord
from .url_extractor import URLExtractor
from .message_validator import MessageValidator
from .queue_processor import QueueProcessor
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class MessageState(Enum):
"""Possible states of message processing"""
RECEIVED = "received"
VALIDATING = "validating"
EXTRACTING = "extracting"
PROCESSING = "processing"
COMPLETED = "completed"
FAILED = "failed"
IGNORED = "ignored"
class ProcessingStage(Enum):
"""Message processing stages"""
VALIDATION = "validation"
EXTRACTION = "extraction"
QUEUEING = "queueing"
COMPLETION = "completion"
class MessageCache:
"""Caches message validation results"""
def __init__(self, max_size: int = 1000):
self.max_size = max_size
self._cache: Dict[int, Dict[str, Any]] = {}
self._access_times: Dict[int, datetime] = {}
def add(self, message_id: int, result: Dict[str, Any]) -> None:
"""Add a result to cache"""
if len(self._cache) >= self.max_size:
self._cleanup_oldest()
self._cache[message_id] = result
self._access_times[message_id] = datetime.utcnow()
def get(self, message_id: int) -> Optional[Dict[str, Any]]:
"""Get a cached result"""
if message_id in self._cache:
self._access_times[message_id] = datetime.utcnow()
return self._cache[message_id]
return None
def _cleanup_oldest(self) -> None:
"""Remove oldest cache entries"""
if not self._access_times:
return
oldest = min(self._access_times.items(), key=lambda x: x[1])[0]
del self._cache[oldest]
del self._access_times[oldest]
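The cache evicts by least-recent access rather than insertion order, so re-reading an entry protects it. A small demonstration:

```python
cache = MessageCache(max_size=2)
cache.add(1, {"valid": True})
cache.add(2, {"valid": False})
cache.get(1)                    # refreshes access time for message 1
cache.add(3, {"valid": True})   # evicts message 2, the least recently used
assert cache.get(2) is None and cache.get(1) is not None
```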
class ProcessingTracker:
"""Tracks message processing state and progress"""
def __init__(self):
self.states: Dict[int, MessageState] = {}
self.stages: Dict[int, ProcessingStage] = {}
self.errors: Dict[int, str] = {}
self.start_times: Dict[int, datetime] = {}
self.end_times: Dict[int, datetime] = {}
def start_processing(self, message_id: int) -> None:
"""Start tracking a message"""
self.states[message_id] = MessageState.RECEIVED
self.start_times[message_id] = datetime.utcnow()
def update_state(
self,
message_id: int,
state: MessageState,
stage: Optional[ProcessingStage] = None,
error: Optional[str] = None
) -> None:
"""Update message state"""
self.states[message_id] = state
if stage:
self.stages[message_id] = stage
if error:
self.errors[message_id] = error
if state in (MessageState.COMPLETED, MessageState.FAILED, MessageState.IGNORED):
self.end_times[message_id] = datetime.utcnow()
def get_status(self, message_id: int) -> Dict[str, Any]:
"""Get processing status for a message"""
return {
"state": self.states.get(message_id),
"stage": self.stages.get(message_id),
"error": self.errors.get(message_id),
"start_time": self.start_times.get(message_id),
"end_time": self.end_times.get(message_id),
"duration": (
(self.end_times[message_id] - self.start_times[message_id]).total_seconds()
if message_id in self.end_times and message_id in self.start_times
else None
)
}
class MessageHandler:
"""Handles processing of messages for video content"""
def __init__(self, bot, config_manager, queue_manager):
self.bot = bot
self.config_manager = config_manager
self.queue_manager = queue_manager
self.url_extractor = URLExtractor()
self.message_validator = MessageValidator()
self.queue_processor = QueueProcessor(queue_manager)
# Initialize tracking and caching
self.tracker = ProcessingTracker()
self.validation_cache = MessageCache()
self._processing_lock = asyncio.Lock()
async def process_message(self, message: discord.Message) -> None:
"""Process a message for video content"""
# Start tracking
self.tracker.start_processing(message.id)
try:
async with self._processing_lock:
await self._process_message_internal(message)
except Exception as e:
logger.error(f"Error processing message: {str(e)}", exc_info=True)
self.tracker.update_state(
message.id,
MessageState.FAILED,
error=str(e)
)
try:
await message.add_reaction(REACTIONS["error"])
            except Exception:
pass
async def _process_message_internal(self, message: discord.Message) -> None:
"""Internal message processing logic"""
try:
logger.info(f"Adding URL to queue: {url}")
await message.add_reaction(REACTIONS['queued'])
await self.queue_manager.add_to_queue(
url=url,
message_id=message.id,
channel_id=message.channel.id,
guild_id=message.guild.id,
author_id=message.author.id,
priority=0
# Get guild settings
settings = await self.config_manager.get_guild_settings(message.guild.id)
if not settings:
logger.warning(f"No settings found for guild {message.guild.id}")
self.tracker.update_state(message.id, MessageState.IGNORED)
return
# Check cache for validation
cached_validation = self.validation_cache.get(message.id)
if cached_validation:
is_valid = cached_validation["valid"]
reason = cached_validation["reason"]
else:
# Validate message
self.tracker.update_state(
message.id,
MessageState.VALIDATING,
ProcessingStage.VALIDATION
)
logger.info(f"Successfully added video to queue: {url}")
is_valid, reason = await self.message_validator.validate_message(
message,
settings
)
# Cache result
self.validation_cache.add(message.id, {
"valid": is_valid,
"reason": reason
})
if not is_valid:
logger.debug(f"Message validation failed: {reason}")
self.tracker.update_state(
message.id,
MessageState.IGNORED,
error=reason
)
return
# Extract URLs
self.tracker.update_state(
message.id,
MessageState.EXTRACTING,
ProcessingStage.EXTRACTION
)
urls = await self.url_extractor.extract_urls(
message,
enabled_sites=settings.get("enabled_sites")
)
if not urls:
logger.debug("No valid URLs found in message")
self.tracker.update_state(message.id, MessageState.IGNORED)
return
# Process URLs
self.tracker.update_state(
message.id,
MessageState.PROCESSING,
ProcessingStage.QUEUEING
)
await self.queue_processor.process_urls(message, urls)
# Mark completion
self.tracker.update_state(
message.id,
MessageState.COMPLETED,
ProcessingStage.COMPLETION
)
except Exception as e:
logger.error(f"Failed to add video to queue: {str(e)}")
await message.add_reaction(REACTIONS['error'])
continue
self.tracker.update_state(
message.id,
MessageState.FAILED,
error=str(e)
)
raise
    async def format_archive_message(
        self,
        author: Optional[discord.Member],
        channel: discord.TextChannel,
        url: str
    ) -> str:
        """Format message for archive channel"""
        return await self.queue_processor.format_archive_message(
            author,
            channel,
            url
        )
def get_message_status(self, message_id: int) -> Dict[str, Any]:
"""Get processing status for a message"""
return self.tracker.get_status(message_id)
def is_healthy(self) -> bool:
"""Check if handler is healthy"""
# Check for any stuck messages
current_time = datetime.utcnow()
for message_id, start_time in self.tracker.start_times.items():
if (
message_id in self.tracker.states and
self.tracker.states[message_id] not in (
MessageState.COMPLETED,
MessageState.FAILED,
MessageState.IGNORED
) and
(current_time - start_time).total_seconds() > 300 # 5 minutes timeout
):
return False
return True
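A sketch of inspecting per-message state, for example from a debug command; `handler` and `message` are assumed to be an initialized MessageHandler and a discord.Message:

```python
status = handler.get_message_status(message.id)
print(status["state"], status["stage"], status["duration"])
if not handler.is_healthy():
    logger.warning("MessageHandler has messages stuck for over 5 minutes")
```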

View File

@@ -0,0 +1,225 @@
"""Message validation functionality for video processing"""
import logging
from enum import Enum
from dataclasses import dataclass
from typing import Dict, Optional, Tuple, List, Any, Callable, Set
from datetime import datetime
import discord
logger = logging.getLogger("VideoArchiver")
class ValidationResult(Enum):
"""Possible validation results"""
VALID = "valid"
INVALID = "invalid"
IGNORED = "ignored"
@dataclass
class ValidationContext:
"""Context for message validation"""
message: discord.Message
settings: Dict[str, Any]
guild_id: int
channel_id: int
author_id: int
roles: Set[int]
content_length: int
attachment_count: int
is_bot: bool
timestamp: datetime
@classmethod
def from_message(cls, message: discord.Message, settings: Dict[str, Any]) -> 'ValidationContext':
"""Create context from message"""
return cls(
message=message,
settings=settings,
guild_id=message.guild.id,
channel_id=message.channel.id,
author_id=message.author.id,
roles={role.id for role in message.author.roles},
content_length=len(message.content) if message.content else 0,
attachment_count=len(message.attachments),
is_bot=message.author.bot,
timestamp=message.created_at
)
@dataclass
class ValidationRule:
"""Defines a validation rule"""
name: str
description: str
validate: Callable[[ValidationContext], Tuple[bool, Optional[str]]]
enabled: bool = True
priority: int = 0
class ValidationCache:
"""Caches validation results"""
def __init__(self, max_size: int = 1000):
self.max_size = max_size
self._cache: Dict[int, Dict[str, Any]] = {}
self._access_times: Dict[int, datetime] = {}
def add(self, message_id: int, result: Dict[str, Any]) -> None:
"""Add validation result to cache"""
if len(self._cache) >= self.max_size:
self._cleanup_oldest()
self._cache[message_id] = result
self._access_times[message_id] = datetime.utcnow()
def get(self, message_id: int) -> Optional[Dict[str, Any]]:
"""Get cached validation result"""
if message_id in self._cache:
self._access_times[message_id] = datetime.utcnow()
return self._cache[message_id]
return None
def _cleanup_oldest(self) -> None:
"""Remove oldest cache entries"""
if not self._access_times:
return
oldest = min(self._access_times.items(), key=lambda x: x[1])[0]
del self._cache[oldest]
del self._access_times[oldest]
class ValidationRuleManager:
"""Manages validation rules"""
def __init__(self):
self.rules: List[ValidationRule] = [
ValidationRule(
name="content_check",
description="Check if message has content to process",
validate=self._validate_content,
priority=1
),
ValidationRule(
name="guild_enabled",
description="Check if archiving is enabled for guild",
validate=self._validate_guild_enabled,
priority=2
),
ValidationRule(
name="channel_enabled",
description="Check if channel is enabled for archiving",
validate=self._validate_channel,
priority=3
),
ValidationRule(
name="user_roles",
description="Check if user has required roles",
validate=self._validate_user_roles,
priority=4
)
]
self.rules.sort(key=lambda x: x.priority)
def _validate_content(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
"""Validate message content"""
if not ctx.content_length and not ctx.attachment_count:
return False, "No content or attachments"
return True, None
def _validate_guild_enabled(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
"""Validate guild settings"""
if not ctx.settings.get("enabled", False):
return False, "Video archiving disabled for guild"
return True, None
def _validate_channel(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
"""Validate channel settings"""
enabled_channels = ctx.settings.get("enabled_channels", [])
if enabled_channels and ctx.channel_id not in enabled_channels:
return False, "Channel not enabled for archiving"
return True, None
def _validate_user_roles(self, ctx: ValidationContext) -> Tuple[bool, Optional[str]]:
"""Validate user roles"""
allowed_roles = ctx.settings.get("allowed_roles", [])
if allowed_roles and not (ctx.roles & set(allowed_roles)):
return False, "User does not have required roles"
return True, None
class MessageValidator:
"""Handles validation of messages for video processing"""
def __init__(self):
self.rule_manager = ValidationRuleManager()
self.cache = ValidationCache()
self.validation_stats: Dict[str, int] = {
"total": 0,
"valid": 0,
"invalid": 0,
"ignored": 0,
"cached": 0
}
async def validate_message(
self,
message: discord.Message,
settings: Dict
) -> Tuple[bool, Optional[str]]:
"""Validate if a message should be processed"""
self.validation_stats["total"] += 1
# Check cache
cached = self.cache.get(message.id)
if cached:
self.validation_stats["cached"] += 1
return cached["valid"], cached.get("reason")
# Create validation context
ctx = ValidationContext.from_message(message, settings)
# Run validation rules
for rule in self.rule_manager.rules:
if not rule.enabled:
continue
try:
valid, reason = rule.validate(ctx)
if not valid:
self.validation_stats["invalid"] += 1
# Cache result
self.cache.add(message.id, {
"valid": False,
"reason": reason,
"rule": rule.name
})
return False, reason
except Exception as e:
logger.error(f"Error in validation rule {rule.name}: {e}")
return False, f"Validation error: {str(e)}"
# Message passed all rules
self.validation_stats["valid"] += 1
self.cache.add(message.id, {
"valid": True,
"reason": None
})
return True, None
def get_stats(self) -> Dict[str, Any]:
"""Get validation statistics"""
return {
"validation_stats": self.validation_stats.copy(),
"rules": [
{
"name": rule.name,
"description": rule.description,
"enabled": rule.enabled,
"priority": rule.priority
}
for rule in self.rule_manager.rules
]
}
def clear_cache(self, message_id: Optional[int] = None) -> None:
"""Clear validation cache"""
if message_id:
self.cache._cache.pop(message_id, None)
self.cache._access_times.pop(message_id, None)
else:
self.cache = ValidationCache(self.cache.max_size)
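A minimal async sketch of the validation pipeline; `message` is assumed to be a discord.Message and `settings` the guild's stored configuration dict:

```python
async def should_archive(message, settings) -> bool:
    """Run the rule chain and report whether the message qualifies."""
    validator = MessageValidator()
    is_valid, reason = await validator.validate_message(message, settings)
    if not is_valid:
        logger.debug(f"Skipping message {message.id}: {reason}")
    return is_valid
```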

View File

@@ -0,0 +1,237 @@
"""Queue processing functionality for video processing"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import List, Optional, Dict, Any, Set
from datetime import datetime
import discord
from .reactions import REACTIONS
logger = logging.getLogger("VideoArchiver")
class QueuePriority(Enum):
"""Queue item priorities"""
HIGH = 0
NORMAL = 1
LOW = 2
@dataclass
class QueueItem:
"""Represents an item in the processing queue"""
url: str
message_id: int
channel_id: int
guild_id: int
author_id: int
priority: QueuePriority
added_at: datetime
metadata: Optional[Dict[str, Any]] = None
attempts: int = 0
last_attempt: Optional[datetime] = None
error: Optional[str] = None
class ProcessingStrategy(Enum):
"""Available processing strategies"""
FIFO = "fifo" # First in, first out
PRIORITY = "priority" # Process by priority
SMART = "smart" # Smart processing based on various factors
class QueueMetrics:
"""Tracks queue processing metrics"""
def __init__(self):
self.total_processed = 0
self.successful = 0
self.failed = 0
self.processing_times: List[float] = []
self.errors: Dict[str, int] = {}
self.last_processed: Optional[datetime] = None
def record_success(self, processing_time: float) -> None:
"""Record successful processing"""
self.total_processed += 1
self.successful += 1
self.processing_times.append(processing_time)
self.last_processed = datetime.utcnow()
def record_failure(self, error: str) -> None:
"""Record processing failure"""
self.total_processed += 1
self.failed += 1
self.errors[error] = self.errors.get(error, 0) + 1
self.last_processed = datetime.utcnow()
def get_stats(self) -> Dict[str, Any]:
"""Get queue metrics"""
avg_time = (
sum(self.processing_times) / len(self.processing_times)
if self.processing_times
else 0
)
return {
"total_processed": self.total_processed,
"successful": self.successful,
"failed": self.failed,
"success_rate": (
self.successful / self.total_processed
if self.total_processed > 0
else 0
),
"average_processing_time": avg_time,
"error_counts": self.errors.copy(),
"last_processed": self.last_processed
}
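A worked example of the metrics bookkeeping: two successes and one failure yield a 2/3 success rate and the mean of the recorded processing times:

```python
metrics = QueueMetrics()
metrics.record_success(2.0)
metrics.record_success(4.0)
metrics.record_failure("DownloadError")

stats = metrics.get_stats()
assert stats["successful"] == 2 and stats["failed"] == 1
assert abs(stats["success_rate"] - 2 / 3) < 1e-9
assert stats["average_processing_time"] == 3.0
assert stats["error_counts"] == {"DownloadError": 1}
```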
class QueueProcessor:
"""Handles adding videos to the processing queue"""
def __init__(
self,
queue_manager,
strategy: ProcessingStrategy = ProcessingStrategy.SMART,
max_retries: int = 3
):
self.queue_manager = queue_manager
self.strategy = strategy
self.max_retries = max_retries
self.metrics = QueueMetrics()
self._processing: Set[str] = set()
self._processing_lock = asyncio.Lock()
async def process_urls(
self,
message: discord.Message,
urls: List[str],
priority: QueuePriority = QueuePriority.NORMAL
) -> None:
"""Process extracted URLs by adding them to the queue"""
for url in urls:
try:
logger.info(f"Adding URL to queue: {url}")
await message.add_reaction(REACTIONS['queued'])
# Create queue item
item = QueueItem(
url=url,
message_id=message.id,
channel_id=message.channel.id,
guild_id=message.guild.id,
author_id=message.author.id,
priority=priority,
added_at=datetime.utcnow()
)
# Add to queue with appropriate strategy
await self._add_to_queue(item)
logger.info(f"Successfully added video to queue: {url}")
except Exception as e:
logger.error(f"Failed to add video to queue: {str(e)}")
await message.add_reaction(REACTIONS['error'])
continue
async def _add_to_queue(self, item: QueueItem) -> None:
"""Add item to queue using current strategy"""
async with self._processing_lock:
if item.url in self._processing:
logger.debug(f"URL already being processed: {item.url}")
return
self._processing.add(item.url)
try:
# Apply processing strategy
if self.strategy == ProcessingStrategy.PRIORITY:
await self._add_with_priority(item)
elif self.strategy == ProcessingStrategy.SMART:
await self._add_with_smart_strategy(item)
else: # FIFO
await self._add_fifo(item)
finally:
async with self._processing_lock:
self._processing.remove(item.url)
async def _add_with_priority(self, item: QueueItem) -> None:
"""Add item with priority handling"""
await self.queue_manager.add_to_queue(
url=item.url,
message_id=item.message_id,
channel_id=item.channel_id,
guild_id=item.guild_id,
author_id=item.author_id,
priority=item.priority.value
)
async def _add_with_smart_strategy(self, item: QueueItem) -> None:
"""Add item using smart processing strategy"""
# Calculate priority based on various factors
priority = await self._calculate_smart_priority(item)
await self.queue_manager.add_to_queue(
url=item.url,
message_id=item.message_id,
channel_id=item.channel_id,
guild_id=item.guild_id,
author_id=item.author_id,
priority=priority
)
async def _add_fifo(self, item: QueueItem) -> None:
"""Add item using FIFO strategy"""
await self.queue_manager.add_to_queue(
url=item.url,
message_id=item.message_id,
channel_id=item.channel_id,
guild_id=item.guild_id,
author_id=item.author_id,
priority=QueuePriority.NORMAL.value
)
async def _calculate_smart_priority(self, item: QueueItem) -> int:
"""Calculate priority using smart strategy"""
base_priority = item.priority.value
# Adjust based on queue metrics
stats = self.metrics.get_stats()
if stats["total_processed"] > 0:
# Boost priority if queue is processing efficiently
if stats["success_rate"] > 0.9: # 90% success rate
base_priority -= 1
# Lower priority if having issues
elif stats["success_rate"] < 0.5: # 50% success rate
base_priority += 1
# Adjust based on retries
if item.attempts > 0:
base_priority += item.attempts
# Ensure priority stays in valid range
return max(0, min(base_priority, len(QueuePriority) - 1))
async def format_archive_message(
self,
author: Optional[discord.Member],
channel: discord.TextChannel,
url: str
) -> str:
"""Format message for archive channel"""
author_mention = author.mention if author else "Unknown User"
channel_mention = channel.mention if channel else "Unknown Channel"
return (
f"Video archived from {author_mention} in {channel_mention}\n"
f"Original URL: {url}"
)
def get_metrics(self) -> Dict[str, Any]:
"""Get queue processing metrics"""
return {
"metrics": self.metrics.get_stats(),
"strategy": self.strategy.value,
"active_processing": len(self._processing),
"max_retries": self.max_retries
}
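A rough usage sketch for QueueProcessor with a stand-in queue manager. StubQueueManager is hypothetical (not part of this commit), and the demo drives the FIFO path directly to avoid needing Discord:
import asyncio
from datetime import datetime

class StubQueueManager:
    """Hypothetical stand-in that records add_to_queue calls"""
    def __init__(self):
        self.added = []

    async def add_to_queue(self, **kwargs):
        self.added.append(kwargs)

async def demo():
    processor = QueueProcessor(StubQueueManager(), strategy=ProcessingStrategy.FIFO)
    item = QueueItem(
        url="https://example.com/video.mp4",
        message_id=1, channel_id=2, guild_id=3, author_id=4,
        priority=QueuePriority.NORMAL,
        added_at=datetime.utcnow(),
    )
    await processor._add_fifo(item)  # FIFO path: NORMAL priority straight to the manager
    print(processor.queue_manager.added[0]["priority"])  # 1 (QueuePriority.NORMAL)

asyncio.run(demo())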

View File

@@ -0,0 +1,316 @@
"""Module for handling queue status display and formatting"""
import discord
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, Any, List, Optional
import logging
logger = logging.getLogger("VideoArchiver")
class DisplayTheme:
"""Defines display themes"""
DEFAULT = {
"title_color": discord.Color.blue(),
"success_color": discord.Color.green(),
"warning_color": discord.Color.gold(),
"error_color": discord.Color.red(),
"info_color": discord.Color.blurple()
}
@dataclass
class DisplayTemplate:
"""Template for status display sections"""
name: str
format_string: str
inline: bool = False
order: int = 0
condition: Optional[str] = None
class DisplaySection(Enum):
"""Available display sections"""
QUEUE_STATS = "queue_stats"
DOWNLOADS = "downloads"
COMPRESSIONS = "compressions"
ERRORS = "errors"
HARDWARE = "hardware"
class StatusFormatter:
"""Formats status information for display"""
@staticmethod
    def format_bytes(num_bytes: float) -> str:
        """Format a byte count into a human-readable string"""
        for unit in ['B', 'KB', 'MB', 'GB']:
            if num_bytes < 1024:
                return f"{num_bytes:.1f}{unit}"
            num_bytes /= 1024  # fall through to the next unit
        return f"{num_bytes:.1f}TB"
@staticmethod
def format_time(seconds: float) -> str:
"""Format time duration"""
if seconds < 60:
return f"{seconds:.1f}s"
minutes = seconds / 60
if minutes < 60:
return f"{minutes:.1f}m"
hours = minutes / 60
return f"{hours:.1f}h"
@staticmethod
def format_percentage(value: float) -> str:
"""Format percentage value"""
return f"{value:.1f}%"
class DisplayManager:
"""Manages status display configuration"""
def __init__(self):
self.templates: Dict[DisplaySection, DisplayTemplate] = {
DisplaySection.QUEUE_STATS: DisplayTemplate(
name="Queue Statistics",
format_string=(
"```\n"
"Pending: {pending}\n"
"Processing: {processing}\n"
"Completed: {completed}\n"
"Failed: {failed}\n"
"Success Rate: {success_rate}\n"
"Avg Processing Time: {avg_processing_time}\n"
"```"
),
order=1
),
DisplaySection.DOWNLOADS: DisplayTemplate(
name="Active Downloads",
format_string=(
"```\n"
"URL: {url}\n"
"Progress: {percent}\n"
"Speed: {speed}\n"
"ETA: {eta}\n"
"Size: {size}\n"
"Started: {start_time}\n"
"Retries: {retries}\n"
"```"
),
order=2
),
DisplaySection.COMPRESSIONS: DisplayTemplate(
name="Active Compressions",
format_string=(
"```\n"
"File: {filename}\n"
"Progress: {percent}\n"
"Time Elapsed: {elapsed_time}\n"
"Input Size: {input_size}\n"
"Current Size: {current_size}\n"
"Target Size: {target_size}\n"
"Codec: {codec}\n"
"Hardware Accel: {hardware_accel}\n"
"```"
),
order=3
),
DisplaySection.ERRORS: DisplayTemplate(
name="Error Statistics",
format_string="```\n{error_stats}```",
condition="has_errors",
order=4
),
DisplaySection.HARDWARE: DisplayTemplate(
name="Hardware Statistics",
format_string=(
"```\n"
"Hardware Accel Failures: {hw_failures}\n"
"Compression Failures: {comp_failures}\n"
"Peak Memory Usage: {memory_usage}\n"
"```"
),
order=5
)
}
self.theme = DisplayTheme.DEFAULT
class StatusDisplay:
"""Handles formatting and display of queue status information"""
def __init__(self):
self.display_manager = DisplayManager()
self.formatter = StatusFormatter()
async def create_queue_status_embed(
self,
queue_status: Dict[str, Any],
active_ops: Dict[str, Any]
) -> discord.Embed:
"""Create an embed displaying queue status and active operations"""
embed = discord.Embed(
title="Queue Status Details",
color=self.display_manager.theme["title_color"],
timestamp=datetime.utcnow()
)
# Add sections in order
sections = sorted(
self.display_manager.templates.items(),
key=lambda x: x[1].order
)
for section, template in sections:
# Check condition if exists
if template.condition:
if not self._check_condition(template.condition, queue_status, active_ops):
continue
# Add section based on type
if section == DisplaySection.QUEUE_STATS:
self._add_queue_statistics(embed, queue_status, template)
elif section == DisplaySection.DOWNLOADS:
self._add_active_downloads(embed, active_ops.get('downloads', {}), template)
elif section == DisplaySection.COMPRESSIONS:
self._add_active_compressions(embed, active_ops.get('compressions', {}), template)
elif section == DisplaySection.ERRORS:
self._add_error_statistics(embed, queue_status, template)
elif section == DisplaySection.HARDWARE:
self._add_hardware_statistics(embed, queue_status, template)
return embed
def _check_condition(
self,
condition: str,
queue_status: Dict[str, Any],
active_ops: Dict[str, Any]
) -> bool:
"""Check if condition for displaying section is met"""
if condition == "has_errors":
return bool(queue_status["metrics"]["errors_by_type"])
return True
def _add_queue_statistics(
self,
embed: discord.Embed,
queue_status: Dict[str, Any],
template: DisplayTemplate
) -> None:
"""Add queue statistics to the embed"""
embed.add_field(
name=template.name,
value=template.format_string.format(
pending=queue_status['pending'],
processing=queue_status['processing'],
completed=queue_status['completed'],
failed=queue_status['failed'],
success_rate=self.formatter.format_percentage(
queue_status['metrics']['success_rate'] * 100
),
avg_processing_time=self.formatter.format_time(
queue_status['metrics']['avg_processing_time']
)
),
inline=template.inline
)
def _add_active_downloads(
self,
embed: discord.Embed,
downloads: Dict[str, Any],
template: DisplayTemplate
) -> None:
"""Add active downloads information to the embed"""
if downloads:
content = []
for url, progress in downloads.items():
content.append(template.format_string.format(
url=url[:50] + "..." if len(url) > 50 else url,
percent=self.formatter.format_percentage(progress.get('percent', 0)),
speed=progress.get('speed', 'N/A'),
eta=progress.get('eta', 'N/A'),
size=f"{self.formatter.format_bytes(progress.get('downloaded_bytes', 0))}/"
f"{self.formatter.format_bytes(progress.get('total_bytes', 0))}",
start_time=progress.get('start_time', 'N/A'),
retries=progress.get('retries', 0)
))
embed.add_field(
name=template.name,
value="".join(content),
inline=template.inline
)
else:
embed.add_field(
name=template.name,
value="```\nNo active downloads```",
inline=template.inline
)
def _add_active_compressions(
self,
embed: discord.Embed,
compressions: Dict[str, Any],
template: DisplayTemplate
) -> None:
"""Add active compressions information to the embed"""
if compressions:
content = []
for file_id, progress in compressions.items():
content.append(template.format_string.format(
filename=progress.get('filename', 'Unknown'),
percent=self.formatter.format_percentage(progress.get('percent', 0)),
elapsed_time=progress.get('elapsed_time', 'N/A'),
input_size=self.formatter.format_bytes(progress.get('input_size', 0)),
current_size=self.formatter.format_bytes(progress.get('current_size', 0)),
target_size=self.formatter.format_bytes(progress.get('target_size', 0)),
codec=progress.get('codec', 'Unknown'),
hardware_accel=progress.get('hardware_accel', False)
))
embed.add_field(
name=template.name,
value="".join(content),
inline=template.inline
)
else:
embed.add_field(
name=template.name,
value="```\nNo active compressions```",
inline=template.inline
)
def _add_error_statistics(
self,
embed: discord.Embed,
queue_status: Dict[str, Any],
template: DisplayTemplate
) -> None:
"""Add error statistics to the embed"""
if queue_status["metrics"]["errors_by_type"]:
error_stats = "\n".join(
f"{error_type}: {count}"
for error_type, count in queue_status["metrics"]["errors_by_type"].items()
)
embed.add_field(
name=template.name,
value=template.format_string.format(error_stats=error_stats),
inline=template.inline
)
def _add_hardware_statistics(
self,
embed: discord.Embed,
queue_status: Dict[str, Any],
template: DisplayTemplate
) -> None:
"""Add hardware statistics to the embed"""
embed.add_field(
name=template.name,
value=template.format_string.format(
hw_failures=queue_status['metrics']['hardware_accel_failures'],
comp_failures=queue_status['metrics']['compression_failures'],
memory_usage=self.formatter.format_bytes(
queue_status['metrics']['peak_memory_usage'] * 1024 * 1024 # Convert MB to bytes
)
),
inline=template.inline
)
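A sketch of the payload shape create_queue_status_embed appears to expect, inferred from the field formatters above; every value here is invented, and discord.py must be installed:
import asyncio

queue_status = {
    "pending": 4, "processing": 1, "completed": 120, "failed": 3,
    "metrics": {
        "success_rate": 0.976,
        "avg_processing_time": 42.0,   # seconds
        "errors_by_type": {},          # empty, so the ERRORS section is skipped
        "hardware_accel_failures": 0,
        "compression_failures": 1,
        "peak_memory_usage": 512,      # MB; converted to bytes for display
    },
}
active_ops = {"downloads": {}, "compressions": {}}

async def demo():
    display = StatusDisplay()
    embed = await display.create_queue_status_embed(queue_status, active_ops)
    print(embed.title)  # "Queue Status Details"

asyncio.run(demo())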

View File

@@ -0,0 +1,264 @@
"""URL extraction functionality for video processing"""
import logging
import re
from enum import Enum
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Pattern
import discord
from urllib.parse import urlparse, parse_qs, ParseResult
logger = logging.getLogger("VideoArchiver")
@dataclass
class URLPattern:
"""Defines a URL pattern for a video site"""
site: str
pattern: Pattern
requires_api: bool = False
supports_timestamp: bool = False
supports_playlist: bool = False
@dataclass
class URLMetadata:
"""Metadata about an extracted URL"""
url: str
site: str
timestamp: Optional[int] = None
playlist_id: Optional[str] = None
video_id: Optional[str] = None
quality: Optional[str] = None
class URLType(Enum):
"""Types of video URLs"""
DIRECT = "direct"
PLATFORM = "platform"
UNKNOWN = "unknown"
class URLPatternManager:
"""Manages URL patterns for different video sites"""
def __init__(self):
self.patterns: Dict[str, URLPattern] = {
"youtube": URLPattern(
site="youtube",
pattern=re.compile(
r'(?:https?://)?(?:www\.)?'
r'(?:youtube\.com/watch\?v=|youtu\.be/)'
r'([a-zA-Z0-9_-]{11})'
),
supports_timestamp=True,
supports_playlist=True
),
"vimeo": URLPattern(
site="vimeo",
pattern=re.compile(
r'(?:https?://)?(?:www\.)?'
r'vimeo\.com/(?:channels/(?:\w+/)?|groups/(?:[^/]*/)*|)'
r'(\d+)(?:|/\w+)*'
),
supports_timestamp=True
),
"twitter": URLPattern(
site="twitter",
pattern=re.compile(
r'(?:https?://)?(?:www\.)?'
r'(?:twitter\.com|x\.com)/\w+/status/(\d+)'
),
requires_api=True
),
# Add more patterns as needed
}
self.direct_extensions = {'.mp4', '.mov', '.avi', '.webm', '.mkv'}
def get_pattern(self, site: str) -> Optional[URLPattern]:
"""Get pattern for a site"""
return self.patterns.get(site.lower())
def is_supported_site(self, url: str, enabled_sites: Optional[List[str]]) -> bool:
"""Check if URL is from a supported site"""
if not enabled_sites:
return True
parsed = urlparse(url.lower())
domain = parsed.netloc.replace('www.', '')
return any(site.lower() in domain for site in enabled_sites)
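Illustrative pattern checks against the tables above (URLs are made up):
pm = URLPatternManager()
match = pm.get_pattern("youtube").pattern.match("https://youtu.be/dQw4w9WgXcQ")
assert match and match.group(1) == "dQw4w9WgXcQ"
assert pm.is_supported_site("https://vimeo.com/12345", ["vimeo"])
assert not pm.is_supported_site("https://example.com/clip", ["vimeo"])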
class URLValidator:
"""Validates extracted URLs"""
def __init__(self, pattern_manager: URLPatternManager):
self.pattern_manager = pattern_manager
def get_url_type(self, url: str) -> URLType:
"""Determine URL type"""
parsed = urlparse(url)
if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
return URLType.DIRECT
if any(pattern.pattern.match(url) for pattern in self.pattern_manager.patterns.values()):
return URLType.PLATFORM
return URLType.UNKNOWN
def is_valid_url(self, url: str) -> bool:
"""Validate URL format"""
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except Exception:
return False
class URLMetadataExtractor:
"""Extracts metadata from URLs"""
def __init__(self, pattern_manager: URLPatternManager):
self.pattern_manager = pattern_manager
def extract_metadata(self, url: str) -> Optional[URLMetadata]:
"""Extract metadata from URL"""
try:
parsed = urlparse(url)
# Handle direct video URLs
if any(parsed.path.lower().endswith(ext) for ext in self.pattern_manager.direct_extensions):
return URLMetadata(url=url, site="direct")
# Handle platform URLs
for site, pattern in self.pattern_manager.patterns.items():
if match := pattern.pattern.match(url):
metadata = URLMetadata(
url=url,
site=site,
video_id=match.group(1)
)
# Extract additional metadata
if pattern.supports_timestamp:
metadata.timestamp = self._extract_timestamp(parsed)
if pattern.supports_playlist:
metadata.playlist_id = self._extract_playlist_id(parsed)
return metadata
return None
except Exception as e:
logger.error(f"Error extracting metadata from URL {url}: {e}")
return None
    def _extract_timestamp(self, parsed_url: ParseResult) -> Optional[int]:
"""Extract timestamp from URL"""
try:
params = parse_qs(parsed_url.query)
if 't' in params:
return int(params['t'][0])
return None
except Exception:
return None
    def _extract_playlist_id(self, parsed_url: ParseResult) -> Optional[str]:
"""Extract playlist ID from URL"""
try:
params = parse_qs(parsed_url.query)
if 'list' in params:
return params['list'][0]
return None
except Exception:
return None
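Extracting metadata from a timestamped YouTube link, for illustration:
extractor = URLMetadataExtractor(URLPatternManager())
meta = extractor.extract_metadata("https://youtube.com/watch?v=dQw4w9WgXcQ&t=42")
assert meta.site == "youtube"
assert meta.video_id == "dQw4w9WgXcQ"
assert meta.timestamp == 42  # pulled from the t= query parameter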
class URLExtractor:
"""Handles extraction of video URLs from messages"""
def __init__(self):
self.pattern_manager = URLPatternManager()
self.validator = URLValidator(self.pattern_manager)
self.metadata_extractor = URLMetadataExtractor(self.pattern_manager)
self._url_cache: Dict[str, Set[str]] = {}
async def extract_urls(
self,
message: discord.Message,
enabled_sites: Optional[List[str]] = None
) -> List[URLMetadata]:
"""Extract video URLs from message content and attachments"""
urls = []
# Check cache
cache_key = f"{message.id}_{'-'.join(enabled_sites) if enabled_sites else 'all'}"
        if cache_key in self._url_cache:
            return [
                metadata
                for metadata in (
                    self.metadata_extractor.extract_metadata(url)
                    for url in self._url_cache[cache_key]
                )
                if metadata is not None  # drop URLs whose metadata can no longer be extracted
            ]
# Extract URLs
content_urls = await self._extract_from_content(message.content, enabled_sites)
attachment_urls = await self._extract_from_attachments(message.attachments)
# Process all URLs
all_urls = content_urls + attachment_urls
valid_urls = []
for url in all_urls:
if not self.validator.is_valid_url(url):
logger.debug(f"Invalid URL format: {url}")
continue
if not self.pattern_manager.is_supported_site(url, enabled_sites):
logger.debug(f"URL {url} doesn't match any enabled sites")
continue
metadata = self.metadata_extractor.extract_metadata(url)
if metadata:
urls.append(metadata)
valid_urls.append(url)
else:
logger.debug(f"Could not extract metadata from URL: {url}")
# Update cache
self._url_cache[cache_key] = set(valid_urls)
return urls
async def _extract_from_content(
self,
content: str,
enabled_sites: Optional[List[str]]
) -> List[str]:
"""Extract video URLs from message content"""
if not content:
return []
urls = []
for word in content.split():
if self.validator.get_url_type(word) != URLType.UNKNOWN:
urls.append(word)
return urls
async def _extract_from_attachments(
self,
attachments: List[discord.Attachment]
) -> List[str]:
"""Extract video URLs from message attachments"""
return [
attachment.url
for attachment in attachments
if any(
attachment.filename.lower().endswith(ext)
for ext in self.pattern_manager.direct_extensions
)
]
def clear_cache(self, message_id: Optional[int] = None) -> None:
"""Clear URL cache"""
if message_id:
keys_to_remove = [
key for key in self._url_cache
if key.startswith(f"{message_id}_")
]
for key in keys_to_remove:
self._url_cache.pop(key, None)
else:
self._url_cache.clear()
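Driving URLExtractor without a live Discord connection, using a duck-typed stand-in for discord.Message that carries only the attributes extract_urls reads:
import asyncio
from types import SimpleNamespace

async def demo():
    extractor = URLExtractor()
    message = SimpleNamespace(
        id=123,
        content="check this https://youtu.be/dQw4w9WgXcQ out",
        attachments=[],
    )
    results = await extractor.extract_urls(message)
    print([(m.site, m.video_id) for m in results])  # [('youtube', 'dQw4w9WgXcQ')]

asyncio.run(demo())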

View File

@@ -0,0 +1,500 @@
"""Module for cleaning guild-specific queue items"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, List, Set, Tuple, Any, Optional
from datetime import datetime
from ..models import QueueItem
logger = logging.getLogger("GuildCleaner")
class GuildCleanupStrategy(Enum):
"""Guild cleanup strategies"""
FULL = "full" # Clear all guild items
SELECTIVE = "selective" # Clear only specific categories
GRACEFUL = "graceful" # Clear with grace period
class CleanupCategory(Enum):
"""Categories for cleanup"""
QUEUE = "queue"
PROCESSING = "processing"
COMPLETED = "completed"
FAILED = "failed"
TRACKING = "tracking"
@dataclass
class GuildCleanupConfig:
"""Configuration for guild cleanup"""
categories: Set[CleanupCategory] = field(default_factory=lambda: set(CleanupCategory))
grace_period: int = 300 # 5 minutes
preserve_completed: bool = False
preserve_failed: bool = False
batch_size: int = 100
@dataclass
class GuildCleanupResult:
"""Result of a guild cleanup operation"""
guild_id: int
timestamp: datetime
strategy: GuildCleanupStrategy
items_cleared: int
categories_cleared: Set[CleanupCategory]
initial_counts: Dict[str, int]
final_counts: Dict[str, int]
duration: float
error: Optional[str] = None
class GuildCleanupTracker:
"""Tracks guild cleanup operations"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.history: List[GuildCleanupResult] = []
self.cleanup_counts: Dict[int, int] = {} # guild_id -> count
self.total_items_cleared = 0
self.last_cleanup: Optional[datetime] = None
def record_cleanup(self, result: GuildCleanupResult) -> None:
"""Record a cleanup operation"""
self.history.append(result)
if len(self.history) > self.max_history:
self.history.pop(0)
self.cleanup_counts[result.guild_id] = (
self.cleanup_counts.get(result.guild_id, 0) + 1
)
self.total_items_cleared += result.items_cleared
self.last_cleanup = result.timestamp
def get_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
return {
"total_cleanups": len(self.history),
"total_items_cleared": self.total_items_cleared,
"guilds_cleaned": len(self.cleanup_counts),
"last_cleanup": (
self.last_cleanup.isoformat()
if self.last_cleanup
else None
),
"recent_cleanups": [
{
"guild_id": r.guild_id,
"timestamp": r.timestamp.isoformat(),
"strategy": r.strategy.value,
"items_cleared": r.items_cleared,
"categories": [c.value for c in r.categories_cleared]
}
for r in self.history[-5:] # Last 5 cleanups
]
}
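Recording one synthetic cleanup result shows the tracker's bookkeeping (all values invented):
tracker = GuildCleanupTracker(max_history=10)
tracker.record_cleanup(GuildCleanupResult(
    guild_id=42,
    timestamp=datetime.utcnow(),
    strategy=GuildCleanupStrategy.FULL,
    items_cleared=7,
    categories_cleared={CleanupCategory.QUEUE, CleanupCategory.TRACKING},
    initial_counts={"queue": 7},
    final_counts={"queue": 0},
    duration=0.05,
))
stats = tracker.get_stats()
assert stats["total_cleanups"] == 1 and stats["total_items_cleared"] == 7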
class GuildCleaner:
"""Handles cleanup of guild-specific queue items"""
def __init__(
self,
strategy: GuildCleanupStrategy = GuildCleanupStrategy.GRACEFUL,
config: Optional[GuildCleanupConfig] = None
):
self.strategy = strategy
self.config = config or GuildCleanupConfig()
self.tracker = GuildCleanupTracker()
async def clear_guild_items(
self,
guild_id: int,
queue: List[QueueItem],
processing: Dict[str, QueueItem],
completed: Dict[str, QueueItem],
failed: Dict[str, QueueItem],
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]]
) -> Tuple[int, Dict[str, int]]:
"""Clear all queue items for a specific guild"""
start_time = datetime.utcnow()
cleared_categories = set()
try:
# Get initial counts
initial_counts = self._get_item_counts(
guild_id,
queue,
processing,
completed,
failed
)
# Clear items based on strategy
cleared_count = 0
if self.strategy == GuildCleanupStrategy.FULL:
cleared_count = await self._full_cleanup(
guild_id,
queue,
processing,
completed,
failed,
guild_queues,
channel_queues,
cleared_categories
)
elif self.strategy == GuildCleanupStrategy.SELECTIVE:
cleared_count = await self._selective_cleanup(
guild_id,
queue,
processing,
completed,
failed,
guild_queues,
channel_queues,
cleared_categories
)
else: # GRACEFUL
cleared_count = await self._graceful_cleanup(
guild_id,
queue,
processing,
completed,
failed,
guild_queues,
channel_queues,
cleared_categories
)
# Get final counts
final_counts = self._get_item_counts(
guild_id,
queue,
processing,
completed,
failed
)
# Record cleanup result
duration = (datetime.utcnow() - start_time).total_seconds()
result = GuildCleanupResult(
guild_id=guild_id,
timestamp=datetime.utcnow(),
strategy=self.strategy,
items_cleared=cleared_count,
categories_cleared=cleared_categories,
initial_counts=initial_counts,
final_counts=final_counts,
duration=duration
)
self.tracker.record_cleanup(result)
logger.info(self.format_guild_cleanup_report(
guild_id,
initial_counts,
final_counts,
duration
))
return cleared_count, initial_counts
except Exception as e:
logger.error(f"Error clearing guild {guild_id} queue: {e}")
self.tracker.record_cleanup(GuildCleanupResult(
guild_id=guild_id,
timestamp=datetime.utcnow(),
strategy=self.strategy,
items_cleared=0,
categories_cleared=set(),
initial_counts={},
final_counts={},
duration=0,
error=str(e)
))
raise
async def _full_cleanup(
self,
guild_id: int,
queue: List[QueueItem],
processing: Dict[str, QueueItem],
completed: Dict[str, QueueItem],
failed: Dict[str, QueueItem],
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
cleared_categories: Set[CleanupCategory]
) -> int:
"""Perform full cleanup"""
cleared_count = 0
# Clear from pending queue
        original_len = len(queue)
        queue[:] = [item for item in queue if item.guild_id != guild_id]
        cleared_count += original_len - len(queue)  # count removed items, not remaining ones
cleared_categories.add(CleanupCategory.QUEUE)
# Clear from processing
cleared = await self._clear_from_dict(
processing, guild_id, 'processing'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.PROCESSING)
# Clear from completed
cleared = await self._clear_from_dict(
completed, guild_id, 'completed'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.COMPLETED)
# Clear from failed
cleared = await self._clear_from_dict(
failed, guild_id, 'failed'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.FAILED)
# Clear tracking
cleared = await self._clear_tracking(
guild_id,
guild_queues,
channel_queues
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.TRACKING)
return cleared_count
async def _selective_cleanup(
self,
guild_id: int,
queue: List[QueueItem],
processing: Dict[str, QueueItem],
completed: Dict[str, QueueItem],
failed: Dict[str, QueueItem],
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
cleared_categories: Set[CleanupCategory]
) -> int:
"""Perform selective cleanup"""
cleared_count = 0
# Clear only configured categories
if CleanupCategory.QUEUE in self.config.categories:
            original_len = len(queue)
            queue[:] = [item for item in queue if item.guild_id != guild_id]
            cleared_count += original_len - len(queue)  # count removed items
cleared_categories.add(CleanupCategory.QUEUE)
if CleanupCategory.PROCESSING in self.config.categories:
cleared = await self._clear_from_dict(
processing, guild_id, 'processing'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.PROCESSING)
if (
CleanupCategory.COMPLETED in self.config.categories and
not self.config.preserve_completed
):
cleared = await self._clear_from_dict(
completed, guild_id, 'completed'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.COMPLETED)
if (
CleanupCategory.FAILED in self.config.categories and
not self.config.preserve_failed
):
cleared = await self._clear_from_dict(
failed, guild_id, 'failed'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.FAILED)
if CleanupCategory.TRACKING in self.config.categories:
cleared = await self._clear_tracking(
guild_id,
guild_queues,
channel_queues
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.TRACKING)
return cleared_count
async def _graceful_cleanup(
self,
guild_id: int,
queue: List[QueueItem],
processing: Dict[str, QueueItem],
completed: Dict[str, QueueItem],
failed: Dict[str, QueueItem],
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
cleared_categories: Set[CleanupCategory]
) -> int:
"""Perform graceful cleanup"""
cleared_count = 0
cutoff_time = datetime.utcnow().timestamp() - self.config.grace_period
# Clear queue items beyond grace period
        original_len = len(queue)
        queue[:] = [
            item for item in queue
            if not (
                item.guild_id == guild_id and
                item.added_at.timestamp() < cutoff_time
            )
        ]
        cleared_count += original_len - len(queue)  # count removed items
cleared_categories.add(CleanupCategory.QUEUE)
# Clear processing items beyond grace period
for url in list(processing.keys()):
item = processing[url]
if (
item.guild_id == guild_id and
item.added_at.timestamp() < cutoff_time
):
processing.pop(url)
cleared_count += 1
cleared_categories.add(CleanupCategory.PROCESSING)
# Clear completed and failed based on config
if not self.config.preserve_completed:
cleared = await self._clear_from_dict(
completed, guild_id, 'completed'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.COMPLETED)
if not self.config.preserve_failed:
cleared = await self._clear_from_dict(
failed, guild_id, 'failed'
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.FAILED)
# Clear tracking
cleared = await self._clear_tracking(
guild_id,
guild_queues,
channel_queues
)
cleared_count += cleared
cleared_categories.add(CleanupCategory.TRACKING)
return cleared_count
async def _clear_from_dict(
self,
items_dict: Dict[str, QueueItem],
guild_id: int,
category: str
) -> int:
"""Clear guild items from a dictionary"""
cleared = 0
batch_count = 0
for url in list(items_dict.keys()):
if items_dict[url].guild_id == guild_id:
items_dict.pop(url)
cleared += 1
batch_count += 1
# Process in batches
if batch_count >= self.config.batch_size:
await asyncio.sleep(0) # Yield to event loop
batch_count = 0
logger.debug(f"Cleared {cleared} {category} items for guild {guild_id}")
return cleared
async def _clear_tracking(
self,
guild_id: int,
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]]
) -> int:
"""Clear guild tracking data"""
cleared = 0
guild_urls = guild_queues.get(guild_id, set())
# Clear guild tracking
if guild_id in guild_queues:
cleared += len(guild_queues[guild_id])
guild_queues.pop(guild_id)
# Clear channel tracking
await self._clear_channel_tracking(channel_queues, guild_urls)
return cleared
async def _clear_channel_tracking(
self,
channel_queues: Dict[int, Set[str]],
guild_urls: Set[str]
) -> None:
"""Clear channel tracking for guild URLs"""
batch_count = 0
for channel_id in list(channel_queues.keys()):
channel_queues[channel_id] = {
url for url in channel_queues[channel_id]
if url not in guild_urls
}
if not channel_queues[channel_id]:
channel_queues.pop(channel_id)
batch_count += 1
if batch_count >= self.config.batch_size:
await asyncio.sleep(0) # Yield to event loop
batch_count = 0
def _get_item_counts(
self,
guild_id: int,
queue: List[QueueItem],
processing: Dict[str, QueueItem],
completed: Dict[str, QueueItem],
failed: Dict[str, QueueItem]
) -> Dict[str, int]:
"""Get item counts for a guild"""
return {
'queue': len([item for item in queue if item.guild_id == guild_id]),
'processing': len([item for item in processing.values() if item.guild_id == guild_id]),
'completed': len([item for item in completed.values() if item.guild_id == guild_id]),
'failed': len([item for item in failed.values() if item.guild_id == guild_id])
}
def format_guild_cleanup_report(
self,
guild_id: int,
initial_counts: Dict[str, int],
final_counts: Dict[str, int],
duration: float
) -> str:
"""Format a guild cleanup report"""
return (
f"Guild {guild_id} Cleanup Results:\n"
f"Strategy: {self.strategy.value}\n"
f"Duration: {duration:.2f}s\n"
f"Items:\n"
f"- Queue: {initial_counts['queue']} -> {final_counts['queue']}\n"
f"- Processing: {initial_counts['processing']} -> {final_counts['processing']}\n"
f"- Completed: {initial_counts['completed']} -> {final_counts['completed']}\n"
f"- Failed: {initial_counts['failed']} -> {final_counts['failed']}\n"
f"Total cleared: {sum(initial_counts.values()) - sum(final_counts.values())} items"
)
def get_cleaner_stats(self) -> Dict[str, Any]:
"""Get comprehensive cleaner statistics"""
return {
"strategy": self.strategy.value,
"config": {
"categories": [c.value for c in self.config.categories],
"grace_period": self.config.grace_period,
"preserve_completed": self.config.preserve_completed,
"preserve_failed": self.config.preserve_failed,
"batch_size": self.config.batch_size
},
"tracker": self.tracker.get_stats()
}
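A smoke test of GuildCleaner against empty in-memory structures; real QueueItem instances are omitted since ..models is not shown in this hunk:
import asyncio

async def demo():
    cleaner = GuildCleaner(strategy=GuildCleanupStrategy.FULL)
    cleared, initial = await cleaner.clear_guild_items(
        guild_id=42,
        queue=[],
        processing={},
        completed={},
        failed={},
        guild_queues={},
        channel_queues={},
    )
    print(cleared, initial)  # 0 items cleared; all initial counts are 0

asyncio.run(demo())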

View File

@@ -0,0 +1,336 @@
"""Module for cleaning historical queue items"""
import logging
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, Optional, List, Any, Set
from datetime import datetime, timedelta
from ..models import QueueItem
logger = logging.getLogger("HistoryCleaner")
class CleanupStrategy(Enum):
"""Cleanup strategies"""
AGGRESSIVE = "aggressive" # Remove more aggressively
CONSERVATIVE = "conservative" # Remove conservatively
BALANCED = "balanced" # Balance between retention and cleanup
class CleanupPolicy(Enum):
"""Cleanup policies"""
AGE = "age" # Clean based on age
SIZE = "size" # Clean based on size
HYBRID = "hybrid" # Consider both age and size
@dataclass
class CleanupThresholds:
"""Thresholds for cleanup operations"""
max_history_age: int = 43200 # 12 hours
max_completed_items: int = 10000
max_failed_items: int = 5000
min_retention_time: int = 3600 # 1 hour
size_threshold: int = 100 * 1024 * 1024 # 100MB
@dataclass
class CleanupResult:
"""Result of a cleanup operation"""
timestamp: datetime
items_cleaned: int
space_freed: int
duration: float
strategy: CleanupStrategy
policy: CleanupPolicy
details: Dict[str, Any] = field(default_factory=dict)
class CleanupTracker:
"""Tracks cleanup operations"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.history: List[CleanupResult] = []
self.total_items_cleaned = 0
self.total_space_freed = 0
self.last_cleanup: Optional[datetime] = None
def record_cleanup(self, result: CleanupResult) -> None:
"""Record a cleanup operation"""
self.history.append(result)
if len(self.history) > self.max_history:
self.history.pop(0)
self.total_items_cleaned += result.items_cleaned
self.total_space_freed += result.space_freed
self.last_cleanup = result.timestamp
def get_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
return {
"total_cleanups": len(self.history),
"total_items_cleaned": self.total_items_cleaned,
"total_space_freed": self.total_space_freed,
"last_cleanup": (
self.last_cleanup.isoformat()
if self.last_cleanup
else None
),
"recent_cleanups": [
{
"timestamp": r.timestamp.isoformat(),
"items_cleaned": r.items_cleaned,
"space_freed": r.space_freed,
"strategy": r.strategy.value,
"policy": r.policy.value
}
for r in self.history[-5:] # Last 5 cleanups
]
}
class HistoryCleaner:
"""Handles cleanup of historical queue items"""
def __init__(
self,
strategy: CleanupStrategy = CleanupStrategy.BALANCED,
policy: CleanupPolicy = CleanupPolicy.HYBRID,
thresholds: Optional[CleanupThresholds] = None
):
self.strategy = strategy
self.policy = policy
self.thresholds = thresholds or CleanupThresholds()
self.tracker = CleanupTracker()
    def _normalize_datetime(self, dt_value: Any) -> datetime:
"""Normalize a datetime value"""
current_time = datetime.utcnow()
if not isinstance(dt_value, datetime):
try:
if isinstance(dt_value, str):
return datetime.fromisoformat(dt_value)
else:
return current_time
except (ValueError, TypeError):
return current_time
return dt_value
async def cleanup_completed(
self,
completed: Dict[str, QueueItem],
cleanup_cutoff: datetime
) -> int:
"""Clean up completed items"""
start_time = datetime.utcnow()
items_cleaned = 0
space_freed = 0
try:
# Determine cleanup approach based on strategy and policy
if self.policy == CleanupPolicy.SIZE:
items_to_clean = self._get_items_by_size(completed)
elif self.policy == CleanupPolicy.HYBRID:
items_to_clean = self._get_items_hybrid(completed, cleanup_cutoff)
else: # AGE policy
items_to_clean = self._get_items_by_age(completed, cleanup_cutoff)
# Clean items
for url in items_to_clean:
try:
item = completed[url]
space_freed += self._estimate_item_size(item)
completed.pop(url)
items_cleaned += 1
                except Exception as e:
                    logger.error(f"Error cleaning completed item {url}: {e}")
                    completed.pop(url, None)  # item may already be gone
                    items_cleaned += 1
# Record cleanup
self._record_cleanup_result(
items_cleaned,
space_freed,
start_time,
"completed"
)
logger.debug(f"Cleaned {items_cleaned} completed items")
return items_cleaned
except Exception as e:
logger.error(f"Error during completed items cleanup: {e}")
return 0
async def cleanup_failed(
self,
failed: Dict[str, QueueItem],
cleanup_cutoff: datetime
) -> int:
"""Clean up failed items"""
start_time = datetime.utcnow()
items_cleaned = 0
space_freed = 0
try:
# Determine cleanup approach
if self.policy == CleanupPolicy.SIZE:
items_to_clean = self._get_items_by_size(failed)
elif self.policy == CleanupPolicy.HYBRID:
items_to_clean = self._get_items_hybrid(failed, cleanup_cutoff)
else: # AGE policy
items_to_clean = self._get_items_by_age(failed, cleanup_cutoff)
# Clean items
for url in items_to_clean:
try:
item = failed[url]
space_freed += self._estimate_item_size(item)
failed.pop(url)
items_cleaned += 1
                except Exception as e:
                    logger.error(f"Error cleaning failed item {url}: {e}")
                    failed.pop(url, None)  # item may already be gone
                    items_cleaned += 1
# Record cleanup
self._record_cleanup_result(
items_cleaned,
space_freed,
start_time,
"failed"
)
logger.debug(f"Cleaned {items_cleaned} failed items")
return items_cleaned
except Exception as e:
logger.error(f"Error during failed items cleanup: {e}")
return 0
def _get_items_by_age(
self,
items: Dict[str, QueueItem],
cutoff: datetime
) -> Set[str]:
"""Get items to clean based on age"""
to_clean = set()
for url, item in items.items():
item.added_at = self._normalize_datetime(item.added_at)
if item.added_at < cutoff:
to_clean.add(url)
return to_clean
def _get_items_by_size(self, items: Dict[str, QueueItem]) -> Set[str]:
"""Get items to clean based on size"""
to_clean = set()
total_size = 0
# Sort items by size estimate
sorted_items = sorted(
items.items(),
key=lambda x: self._estimate_item_size(x[1]),
reverse=True
)
for url, item in sorted_items:
total_size += self._estimate_item_size(item)
if total_size > self.thresholds.size_threshold:
to_clean.add(url)
return to_clean
def _get_items_hybrid(
self,
items: Dict[str, QueueItem],
cutoff: datetime
) -> Set[str]:
"""Get items to clean using hybrid approach"""
by_age = self._get_items_by_age(items, cutoff)
by_size = self._get_items_by_size(items)
if self.strategy == CleanupStrategy.AGGRESSIVE:
return by_age.union(by_size)
elif self.strategy == CleanupStrategy.CONSERVATIVE:
return by_age.intersection(by_size)
else: # BALANCED
return by_age
def _estimate_item_size(self, item: QueueItem) -> int:
"""Estimate size of an item in bytes"""
# This could be enhanced with actual file size tracking
base_size = 1024 # 1KB base size
return base_size * (item.retry_count + 1)
def _record_cleanup_result(
self,
items_cleaned: int,
space_freed: int,
start_time: datetime,
cleanup_type: str
) -> None:
"""Record cleanup result"""
duration = (datetime.utcnow() - start_time).total_seconds()
result = CleanupResult(
timestamp=datetime.utcnow(),
items_cleaned=items_cleaned,
space_freed=space_freed,
duration=duration,
strategy=self.strategy,
policy=self.policy,
details={"type": cleanup_type}
)
self.tracker.record_cleanup(result)
def get_cleanup_cutoff(self) -> datetime:
"""Get the cutoff time for cleanup"""
if self.strategy == CleanupStrategy.AGGRESSIVE:
age = self.thresholds.max_history_age // 2
elif self.strategy == CleanupStrategy.CONSERVATIVE:
age = self.thresholds.max_history_age * 2
else: # BALANCED
age = self.thresholds.max_history_age
return datetime.utcnow() - timedelta(seconds=max(
age,
self.thresholds.min_retention_time
))
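Worked cutoff arithmetic under the default thresholds (max_history_age=43200s, min_retention_time=3600s):
#   AGGRESSIVE:   now - max(43200 // 2, 3600) = now - 21600s  (6 hours)
#   CONSERVATIVE: now - max(43200 * 2, 3600)  = now - 86400s  (24 hours)
#   BALANCED:     now - max(43200, 3600)      = now - 43200s  (12 hours)
cleaner = HistoryCleaner(strategy=CleanupStrategy.AGGRESSIVE)
age = (datetime.utcnow() - cleaner.get_cleanup_cutoff()).total_seconds()
assert 21599 < age < 21601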
def format_cleanup_report(
self,
initial_completed: int,
final_completed: int,
initial_failed: int,
final_failed: int
) -> str:
"""Format a cleanup report"""
stats = self.tracker.get_stats()
return (
f"History Cleanup Results:\n"
f"- Completed items: {initial_completed} -> {final_completed}\n"
f"- Failed items: {initial_failed} -> {final_failed}\n"
f"- Total items cleaned: {(initial_completed - final_completed) + (initial_failed - final_failed)}\n"
f"- Space freed: {stats['total_space_freed']} bytes\n"
f"- Strategy: {self.strategy.value}\n"
f"- Policy: {self.policy.value}\n"
f"- Total cleanups: {stats['total_cleanups']}"
)
def get_cleaner_stats(self) -> Dict[str, Any]:
"""Get comprehensive cleaner statistics"""
return {
"strategy": self.strategy.value,
"policy": self.policy.value,
"thresholds": {
"max_history_age": self.thresholds.max_history_age,
"max_completed_items": self.thresholds.max_completed_items,
"max_failed_items": self.thresholds.max_failed_items,
"min_retention_time": self.thresholds.min_retention_time,
"size_threshold": self.thresholds.size_threshold
},
"tracker": self.tracker.get_stats()
}
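Exercising cleanup_completed with duck-typed stand-ins that carry only the attributes the cleaner touches (added_at and retry_count); URLs are invented:
import asyncio
from types import SimpleNamespace

async def demo():
    cleaner = HistoryCleaner(policy=CleanupPolicy.AGE)
    old = SimpleNamespace(added_at=datetime.utcnow() - timedelta(days=2), retry_count=0)
    recent = SimpleNamespace(added_at=datetime.utcnow(), retry_count=0)
    completed = {"https://a.example/v1": old, "https://a.example/v2": recent}
    cleaned = await cleaner.cleanup_completed(completed, cleaner.get_cleanup_cutoff())
    print(cleaned, list(completed))  # 1 ['https://a.example/v2']

asyncio.run(demo())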

View File

@@ -0,0 +1,452 @@
"""Module for cleaning queue tracking data"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, List, Set, Tuple, Any, Optional
from datetime import datetime
from ..models import QueueItem
logger = logging.getLogger("TrackingCleaner")
class TrackingCleanupStrategy(Enum):
"""Tracking cleanup strategies"""
AGGRESSIVE = "aggressive" # Remove all invalid entries
CONSERVATIVE = "conservative" # Keep recent invalid entries
BALANCED = "balanced" # Balance between cleanup and retention
class TrackingType(Enum):
"""Types of tracking data"""
GUILD = "guild"
CHANNEL = "channel"
URL = "url"
@dataclass
class TrackingCleanupConfig:
"""Configuration for tracking cleanup"""
batch_size: int = 100
retention_period: int = 3600 # 1 hour
validate_urls: bool = True
cleanup_empty: bool = True
max_invalid_ratio: float = 0.5 # 50% invalid threshold
@dataclass
class TrackingCleanupResult:
"""Result of a tracking cleanup operation"""
timestamp: datetime
strategy: TrackingCleanupStrategy
items_cleaned: int
guilds_cleaned: int
channels_cleaned: int
duration: float
initial_counts: Dict[str, int]
final_counts: Dict[str, int]
error: Optional[str] = None
class TrackingValidator:
"""Validates tracking data"""
@staticmethod
def validate_url(url: str) -> bool:
"""Validate URL format"""
try:
return bool(url and isinstance(url, str) and "://" in url)
except Exception:
return False
@staticmethod
def validate_id(id_value: int) -> bool:
"""Validate ID format"""
try:
return bool(isinstance(id_value, int) and id_value > 0)
except Exception:
return False
class TrackingCleanupTracker:
"""Tracks cleanup operations"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.history: List[TrackingCleanupResult] = []
self.total_items_cleaned = 0
self.total_guilds_cleaned = 0
self.total_channels_cleaned = 0
self.last_cleanup: Optional[datetime] = None
def record_cleanup(self, result: TrackingCleanupResult) -> None:
"""Record a cleanup operation"""
self.history.append(result)
if len(self.history) > self.max_history:
self.history.pop(0)
self.total_items_cleaned += result.items_cleaned
self.total_guilds_cleaned += result.guilds_cleaned
self.total_channels_cleaned += result.channels_cleaned
self.last_cleanup = result.timestamp
def get_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
return {
"total_cleanups": len(self.history),
"total_items_cleaned": self.total_items_cleaned,
"total_guilds_cleaned": self.total_guilds_cleaned,
"total_channels_cleaned": self.total_channels_cleaned,
"last_cleanup": (
self.last_cleanup.isoformat()
if self.last_cleanup
else None
),
"recent_cleanups": [
{
"timestamp": r.timestamp.isoformat(),
"strategy": r.strategy.value,
"items_cleaned": r.items_cleaned,
"guilds_cleaned": r.guilds_cleaned,
"channels_cleaned": r.channels_cleaned,
"duration": r.duration
}
for r in self.history[-5:] # Last 5 cleanups
]
}
class TrackingCleaner:
"""Handles cleanup of queue tracking data"""
def __init__(
self,
strategy: TrackingCleanupStrategy = TrackingCleanupStrategy.BALANCED,
config: Optional[TrackingCleanupConfig] = None
):
self.strategy = strategy
self.config = config or TrackingCleanupConfig()
self.tracker = TrackingCleanupTracker()
self.validator = TrackingValidator()
async def cleanup_tracking(
self,
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
queue: List[QueueItem],
processing: Dict[str, QueueItem]
) -> Tuple[int, Dict[str, int]]:
"""Clean up tracking data"""
start_time = datetime.utcnow()
try:
# Get initial counts
initial_counts = self._get_tracking_counts(
guild_queues,
channel_queues
)
# Get valid URLs
valid_urls = self._get_valid_urls(queue, processing)
# Clean tracking data based on strategy
items_cleaned = 0
guilds_cleaned = 0
channels_cleaned = 0
if self.strategy == TrackingCleanupStrategy.AGGRESSIVE:
cleaned = await self._aggressive_cleanup(
guild_queues,
channel_queues,
valid_urls
)
elif self.strategy == TrackingCleanupStrategy.CONSERVATIVE:
cleaned = await self._conservative_cleanup(
guild_queues,
channel_queues,
valid_urls
)
else: # BALANCED
cleaned = await self._balanced_cleanup(
guild_queues,
channel_queues,
valid_urls
)
items_cleaned = cleaned[0]
guilds_cleaned = cleaned[1]
channels_cleaned = cleaned[2]
# Get final counts
final_counts = self._get_tracking_counts(
guild_queues,
channel_queues
)
# Record cleanup result
duration = (datetime.utcnow() - start_time).total_seconds()
result = TrackingCleanupResult(
timestamp=datetime.utcnow(),
strategy=self.strategy,
items_cleaned=items_cleaned,
guilds_cleaned=guilds_cleaned,
channels_cleaned=channels_cleaned,
duration=duration,
initial_counts=initial_counts,
final_counts=final_counts
)
self.tracker.record_cleanup(result)
logger.info(self.format_tracking_cleanup_report(
initial_counts,
final_counts,
duration
))
return items_cleaned, initial_counts
except Exception as e:
logger.error(f"Error cleaning tracking data: {e}")
self.tracker.record_cleanup(TrackingCleanupResult(
timestamp=datetime.utcnow(),
strategy=self.strategy,
items_cleaned=0,
guilds_cleaned=0,
channels_cleaned=0,
duration=0,
initial_counts={},
final_counts={},
error=str(e)
))
raise
async def _aggressive_cleanup(
self,
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
valid_urls: Set[str]
) -> Tuple[int, int, int]:
"""Perform aggressive cleanup"""
items_cleaned = 0
guilds_cleaned = 0
channels_cleaned = 0
# Clean guild tracking
guild_cleaned = await self._cleanup_guild_tracking(
guild_queues,
valid_urls,
validate_all=True
)
items_cleaned += guild_cleaned[0]
guilds_cleaned += guild_cleaned[1]
# Clean channel tracking
channel_cleaned = await self._cleanup_channel_tracking(
channel_queues,
valid_urls,
validate_all=True
)
items_cleaned += channel_cleaned[0]
channels_cleaned += channel_cleaned[1]
return items_cleaned, guilds_cleaned, channels_cleaned
async def _conservative_cleanup(
self,
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
valid_urls: Set[str]
) -> Tuple[int, int, int]:
"""Perform conservative cleanup"""
items_cleaned = 0
guilds_cleaned = 0
channels_cleaned = 0
        # Only clean a mapping when its invalid ratio exceeds the threshold
        for guild_id, urls in list(guild_queues.items()):
            invalid_ratio = len(urls - valid_urls) / len(urls) if urls else 0
            if invalid_ratio > self.config.max_invalid_ratio:
                subset = {guild_id: urls}
                cleaned = await self._cleanup_guild_tracking(
                    subset,
                    valid_urls,
                    validate_all=False
                )
                # Propagate the filtered subset back to the real mapping,
                # since _cleanup_guild_tracking mutates only the dict it is given
                if guild_id in subset:
                    guild_queues[guild_id] = subset[guild_id]
                else:
                    guild_queues.pop(guild_id, None)
                items_cleaned += cleaned[0]
                guilds_cleaned += cleaned[1]
        for channel_id, urls in list(channel_queues.items()):
            invalid_ratio = len(urls - valid_urls) / len(urls) if urls else 0
            if invalid_ratio > self.config.max_invalid_ratio:
                subset = {channel_id: urls}
                cleaned = await self._cleanup_channel_tracking(
                    subset,
                    valid_urls,
                    validate_all=False
                )
                if channel_id in subset:
                    channel_queues[channel_id] = subset[channel_id]
                else:
                    channel_queues.pop(channel_id, None)
                items_cleaned += cleaned[0]
                channels_cleaned += cleaned[1]
return items_cleaned, guilds_cleaned, channels_cleaned
async def _balanced_cleanup(
self,
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]],
valid_urls: Set[str]
) -> Tuple[int, int, int]:
"""Perform balanced cleanup"""
items_cleaned = 0
guilds_cleaned = 0
channels_cleaned = 0
# Clean guild tracking with validation
guild_cleaned = await self._cleanup_guild_tracking(
guild_queues,
valid_urls,
validate_all=self.config.validate_urls
)
items_cleaned += guild_cleaned[0]
guilds_cleaned += guild_cleaned[1]
# Clean channel tracking with validation
channel_cleaned = await self._cleanup_channel_tracking(
channel_queues,
valid_urls,
validate_all=self.config.validate_urls
)
items_cleaned += channel_cleaned[0]
channels_cleaned += channel_cleaned[1]
return items_cleaned, guilds_cleaned, channels_cleaned
async def _cleanup_guild_tracking(
self,
guild_queues: Dict[int, Set[str]],
valid_urls: Set[str],
validate_all: bool
) -> Tuple[int, int]:
"""Clean up guild tracking data"""
items_cleaned = 0
guilds_cleaned = 0
batch_count = 0
for guild_id in list(guild_queues.keys()):
if not self.validator.validate_id(guild_id):
guild_queues.pop(guild_id)
guilds_cleaned += 1
continue
original_size = len(guild_queues[guild_id])
guild_queues[guild_id] = {
url for url in guild_queues[guild_id]
if (
(not validate_all or self.validator.validate_url(url)) and
url in valid_urls
)
}
items_cleaned += original_size - len(guild_queues[guild_id])
if self.config.cleanup_empty and not guild_queues[guild_id]:
guild_queues.pop(guild_id)
guilds_cleaned += 1
batch_count += 1
if batch_count >= self.config.batch_size:
await asyncio.sleep(0) # Yield to event loop
batch_count = 0
logger.debug(f"Cleaned {items_cleaned} guild tracking items")
return items_cleaned, guilds_cleaned
async def _cleanup_channel_tracking(
self,
channel_queues: Dict[int, Set[str]],
valid_urls: Set[str],
validate_all: bool
) -> Tuple[int, int]:
"""Clean up channel tracking data"""
items_cleaned = 0
channels_cleaned = 0
batch_count = 0
for channel_id in list(channel_queues.keys()):
if not self.validator.validate_id(channel_id):
channel_queues.pop(channel_id)
channels_cleaned += 1
continue
original_size = len(channel_queues[channel_id])
channel_queues[channel_id] = {
url for url in channel_queues[channel_id]
if (
(not validate_all or self.validator.validate_url(url)) and
url in valid_urls
)
}
items_cleaned += original_size - len(channel_queues[channel_id])
if self.config.cleanup_empty and not channel_queues[channel_id]:
channel_queues.pop(channel_id)
channels_cleaned += 1
batch_count += 1
if batch_count >= self.config.batch_size:
await asyncio.sleep(0) # Yield to event loop
batch_count = 0
logger.debug(f"Cleaned {items_cleaned} channel tracking items")
return items_cleaned, channels_cleaned
def _get_valid_urls(
self,
queue: List[QueueItem],
processing: Dict[str, QueueItem]
) -> Set[str]:
"""Get set of valid URLs"""
valid_urls = {item.url for item in queue}
valid_urls.update(processing.keys())
return valid_urls
def _get_tracking_counts(
self,
guild_queues: Dict[int, Set[str]],
channel_queues: Dict[int, Set[str]]
) -> Dict[str, int]:
"""Get tracking data counts"""
return {
'guilds': len(guild_queues),
'channels': len(channel_queues),
'guild_urls': sum(len(urls) for urls in guild_queues.values()),
'channel_urls': sum(len(urls) for urls in channel_queues.values())
}
def format_tracking_cleanup_report(
self,
initial_counts: Dict[str, int],
final_counts: Dict[str, int],
duration: float
) -> str:
"""Format a tracking cleanup report"""
total_cleaned = (
(initial_counts['guild_urls'] - final_counts['guild_urls']) +
(initial_counts['channel_urls'] - final_counts['channel_urls'])
)
return (
f"Tracking Cleanup Results:\n"
f"Strategy: {self.strategy.value}\n"
f"Duration: {duration:.2f}s\n"
f"Items:\n"
f"- Guild Queues: {initial_counts['guilds']} -> {final_counts['guilds']}\n"
f"- Channel Queues: {initial_counts['channels']} -> {final_counts['channels']}\n"
f"- Guild URLs: {initial_counts['guild_urls']} -> {final_counts['guild_urls']}\n"
f"- Channel URLs: {initial_counts['channel_urls']} -> {final_counts['channel_urls']}\n"
f"Total items cleaned: {total_cleaned}"
)
def get_cleaner_stats(self) -> Dict[str, Any]:
"""Get comprehensive cleaner statistics"""
return {
"strategy": self.strategy.value,
"config": {
"batch_size": self.config.batch_size,
"retention_period": self.config.retention_period,
"validate_urls": self.config.validate_urls,
"cleanup_empty": self.config.cleanup_empty,
"max_invalid_ratio": self.config.max_invalid_ratio
},
"tracker": self.tracker.get_stats()
}
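Reconciling the tracking maps against a live queue, with duck-typed items (only .url is read) and invented URLs:
import asyncio
from types import SimpleNamespace

async def demo():
    cleaner = TrackingCleaner(strategy=TrackingCleanupStrategy.AGGRESSIVE)
    live = SimpleNamespace(url="https://a.example/v1")
    guild_queues = {1: {"https://a.example/v1", "https://a.example/stale"}}
    channel_queues = {10: {"https://a.example/stale"}}
    cleaned, initial = await cleaner.cleanup_tracking(
        guild_queues, channel_queues, queue=[live], processing={}
    )
    print(cleaned)          # 2 stale URLs removed
    print(guild_queues)     # {1: {'https://a.example/v1'}}
    print(channel_queues)   # {} (emptied channel entry dropped)

asyncio.run(demo())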

View File

@@ -2,316 +2,459 @@
import asyncio
import logging
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, List, Set, Optional, Any, Tuple
from datetime import datetime, timedelta
from .models import QueueItem, QueueMetrics
from .cleaners.history_cleaner import (
HistoryCleaner,
CleanupStrategy as HistoryStrategy
)
from .cleaners.guild_cleaner import (
GuildCleaner,
GuildCleanupStrategy
)
from .cleaners.tracking_cleaner import (
TrackingCleaner,
TrackingCleanupStrategy
)
logger = logging.getLogger("QueueCleanup")
class CleanupMode(Enum):
    """Cleanup operation modes"""
    NORMAL = "normal"  # Regular cleanup
    AGGRESSIVE = "aggressive"  # More aggressive cleanup
    MAINTENANCE = "maintenance"  # Maintenance mode cleanup
    EMERGENCY = "emergency"  # Emergency cleanup
class CleanupPhase(Enum):
"""Cleanup operation phases"""
HISTORY = "history"
TRACKING = "tracking"
GUILD = "guild"
VERIFICATION = "verification"
@dataclass
class CleanupConfig:
"""Configuration for cleanup operations"""
cleanup_interval: int = 1800 # 30 minutes
max_history_age: int = 43200 # 12 hours
batch_size: int = 100
max_concurrent_cleanups: int = 3
verification_interval: int = 300 # 5 minutes
emergency_threshold: int = 10000 # Items threshold for emergency
@dataclass
class CleanupResult:
"""Result of a cleanup operation"""
timestamp: datetime
mode: CleanupMode
duration: float
items_cleaned: Dict[CleanupPhase, int]
error: Optional[str] = None
class CleanupScheduler:
"""Schedules cleanup operations"""
def __init__(self, config: CleanupConfig):
self.config = config
self.next_cleanup: Optional[datetime] = None
self.next_verification: Optional[datetime] = None
self._last_emergency: Optional[datetime] = None
def should_cleanup(self, queue_size: int) -> Tuple[bool, CleanupMode]:
"""Determine if cleanup should run"""
now = datetime.utcnow()
# Check for emergency cleanup
if (
queue_size > self.config.emergency_threshold and
(
not self._last_emergency or
now - self._last_emergency > timedelta(minutes=5)
)
):
self._last_emergency = now
return True, CleanupMode.EMERGENCY
# Check scheduled cleanup
if not self.next_cleanup or now >= self.next_cleanup:
self.next_cleanup = now + timedelta(
seconds=self.config.cleanup_interval
)
return True, CleanupMode.NORMAL
# Check verification
if not self.next_verification or now >= self.next_verification:
self.next_verification = now + timedelta(
seconds=self.config.verification_interval
)
return True, CleanupMode.MAINTENANCE
return False, CleanupMode.NORMAL
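How the scheduler's decisions play out with a default config (emergency_threshold=10000): the first call schedules a NORMAL pass, the second a MAINTENANCE verification, then nothing until the timers elapse:
scheduler = CleanupScheduler(CleanupConfig())
print(scheduler.should_cleanup(queue_size=50))     # (True, CleanupMode.NORMAL)
print(scheduler.should_cleanup(queue_size=50))     # (True, CleanupMode.MAINTENANCE)
print(scheduler.should_cleanup(queue_size=50))     # (False, CleanupMode.NORMAL)
print(scheduler.should_cleanup(queue_size=20000))  # (True, CleanupMode.EMERGENCY)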
class CleanupCoordinator:
"""Coordinates cleanup operations"""
def __init__(self):
self.active_cleanups: Set[CleanupPhase] = set()
self._cleanup_lock = asyncio.Lock()
self._phase_locks: Dict[CleanupPhase, asyncio.Lock] = {
phase: asyncio.Lock() for phase in CleanupPhase
}
async def start_cleanup(self, phase: CleanupPhase) -> bool:
"""Start a cleanup phase"""
async with self._cleanup_lock:
if phase in self.active_cleanups:
return False
self.active_cleanups.add(phase)
return True
async def end_cleanup(self, phase: CleanupPhase) -> None:
"""End a cleanup phase"""
async with self._cleanup_lock:
self.active_cleanups.discard(phase)
async def acquire_phase(self, phase: CleanupPhase) -> bool:
"""Acquire lock for a cleanup phase"""
return await self._phase_locks[phase].acquire()
def release_phase(self, phase: CleanupPhase) -> None:
"""Release lock for a cleanup phase"""
self._phase_locks[phase].release()
class CleanupTracker:
"""Tracks cleanup operations"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.history: List[CleanupResult] = []
self.total_items_cleaned = 0
self.last_cleanup: Optional[datetime] = None
self.cleanup_counts: Dict[CleanupMode, int] = {
mode: 0 for mode in CleanupMode
}
def record_cleanup(self, result: CleanupResult) -> None:
"""Record a cleanup operation"""
self.history.append(result)
if len(self.history) > self.max_history:
self.history.pop(0)
self.total_items_cleaned += sum(result.items_cleaned.values())
self.last_cleanup = result.timestamp
self.cleanup_counts[result.mode] += 1
def get_stats(self) -> Dict[str, Any]:
"""Get cleanup statistics"""
return {
"total_cleanups": len(self.history),
"total_items_cleaned": self.total_items_cleaned,
"last_cleanup": (
self.last_cleanup.isoformat()
if self.last_cleanup
else None
),
"cleanup_counts": {
mode.value: count
for mode, count in self.cleanup_counts.items()
},
"recent_cleanups": [
{
"timestamp": r.timestamp.isoformat(),
"mode": r.mode.value,
"duration": r.duration,
"items_cleaned": {
phase.value: count
for phase, count in r.items_cleaned.items()
}
}
for r in self.history[-5:] # Last 5 cleanups
]
}
class QueueCleaner:
"""Handles cleanup of queue items and tracking data"""
def __init__(self, config: Optional[CleanupConfig] = None):
self.config = config or CleanupConfig()
self.scheduler = CleanupScheduler(self.config)
self.coordinator = CleanupCoordinator()
self.tracker = CleanupTracker()
# Initialize cleaners
self.history_cleaner = HistoryCleaner()
self.guild_cleaner = GuildCleaner()
self.tracking_cleaner = TrackingCleaner()
self._shutdown = False
self._cleanup_task: Optional[asyncio.Task] = None
self._last_cleanup_time = datetime.utcnow()
    async def start(
self,
state_manager,
metrics_manager
) -> None:
"""Start periodic cleanup process
Args:
queue: Reference to the queue list
completed: Reference to completed items dict
failed: Reference to failed items dict
guild_queues: Reference to guild tracking dict
channel_queues: Reference to channel tracking dict
processing: Reference to processing dict
metrics: Reference to queue metrics
queue_lock: Lock for queue operations
"""
"""Start periodic cleanup process"""
if self._cleanup_task is not None:
logger.warning("Cleanup task already running")
return
logger.info("Starting queue cleanup task...")
self._cleanup_task = asyncio.create_task(
self._cleanup_loop(state_manager, metrics_manager)
)
async def _cleanup_loop(
self,
state_manager,
metrics_manager
) -> None:
"""Main cleanup loop"""
while not self._shutdown:
try:
# Check if cleanup should run
queue_size = len(await state_manager.get_queue())
should_run, mode = self.scheduler.should_cleanup(queue_size)
if should_run:
await self._perform_cleanup(
state_manager,
metrics_manager,
mode
)
self._last_cleanup_time = datetime.utcnow()
await asyncio.sleep(1)  # Short sleep to prevent CPU hogging
except asyncio.CancelledError:
logger.info("Queue cleanup cancelled")
break
except Exception as e:
logger.error(f"Error in cleanup loop: {str(e)}")
await asyncio.sleep(30)  # Longer sleep on error before retrying
async def stop(self) -> None:
"""Stop the cleanup process"""
logger.info("Stopping queue cleanup...")
self._shutdown = True
if self._cleanup_task and not self._cleanup_task.done():
self._cleanup_task.cancel()
try:
await self._cleanup_task
except asyncio.CancelledError:
pass
self._cleanup_task = None
async def _perform_cleanup(
self,
state_manager,
metrics_manager,
mode: CleanupMode
) -> None:
"""Perform cleanup operations
Args:
state_manager: Manager providing access to queue state
metrics_manager: Manager for queue and cleanup metrics
mode: Cleanup mode to apply for this run
"""
start_time = datetime.utcnow()
items_cleaned: Dict[CleanupPhase, int] = {
phase: 0 for phase in CleanupPhase
}
try:
# Get current state
queue = await state_manager.get_queue()
processing = await state_manager.get_processing()
completed = await state_manager.get_completed()
failed = await state_manager.get_failed()
guild_queues = await state_manager.get_guild_queues()
channel_queues = await state_manager.get_channel_queues()
# Clean historical items
if await self.coordinator.start_cleanup(CleanupPhase.HISTORY):
try:
await self.coordinator.acquire_phase(CleanupPhase.HISTORY)
cleanup_cutoff = self.history_cleaner.get_cleanup_cutoff()
# Adjust strategy based on mode
if mode == CleanupMode.AGGRESSIVE:
self.history_cleaner.strategy = HistoryStrategy.AGGRESSIVE
elif mode == CleanupMode.MAINTENANCE:
self.history_cleaner.strategy = HistoryStrategy.CONSERVATIVE
completed_cleaned = await self.history_cleaner.cleanup_completed(
completed,
cleanup_cutoff
)
failed_cleaned = await self.history_cleaner.cleanup_failed(
failed,
cleanup_cutoff
)
items_cleaned[CleanupPhase.HISTORY] = (
completed_cleaned + failed_cleaned
)
finally:
self.coordinator.release_phase(CleanupPhase.HISTORY)
await self.coordinator.end_cleanup(CleanupPhase.HISTORY)
# Clean tracking data
if await self.coordinator.start_cleanup(CleanupPhase.TRACKING):
try:
await self.coordinator.acquire_phase(CleanupPhase.TRACKING)
# Adjust strategy based on mode
if mode == CleanupMode.AGGRESSIVE:
self.tracking_cleaner.strategy = TrackingCleanupStrategy.AGGRESSIVE
elif mode == CleanupMode.MAINTENANCE:
self.tracking_cleaner.strategy = TrackingCleanupStrategy.CONSERVATIVE
tracking_cleaned, _ = await self.tracking_cleaner.cleanup_tracking(
guild_queues,
channel_queues,
queue,
processing
)
items_cleaned[CleanupPhase.TRACKING] = tracking_cleaned
finally:
self.coordinator.release_phase(CleanupPhase.TRACKING)
await self.coordinator.end_cleanup(CleanupPhase.TRACKING)
# Update state
await state_manager.update_state(
completed=completed,
failed=failed,
guild_queues=guild_queues,
channel_queues=channel_queues
)
# Record cleanup result
duration = (datetime.utcnow() - start_time).total_seconds()
result = CleanupResult(
timestamp=datetime.utcnow(),
mode=mode,
duration=duration,
items_cleaned=items_cleaned
)
self.tracker.record_cleanup(result)
# Update metrics
metrics_manager.update_cleanup_time()
logger.info(
f"Cleanup completed ({mode.value}):\n" +
"\n".join(
f"- {phase.value}: {count} items"
for phase, count in items_cleaned.items()
if count > 0
) +
f"\nTotal duration: {duration:.2f}s"
)
except Exception as e:
logger.error(f"Error during cleanup: {str(e)}")
duration = (datetime.utcnow() - start_time).total_seconds()
self.tracker.record_cleanup(CleanupResult(
timestamp=datetime.utcnow(),
mode=mode,
duration=duration,
items_cleaned=items_cleaned,
error=str(e)
))
raise CleanupError(f"Cleanup failed: {str(e)}")
async def clear_guild_queue(
self,
guild_id: int,
state_manager
) -> int:
"""Clear all queue items for a specific guild
Args:
guild_id: ID of the guild to clear
state_manager: Manager providing access to queue state
Returns:
Number of items cleared
"""
try:
if not await self.coordinator.start_cleanup(CleanupPhase.GUILD):
raise CleanupError("Guild cleanup already in progress")
try:
await self.coordinator.acquire_phase(CleanupPhase.GUILD)
# Get current state
queue = await state_manager.get_queue()
processing = await state_manager.get_processing()
completed = await state_manager.get_completed()
failed = await state_manager.get_failed()
guild_queues = await state_manager.get_guild_queues()
channel_queues = await state_manager.get_channel_queues()
# Clear guild items
cleared_count, counts = await self.guild_cleaner.clear_guild_items(
guild_id,
queue,
processing,
completed,
failed,
guild_queues,
channel_queues
)
# Update state
await state_manager.update_state(
queue=queue,
processing=processing,
completed=completed,
failed=failed,
guild_queues=guild_queues,
channel_queues=channel_queues
)
logger.info(
f"Cleared guild {guild_id} queue:\n" +
"\n".join(f"- {name}: {count} items" for name, count in counts.items()) +
f"\nTotal cleared: {cleared_count} items"
)
return cleared_count
finally:
self.coordinator.release_phase(CleanupPhase.GUILD)
await self.coordinator.end_cleanup(CleanupPhase.GUILD)
except Exception as e:
logger.error(f"Error clearing guild queue: {str(e)}")
raise CleanupError(f"Failed to clear guild queue: {str(e)}")
def get_cleaner_stats(self) -> Dict[str, Any]:
"""Get comprehensive cleaner statistics"""
return {
"config": {
"cleanup_interval": self.config.cleanup_interval,
"max_history_age": self.config.max_history_age,
"batch_size": self.config.batch_size,
"max_concurrent_cleanups": self.config.max_concurrent_cleanups,
"verification_interval": self.config.verification_interval,
"emergency_threshold": self.config.emergency_threshold
},
"scheduler": {
"next_cleanup": (
self.scheduler.next_cleanup.isoformat()
if self.scheduler.next_cleanup
else None
),
"next_verification": (
self.scheduler.next_verification.isoformat()
if self.scheduler.next_verification
else None
),
"last_emergency": (
self.scheduler._last_emergency.isoformat()
if self.scheduler._last_emergency
else None
)
},
"coordinator": {
"active_cleanups": [
phase.value for phase in self.coordinator.active_cleanups
]
},
"tracker": self.tracker.get_stats(),
"cleaners": {
"history": self.history_cleaner.get_cleaner_stats(),
"guild": self.guild_cleaner.get_cleaner_stats(),
"tracking": self.tracking_cleaner.get_cleaner_stats()
}
}
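# Usage sketch (illustrative only): run the cleaner against the new manager
# components. state_mgr and metrics_mgr stand in for the QueueStateManager
# and QueueMetricsManager instances owned by the queue manager.
#
#   cleaner = QueueCleaner(CleanupConfig(cleanup_interval=1800))
#   await cleaner.start(state_manager=state_mgr, metrics_manager=metrics_mgr)
#   ...
#   await cleaner.stop()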
class CleanupError(Exception):
"""Base exception for cleanup-related errors"""
pass

View File

@@ -0,0 +1,441 @@
"""Module for queue health checks"""
import logging
import psutil
import time
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, Optional, Tuple, List, Any, Set
from datetime import datetime, timedelta
logger = logging.getLogger("QueueHealthChecker")
class HealthStatus(Enum):
"""Possible health status values"""
HEALTHY = "healthy"
WARNING = "warning"
CRITICAL = "critical"
UNKNOWN = "unknown"
class HealthCategory(Enum):
"""Health check categories"""
MEMORY = "memory"
PERFORMANCE = "performance"
ACTIVITY = "activity"
ERRORS = "errors"
DEADLOCKS = "deadlocks"
SYSTEM = "system"
@dataclass
class HealthThresholds:
"""Defines thresholds for health checks"""
memory_warning_mb: int = 384 # 384MB
memory_critical_mb: int = 512 # 512MB
deadlock_warning_sec: int = 30 # 30 seconds
deadlock_critical_sec: int = 60 # 1 minute
error_rate_warning: float = 0.1 # 10% errors
error_rate_critical: float = 0.2 # 20% errors
inactivity_warning_sec: int = 30
inactivity_critical_sec: int = 60
cpu_warning_percent: float = 80.0
cpu_critical_percent: float = 90.0
@dataclass
class HealthCheckResult:
"""Result of a health check"""
category: HealthCategory
status: HealthStatus
message: str
value: Optional[float] = None
timestamp: datetime = field(default_factory=datetime.utcnow)
details: Dict[str, Any] = field(default_factory=dict)
class HealthHistory:
"""Tracks health check history"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.history: List[HealthCheckResult] = []
self.status_changes: List[Dict[str, Any]] = []
self.critical_events: List[Dict[str, Any]] = []
def add_result(self, result: HealthCheckResult) -> None:
"""Add a health check result"""
self.history.append(result)
if len(self.history) > self.max_history:
self.history.pop(0)
# Track status changes
if len(self.history) >= 2 and self.history[-1].status != self.history[-2].status:
self.status_changes.append({
"timestamp": result.timestamp,
"category": result.category.value,
"from_status": self.history[-2].status.value,
"to_status": result.status.value,
"message": result.message
})
# Track critical events
if result.status == HealthStatus.CRITICAL:
self.critical_events.append({
"timestamp": result.timestamp,
"category": result.category.value,
"message": result.message,
"details": result.details
})
def get_status_summary(self) -> Dict[str, Any]:
"""Get summary of health status history"""
return {
"total_checks": len(self.history),
"status_changes": len(self.status_changes),
"critical_events": len(self.critical_events),
"recent_status_changes": self.status_changes[-5:],
"recent_critical_events": self.critical_events[-5:]
}
class SystemHealthMonitor:
"""Monitors system health metrics"""
def __init__(self):
self.process = psutil.Process()
async def check_system_health(self) -> Dict[str, Any]:
"""Check system health metrics"""
try:
cpu_percent = self.process.cpu_percent()
memory_info = self.process.memory_info()
io_counters = self.process.io_counters()
return {
"cpu_percent": cpu_percent,
"memory_rss": memory_info.rss / 1024 / 1024, # MB
"memory_vms": memory_info.vms / 1024 / 1024, # MB
"io_read_mb": io_counters.read_bytes / 1024 / 1024,
"io_write_mb": io_counters.write_bytes / 1024 / 1024,
"thread_count": self.process.num_threads(),
"open_files": len(self.process.open_files()),
"connections": len(self.process.connections())
}
except Exception as e:
logger.error(f"Error checking system health: {e}")
return {}
class HealthChecker:
"""Handles health checks for the queue system"""
def __init__(
self,
thresholds: Optional[HealthThresholds] = None,
history_size: int = 1000
):
self.thresholds = thresholds or HealthThresholds()
self.history = HealthHistory(history_size)
self.system_monitor = SystemHealthMonitor()
self._last_gc_time: Optional[datetime] = None
async def check_health(
self,
metrics: Dict[str, Any],
queue_info: Dict[str, Any]
) -> Dict[str, Any]:
"""Perform comprehensive health check"""
results = []
# Check memory health
memory_result = await self._check_memory_health()
results.append(memory_result)
# Check performance health
perf_result = self._check_performance_health(metrics)
results.append(perf_result)
# Check activity health
activity_result = self._check_activity_health(
queue_info["last_activity"],
queue_info["processing_count"] > 0
)
results.append(activity_result)
# Check error health
error_result = self._check_error_health(metrics)
results.append(error_result)
# Check for deadlocks
deadlock_result = self._check_deadlocks(queue_info)
results.append(deadlock_result)
# Check system health
system_result = await self._check_system_health()
results.append(system_result)
# Record results
for result in results:
self.history.add_result(result)
# Determine overall health
overall_status = self._determine_overall_status(results)
return {
"timestamp": datetime.utcnow().isoformat(),
"overall_status": overall_status.value,
"checks": [
{
"category": r.category.value,
"status": r.status.value,
"message": r.message,
"value": r.value,
"details": r.details
}
for r in results
],
"history": self.history.get_status_summary()
}
async def _check_memory_health(self) -> HealthCheckResult:
"""Check memory health"""
try:
memory_usage = psutil.Process().memory_info().rss / 1024 / 1024 # MB
if memory_usage > self.thresholds.memory_critical_mb:
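# Rate-limit garbage collection to once per five minutes, then re-measure
# before reporting a critical status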
if (
not self._last_gc_time or
datetime.utcnow() - self._last_gc_time > timedelta(minutes=5)
):
import gc
gc.collect()
self._last_gc_time = datetime.utcnow()
memory_usage = psutil.Process().memory_info().rss / 1024 / 1024
status = HealthStatus.CRITICAL
message = f"Critical memory usage: {memory_usage:.1f}MB"
elif memory_usage > self.thresholds.memory_warning_mb:
status = HealthStatus.WARNING
message = f"High memory usage: {memory_usage:.1f}MB"
else:
status = HealthStatus.HEALTHY
message = f"Normal memory usage: {memory_usage:.1f}MB"
return HealthCheckResult(
category=HealthCategory.MEMORY,
status=status,
message=message,
value=memory_usage
)
except Exception as e:
logger.error(f"Error checking memory health: {e}")
return HealthCheckResult(
category=HealthCategory.MEMORY,
status=HealthStatus.UNKNOWN,
message=f"Error checking memory: {str(e)}"
)
def _check_performance_health(self, metrics: Dict[str, Any]) -> HealthCheckResult:
"""Check performance health"""
try:
avg_time = metrics.get("avg_processing_time", 0)
success_rate = metrics.get("success_rate", 1.0)
if success_rate < 0.5: # Less than 50% success
status = HealthStatus.CRITICAL
message = f"Critical performance: {success_rate:.1%} success rate"
elif success_rate < 0.8: # Less than 80% success
status = HealthStatus.WARNING
message = f"Degraded performance: {success_rate:.1%} success rate"
else:
status = HealthStatus.HEALTHY
message = f"Normal performance: {success_rate:.1%} success rate"
return HealthCheckResult(
category=HealthCategory.PERFORMANCE,
status=status,
message=message,
value=success_rate,
details={"avg_processing_time": avg_time}
)
except Exception as e:
logger.error(f"Error checking performance health: {e}")
return HealthCheckResult(
category=HealthCategory.PERFORMANCE,
status=HealthStatus.UNKNOWN,
message=f"Error checking performance: {str(e)}"
)
def _check_activity_health(
self,
last_activity_time: float,
has_processing_items: bool
) -> HealthCheckResult:
"""Check activity health"""
if not has_processing_items:
return HealthCheckResult(
category=HealthCategory.ACTIVITY,
status=HealthStatus.HEALTHY,
message="No items being processed"
)
inactive_time = time.time() - last_activity_time
if inactive_time > self.thresholds.inactivity_critical_sec:
status = HealthStatus.CRITICAL
message = f"No activity for {inactive_time:.1f}s"
elif inactive_time > self.thresholds.inactivity_warning_sec:
status = HealthStatus.WARNING
message = f"Limited activity for {inactive_time:.1f}s"
else:
status = HealthStatus.HEALTHY
message = "Normal activity levels"
return HealthCheckResult(
category=HealthCategory.ACTIVITY,
status=status,
message=message,
value=inactive_time
)
def _check_error_health(self, metrics: Dict[str, Any]) -> HealthCheckResult:
"""Check error health"""
try:
error_rate = metrics.get("error_rate", 0.0)
error_count = metrics.get("total_errors", 0)
if error_rate > self.thresholds.error_rate_critical:
status = HealthStatus.CRITICAL
message = f"Critical error rate: {error_rate:.1%}"
elif error_rate > self.thresholds.error_rate_warning:
status = HealthStatus.WARNING
message = f"High error rate: {error_rate:.1%}"
else:
status = HealthStatus.HEALTHY
message = f"Normal error rate: {error_rate:.1%}"
return HealthCheckResult(
category=HealthCategory.ERRORS,
status=status,
message=message,
value=error_rate,
details={"error_count": error_count}
)
except Exception as e:
logger.error(f"Error checking error health: {e}")
return HealthCheckResult(
category=HealthCategory.ERRORS,
status=HealthStatus.UNKNOWN,
message=f"Error checking errors: {str(e)}"
)
def _check_deadlocks(self, queue_info: Dict[str, Any]) -> HealthCheckResult:
"""Check for potential deadlocks"""
try:
stuck_items = queue_info.get("stuck_items", [])
if not stuck_items:
return HealthCheckResult(
category=HealthCategory.DEADLOCKS,
status=HealthStatus.HEALTHY,
message="No stuck items detected"
)
longest_stuck = max(
time.time() - item["start_time"]
for item in stuck_items
)
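# The age of the longest-stuck item decides between warning and critical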
if longest_stuck > self.thresholds.deadlock_critical_sec:
status = HealthStatus.CRITICAL
message = f"Potential deadlock: {len(stuck_items)} items stuck"
elif longest_stuck > self.thresholds.deadlock_warning_sec:
status = HealthStatus.WARNING
message = f"Slow processing: {len(stuck_items)} items delayed"
else:
status = HealthStatus.HEALTHY
message = "Normal processing time"
return HealthCheckResult(
category=HealthCategory.DEADLOCKS,
status=status,
message=message,
value=longest_stuck,
details={"stuck_items": len(stuck_items)}
)
except Exception as e:
logger.error(f"Error checking deadlocks: {e}")
return HealthCheckResult(
category=HealthCategory.DEADLOCKS,
status=HealthStatus.UNKNOWN,
message=f"Error checking deadlocks: {str(e)}"
)
async def _check_system_health(self) -> HealthCheckResult:
"""Check system health"""
try:
metrics = await self.system_monitor.check_system_health()
if not metrics:
return HealthCheckResult(
category=HealthCategory.SYSTEM,
status=HealthStatus.UNKNOWN,
message="Unable to get system metrics"
)
cpu_percent = metrics["cpu_percent"]
if cpu_percent > self.thresholds.cpu_critical_percent:
status = HealthStatus.CRITICAL
message = f"Critical CPU usage: {cpu_percent:.1f}%"
elif cpu_percent > self.thresholds.cpu_warning_percent:
status = HealthStatus.WARNING
message = f"High CPU usage: {cpu_percent:.1f}%"
else:
status = HealthStatus.HEALTHY
message = f"Normal CPU usage: {cpu_percent:.1f}%"
return HealthCheckResult(
category=HealthCategory.SYSTEM,
status=status,
message=message,
value=cpu_percent,
details=metrics
)
except Exception as e:
logger.error(f"Error checking system health: {e}")
return HealthCheckResult(
category=HealthCategory.SYSTEM,
status=HealthStatus.UNKNOWN,
message=f"Error checking system: {str(e)}"
)
def _determine_overall_status(
self,
results: List[HealthCheckResult]
) -> HealthStatus:
"""Determine overall health status"""
if any(r.status == HealthStatus.CRITICAL for r in results):
return HealthStatus.CRITICAL
if any(r.status == HealthStatus.WARNING for r in results):
return HealthStatus.WARNING
if any(r.status == HealthStatus.UNKNOWN for r in results):
return HealthStatus.UNKNOWN
return HealthStatus.HEALTHY
def format_health_report(
self,
results: List[HealthCheckResult]
) -> str:
"""Format a detailed health report"""
lines = ["Queue Health Report:"]
for result in results:
lines.append(
f"\n{result.category.value.title()}:"
f"\n- Status: {result.status.value}"
f"\n- {result.message}"
)
if result.details:
for key, value in result.details.items():
lines.append(f" - {key}: {value}")
return "\n".join(lines)

View File

@@ -2,274 +2,292 @@
import asyncio
import logging
import time
from enum import Enum
from dataclasses import dataclass, field
from typing import Optional, Tuple, Dict, Any, List, Set
from datetime import datetime, timedelta
from .state_manager import QueueStateManager
from .processor import QueueProcessor
from .metrics_manager import QueueMetricsManager
from .persistence import QueuePersistenceManager
from .monitoring import QueueMonitor, MonitoringLevel
from .cleanup import QueueCleaner, CleanupConfig
from .models import QueueItem, QueueError, CleanupError
# Configure logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("QueueManager")
class QueueState(Enum):
"""Queue operational states"""
UNINITIALIZED = "uninitialized"
INITIALIZING = "initializing"
RUNNING = "running"
PAUSED = "paused"
STOPPING = "stopping"
STOPPED = "stopped"
ERROR = "error"
class QueueMode(Enum):
"""Queue processing modes"""
NORMAL = "normal" # Standard processing
BATCH = "batch" # Batch processing
PRIORITY = "priority" # Priority-based processing
MAINTENANCE = "maintenance" # Maintenance mode
@dataclass
class QueueConfig:
"""Queue configuration settings"""
max_retries: int = 3
retry_delay: int = 5
max_queue_size: int = 1000
cleanup_interval: int = 3600 # 1 hour
max_history_age: int = 86400 # 24 hours
deadlock_threshold: int = 300 # 5 minutes
check_interval: int = 60 # 1 minute
batch_size: int = 10
max_concurrent: int = 3
persistence_enabled: bool = True
monitoring_level: MonitoringLevel = MonitoringLevel.NORMAL
@dataclass
class QueueStats:
"""Queue statistics"""
start_time: datetime = field(default_factory=datetime.utcnow)
total_processed: int = 0
total_failed: int = 0
uptime: timedelta = field(default_factory=lambda: timedelta())
peak_queue_size: int = 0
peak_memory_usage: float = 0.0
state_changes: List[Dict[str, Any]] = field(default_factory=list)
class QueueCoordinator:
"""Coordinates queue operations"""
def __init__(self):
self.state = QueueState.UNINITIALIZED
self.mode = QueueMode.NORMAL
self._state_lock = asyncio.Lock()
self._mode_lock = asyncio.Lock()
self._paused = asyncio.Event()
self._paused.set()
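# The event starts set, so wait_if_paused() returns immediately until
# pause() clears it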
async def set_state(self, state: QueueState) -> None:
"""Set queue state"""
async with self._state_lock:
self.state = state
async def set_mode(self, mode: QueueMode) -> None:
"""Set queue mode"""
async with self._mode_lock:
self.mode = mode
async def pause(self) -> None:
"""Pause queue processing"""
self._paused.clear()
await self.set_state(QueueState.PAUSED)
async def resume(self) -> None:
"""Resume queue processing"""
self._paused.set()
await self.set_state(QueueState.RUNNING)
async def wait_if_paused(self) -> None:
"""Wait if queue is paused"""
await self._paused.wait()
class EnhancedVideoQueueManager:
"""Enhanced queue manager with improved memory management and performance"""
"""Enhanced queue manager with improved organization and maintainability"""
def __init__(self, config: Optional[QueueConfig] = None):
"""Initialize queue manager components"""
self.config = config or QueueConfig()
self.coordinator = QueueCoordinator()
self.stats = QueueStats()
# Initialize managers
self.state_manager = QueueStateManager(self.config.max_queue_size)
self.metrics_manager = QueueMetricsManager()
self.monitor = QueueMonitor(
deadlock_threshold=self.config.deadlock_threshold,
max_retries=self.config.max_retries,
check_interval=self.config.check_interval
)
self.cleaner = QueueCleaner(
CleanupConfig(
cleanup_interval=self.config.cleanup_interval,
max_history_age=self.config.max_history_age
)
)
# Initialize persistence if enabled
self.persistence = (
QueuePersistenceManager()
if self.config.persistence_enabled
else None
)
# Initialize processor
self.processor = QueueProcessor(
state_manager=self.state_manager,
monitor=self.monitor,
max_retries=self.config.max_retries,
retry_delay=self.config.retry_delay,
batch_size=self.config.batch_size,
max_concurrent=self.config.max_concurrent
)
# Background tasks
self._maintenance_task: Optional[asyncio.Task] = None
self._stats_task: Optional[asyncio.Task] = None
async def initialize(self) -> None:
"""Initialize the queue manager components sequentially"""
if self._initialized:
"""Initialize the queue manager components"""
if self.coordinator.state != QueueState.UNINITIALIZED:
logger.info("Queue manager already initialized")
return
try:
await self.coordinator.set_state(QueueState.INITIALIZING)
logger.info("Starting queue manager initialization...")
# Load persisted state if available
if self.persistence:
await self._load_persisted_state()
# Start monitoring with configured level
self.monitor.strategy.level = self.config.monitoring_level
await self.monitor.start(
self.state_manager,
self.metrics_manager
)
logger.info("Queue monitoring started")
# Start cleanup task
await self.cleaner.start(
state_manager=self.state_manager,
metrics_manager=self.metrics_manager
)
logger.info("Queue cleanup started")
# Start background tasks
self._start_background_tasks()
await self.coordinator.set_state(QueueState.RUNNING)
logger.info("Queue manager initialization completed")
except Exception as e:
await self.coordinator.set_state(QueueState.ERROR)
logger.error(f"Failed to initialize queue manager: {e}")
raise
async def _load_persisted_state(self) -> None:
"""Load persisted queue state"""
try:
state = await self.persistence.load_queue_state()
if state:
await self.state_manager.restore_state(state)
self.metrics_manager.restore_metrics(state.get("metrics", {}))
logger.info("Loaded persisted queue state")
except Exception as e:
logger.error(f"Failed to load persisted state: {e}")
def _start_background_tasks(self) -> None:
"""Start background maintenance tasks"""
self._maintenance_task = asyncio.create_task(
self._maintenance_loop()
)
self._stats_task = asyncio.create_task(
self._stats_loop()
)
async def _maintenance_loop(self) -> None:
"""Background maintenance loop"""
while self.coordinator.state not in (QueueState.STOPPED, QueueState.ERROR):
try:
await asyncio.sleep(300)  # Every 5 minutes
if self.coordinator.mode == QueueMode.MAINTENANCE:
continue
# Perform maintenance tasks
await self._perform_maintenance()
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Error in maintenance loop: {e}")
async def _stats_loop(self) -> None:
"""Background statistics loop"""
while self.coordinator.state not in (QueueState.STOPPED, QueueState.ERROR):
try:
await asyncio.sleep(60) # Every minute
await self._update_stats()
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Error in stats loop: {e}")
async def _perform_maintenance(self) -> None:
"""Perform maintenance tasks"""
try:
# Switch to maintenance mode
previous_mode = self.coordinator.mode
await self.coordinator.set_mode(QueueMode.MAINTENANCE)
# Perform maintenance tasks
await self._cleanup_old_data()
await self._optimize_queue()
await self._persist_state()
# Restore previous mode
await self.coordinator.set_mode(previous_mode)
except Exception as e:
logger.error(f"Error during maintenance: {e}")
async def _cleanup_old_data(self) -> None:
"""Clean up old data"""
try:
await self.cleaner.cleanup_old_data(
self.state_manager,
self.metrics_manager
)
except Exception as e:
logger.error(f"Error cleaning up old data: {e}")
async def _optimize_queue(self) -> None:
"""Optimize queue performance"""
try:
# Reorder queue based on priorities
await self.state_manager.optimize_queue()
# Update monitoring level based on queue size
queue_size = len(await self.state_manager.get_all_items())
if queue_size > self.config.max_queue_size * 0.8:
self.monitor.strategy.level = MonitoringLevel.INTENSIVE
elif queue_size < self.config.max_queue_size * 0.2:
self.monitor.strategy.level = self.config.monitoring_level
except Exception as e:
logger.error(f"Error optimizing queue: {e}")
async def _update_stats(self) -> None:
"""Update queue statistics"""
try:
self.stats.uptime = datetime.utcnow() - self.stats.start_time
# Update peak values
queue_size = len(await self.state_manager.get_all_items())
self.stats.peak_queue_size = max(
self.stats.peak_queue_size,
queue_size
)
memory_usage = self.metrics_manager.performance.peak_memory_usage
self.stats.peak_memory_usage = max(
self.stats.peak_memory_usage,
memory_usage
)
except Exception as e:
logger.error(f"Error updating stats: {e}")
async def add_to_queue(
self,
@@ -281,17 +299,13 @@ class EnhancedVideoQueueManager:
priority: int = 0,
) -> bool:
"""Add a video to the processing queue"""
if self.coordinator.state in (QueueState.STOPPED, QueueState.ERROR):
raise QueueError("Queue manager is not running")
# Wait if queue is paused
await self.coordinator.wait_if_paused()
try:
item = QueueItem(
url=url,
message_id=message_id,
@@ -302,59 +316,128 @@ class EnhancedVideoQueueManager:
priority=priority,
)
success = await self.state_manager.add_item(item)
if success and self.persistence:
await self._persist_state()
logger.info(f"Added to queue: {url} (priority: {priority})")
return success
except Exception as e:
logger.error(f"Error adding to queue: {e}")
raise QueueError(f"Failed to add to queue: {str(e)}")
def get_queue_status(self, guild_id: int) -> Dict[str, Any]:
"""Get current queue status for a guild"""
try:
status = self.state_manager.get_guild_status(guild_id)
metrics = self.metrics_manager.get_metrics()
monitor_stats = self.monitor.get_monitoring_stats()
return {
"pending": pending,
"processing": processing,
"completed": completed,
"failed": failed,
"metrics": {
"total_processed": self.metrics.total_processed,
"total_failed": self.metrics.total_failed,
"success_rate": self.metrics.success_rate,
"avg_processing_time": self.metrics.avg_processing_time,
"peak_memory_usage": self.metrics.peak_memory_usage,
"last_cleanup": self.metrics.last_cleanup.strftime("%Y-%m-%d %H:%M:%S"),
"errors_by_type": self.metrics.errors_by_type,
"compression_failures": self.metrics.compression_failures,
"hardware_accel_failures": self.metrics.hardware_accel_failures,
"last_activity": time.time() - self.metrics.last_activity_time,
},
**status,
"metrics": metrics,
"monitoring": monitor_stats,
"state": self.coordinator.state.value,
"mode": self.coordinator.mode.value,
"stats": {
"uptime": self.stats.uptime.total_seconds(),
"peak_queue_size": self.stats.peak_queue_size,
"peak_memory_usage": self.stats.peak_memory_usage,
"total_processed": self.stats.total_processed,
"total_failed": self.stats.total_failed
}
}
except Exception as e:
logger.error(f"Error getting queue status: {e}")
return self._get_default_status()
async def pause(self) -> None:
"""Pause queue processing"""
await self.coordinator.pause()
logger.info("Queue processing paused")
async def resume(self) -> None:
"""Resume queue processing"""
await self.coordinator.resume()
logger.info("Queue processing resumed")
async def cleanup(self) -> None:
"""Clean up resources and stop queue processing"""
try:
await self.coordinator.set_state(QueueState.STOPPING)
logger.info("Starting queue manager cleanup...")
# Cancel background tasks
if self._maintenance_task:
self._maintenance_task.cancel()
if self._stats_task:
self._stats_task.cancel()
# Stop processor
await self.processor.stop_processing()
# Stop monitoring and cleanup
await self.monitor.stop()
await self.cleaner.stop()
# Final state persistence
if self.persistence:
await self._persist_state()
# Clear state
await self.state_manager.clear_state()
await self.coordinator.set_state(QueueState.STOPPED)
logger.info("Queue manager cleanup completed")
except Exception as e:
await self.coordinator.set_state(QueueState.ERROR)
logger.error(f"Error during cleanup: {e}")
raise CleanupError(f"Failed to clean up queue manager: {str(e)}")
async def force_stop(self) -> None:
"""Force stop all queue operations immediately"""
await self.coordinator.set_state(QueueState.STOPPING)
logger.info("Force stopping queue manager...")
# Cancel background tasks
if self._maintenance_task:
self._maintenance_task.cancel()
if self._stats_task:
self._stats_task.cancel()
# Force stop all components
await self.processor.stop_processing()
await self.monitor.stop()
await self.cleaner.stop()
# Clear state
await self.state_manager.clear_state()
await self.coordinator.set_state(QueueState.STOPPED)
logger.info("Queue manager force stopped")
async def _persist_state(self) -> None:
"""Persist current state to storage"""
if not self.persistence:
return
try:
state = await self.state_manager.get_state_for_persistence()
state["metrics"] = self.metrics_manager.get_metrics()
state["stats"] = {
"uptime": self.stats.uptime.total_seconds(),
"peak_queue_size": self.stats.peak_queue_size,
"peak_memory_usage": self.stats.peak_memory_usage,
"total_processed": self.stats.total_processed,
"total_failed": self.stats.total_failed
}
await self.persistence.persist_queue_state(state)
except Exception as e:
logger.error(f"Failed to persist state: {e}")
def _get_default_status(self) -> Dict[str, Any]:
"""Get default status when error occurs"""
return {
"pending": 0,
"processing": 0,
@@ -372,85 +455,13 @@ class EnhancedVideoQueueManager:
"hardware_accel_failures": 0,
"last_activity": 0,
},
"state": QueueState.ERROR.value,
"mode": QueueMode.NORMAL.value,
"stats": {
"uptime": 0,
"peak_queue_size": 0,
"peak_memory_usage": 0,
"total_processed": 0,
"total_failed": 0
}
}
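# Usage sketch (illustrative only): typical lifecycle of the refactored
# manager. The add_to_queue parameter list is elided in this diff, so only
# the visible priority keyword is shown.
#
#   manager = EnhancedVideoQueueManager(QueueConfig(max_concurrent=5))
#   await manager.initialize()
#   await manager.add_to_queue(url, ..., priority=1)
#   status = manager.get_queue_status(guild_id)
#   await manager.cleanup()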

View File

@@ -0,0 +1,366 @@
"""Module for managing queue metrics"""
import time
import logging
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, Optional, List, Any, Set
from datetime import datetime, timedelta
import json
logger = logging.getLogger("QueueMetricsManager")
class MetricCategory(Enum):
"""Categories of metrics"""
PROCESSING = "processing"
PERFORMANCE = "performance"
ERRORS = "errors"
HARDWARE = "hardware"
MEMORY = "memory"
ACTIVITY = "activity"
class ErrorCategory(Enum):
"""Categories of errors"""
NETWORK = "network"
TIMEOUT = "timeout"
PERMISSION = "permission"
MEMORY = "memory"
HARDWARE = "hardware"
COMPRESSION = "compression"
STORAGE = "storage"
OTHER = "other"
@dataclass
class ProcessingMetrics:
"""Processing-related metrics"""
total_processed: int = 0
total_failed: int = 0
success_rate: float = 0.0
avg_processing_time: float = 0.0
_total_processing_time: float = 0.0
_processing_count: int = 0
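# Private running totals let success rate and average be derived
# incrementally without storing individual samples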
def update(self, processing_time: float, success: bool) -> None:
"""Update processing metrics"""
self.total_processed += 1
if not success:
self.total_failed += 1
self._total_processing_time += processing_time
self._processing_count += 1
self.success_rate = (
(self.total_processed - self.total_failed)
/ self.total_processed
if self.total_processed > 0
else 0.0
)
self.avg_processing_time = (
self._total_processing_time / self._processing_count
if self._processing_count > 0
else 0.0
)
@dataclass
class ErrorMetrics:
"""Error-related metrics"""
errors_by_type: Dict[str, int] = field(default_factory=dict)
errors_by_category: Dict[ErrorCategory, int] = field(default_factory=dict)
recent_errors: List[Dict[str, Any]] = field(default_factory=list)
error_patterns: Dict[str, int] = field(default_factory=dict)
max_recent_errors: int = 100
def record_error(self, error: str, category: Optional[ErrorCategory] = None) -> None:
"""Record an error occurrence"""
# Track by exact error
self.errors_by_type[error] = self.errors_by_type.get(error, 0) + 1
# Track by category
if category is None:
category = self._categorize_error(error)
self.errors_by_category[category] = self.errors_by_category.get(category, 0) + 1
# Track recent errors
self.recent_errors.append({
"error": error,
"category": category.value,
"timestamp": datetime.utcnow().isoformat()
})
if len(self.recent_errors) > self.max_recent_errors:
self.recent_errors.pop(0)
# Update error patterns
pattern = self._extract_error_pattern(error)
self.error_patterns[pattern] = self.error_patterns.get(pattern, 0) + 1
def _categorize_error(self, error: str) -> ErrorCategory:
"""Categorize an error message"""
error_lower = error.lower()
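# Keyword heuristic: the first matching category wins, and anything
# unmatched falls through to OTHER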
if any(word in error_lower for word in ["network", "connection", "dns"]):
return ErrorCategory.NETWORK
elif "timeout" in error_lower:
return ErrorCategory.TIMEOUT
elif any(word in error_lower for word in ["permission", "access", "denied"]):
return ErrorCategory.PERMISSION
elif "memory" in error_lower:
return ErrorCategory.MEMORY
elif "hardware" in error_lower:
return ErrorCategory.HARDWARE
elif "compression" in error_lower:
return ErrorCategory.COMPRESSION
elif any(word in error_lower for word in ["disk", "storage", "space"]):
return ErrorCategory.STORAGE
return ErrorCategory.OTHER
def _extract_error_pattern(self, error: str) -> str:
"""Extract general pattern from error message"""
# This could be enhanced with regex or more sophisticated pattern matching
words = error.split()
if len(words) > 5:
return " ".join(words[:5]) + "..."
return error
@dataclass
class PerformanceMetrics:
"""Performance-related metrics"""
peak_memory_usage: float = 0.0
compression_failures: int = 0
hardware_accel_failures: int = 0
peak_queue_size: int = 0
peak_processing_time: float = 0.0
avg_queue_wait_time: float = 0.0
_total_wait_time: float = 0.0
_wait_count: int = 0
def update_memory(self, memory_usage: float) -> None:
"""Update memory usage metrics"""
self.peak_memory_usage = max(self.peak_memory_usage, memory_usage)
def record_wait_time(self, wait_time: float) -> None:
"""Record queue wait time"""
self._total_wait_time += wait_time
self._wait_count += 1
self.avg_queue_wait_time = (
self._total_wait_time / self._wait_count
if self._wait_count > 0
else 0.0
)
class MetricAggregator:
"""Aggregates metrics over time periods"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.hourly_metrics: List[Dict[str, Any]] = []
self.daily_metrics: List[Dict[str, Any]] = []
self.last_aggregation = datetime.utcnow()
def aggregate_metrics(self, current_metrics: Dict[str, Any]) -> None:
"""Aggregate current metrics"""
now = datetime.utcnow()
# Hourly aggregation
if now - self.last_aggregation >= timedelta(hours=1):
self.hourly_metrics.append({
"timestamp": now.isoformat(),
"metrics": current_metrics
})
if len(self.hourly_metrics) > self.max_history:
self.hourly_metrics.pop(0)
# Daily aggregation
if now.date() > self.last_aggregation.date():
daily_avg = self._calculate_daily_average(
self.hourly_metrics,
self.last_aggregation.date()
)
self.daily_metrics.append(daily_avg)
if len(self.daily_metrics) > 30: # Keep last 30 days
self.daily_metrics.pop(0)
self.last_aggregation = now
def _calculate_daily_average(
self,
metrics: List[Dict[str, Any]],
date: datetime.date
) -> Dict[str, Any]:
"""Calculate average metrics for a day"""
day_metrics = [
m for m in metrics
if datetime.fromisoformat(m["timestamp"]).date() == date
]
if not day_metrics:
return {
"date": date.isoformat(),
"metrics": {}
}
# Calculate averages for numeric values
avg_metrics = {}
for key in day_metrics[0]["metrics"].keys():
if isinstance(day_metrics[0]["metrics"][key], (int, float)):
avg_metrics[key] = sum(
m["metrics"][key] for m in day_metrics
) / len(day_metrics)
else:
avg_metrics[key] = day_metrics[-1]["metrics"][key]
return {
"date": date.isoformat(),
"metrics": avg_metrics
}
class QueueMetricsManager:
"""Manages metrics collection and reporting for the queue system"""
def __init__(self):
self.processing = ProcessingMetrics()
self.errors = ErrorMetrics()
self.performance = PerformanceMetrics()
self.aggregator = MetricAggregator()
self.last_activity = time.time()
self.last_cleanup = datetime.utcnow()
def update(
self,
processing_time: float,
success: bool,
error: Optional[str] = None
) -> None:
"""Update metrics with new processing information"""
try:
# Update processing metrics
self.processing.update(processing_time, success)
# Update error tracking
if error:
self.errors.record_error(error)
# Track specific failures
if "hardware acceleration" in error.lower():
self.performance.hardware_accel_failures += 1
elif "compression" in error.lower():
self.performance.compression_failures += 1
# Update activity timestamp
self.last_activity = time.time()
# Aggregate metrics
self.aggregator.aggregate_metrics(self.get_metrics())
except Exception as e:
logger.error(f"Error updating metrics: {e}")
def get_metrics(self) -> Dict[str, Any]:
"""Get current metrics"""
return {
MetricCategory.PROCESSING.value: {
"total_processed": self.processing.total_processed,
"total_failed": self.processing.total_failed,
"success_rate": self.processing.success_rate,
"avg_processing_time": self.processing.avg_processing_time
},
MetricCategory.ERRORS.value: {
"errors_by_type": self.errors.errors_by_type,
"errors_by_category": {
cat.value: count
for cat, count in self.errors.errors_by_category.items()
},
"error_patterns": self.errors.error_patterns,
"recent_errors": self.errors.recent_errors
},
MetricCategory.PERFORMANCE.value: {
"peak_memory_usage": self.performance.peak_memory_usage,
"compression_failures": self.performance.compression_failures,
"hardware_accel_failures": self.performance.hardware_accel_failures,
"peak_queue_size": self.performance.peak_queue_size,
"avg_queue_wait_time": self.performance.avg_queue_wait_time
},
MetricCategory.ACTIVITY.value: {
"last_activity": time.time() - self.last_activity,
"last_cleanup": self.last_cleanup.isoformat()
},
"history": {
"hourly": self.aggregator.hourly_metrics,
"daily": self.aggregator.daily_metrics
}
}
def update_memory_usage(self, memory_usage: float) -> None:
"""Update peak memory usage"""
self.performance.update_memory(memory_usage)
def update_cleanup_time(self) -> None:
"""Update last cleanup timestamp"""
self.last_cleanup = datetime.utcnow()
def reset_metrics(self) -> None:
"""Reset all metrics to initial state"""
self.processing = ProcessingMetrics()
self.errors = ErrorMetrics()
self.performance = PerformanceMetrics()
self.last_activity = time.time()
self.last_cleanup = datetime.utcnow()
def save_metrics(self, file_path: str) -> None:
"""Save metrics to file"""
try:
metrics = self.get_metrics()
with open(file_path, 'w') as f:
json.dump(metrics, f, indent=2)
except Exception as e:
logger.error(f"Error saving metrics: {e}")
def load_metrics(self, file_path: str) -> None:
"""Load metrics from file"""
try:
with open(file_path, 'r') as f:
metrics = json.load(f)
self.restore_metrics(metrics)
except Exception as e:
logger.error(f"Error loading metrics: {e}")
def restore_metrics(self, metrics_data: Dict[str, Any]) -> None:
"""Restore metrics from saved data"""
try:
# Restore processing metrics
proc_data = metrics_data.get(MetricCategory.PROCESSING.value, {})
self.processing = ProcessingMetrics(
total_processed=proc_data.get("total_processed", 0),
total_failed=proc_data.get("total_failed", 0),
success_rate=proc_data.get("success_rate", 0.0),
avg_processing_time=proc_data.get("avg_processing_time", 0.0)
)
# Restore error metrics
error_data = metrics_data.get(MetricCategory.ERRORS.value, {})
self.errors = ErrorMetrics(
errors_by_type=error_data.get("errors_by_type", {}),
errors_by_category={
ErrorCategory[k.upper()]: v
for k, v in error_data.get("errors_by_category", {}).items()
},
error_patterns=error_data.get("error_patterns", {}),
recent_errors=error_data.get("recent_errors", [])
)
# Restore performance metrics
perf_data = metrics_data.get(MetricCategory.PERFORMANCE.value, {})
self.performance = PerformanceMetrics(
peak_memory_usage=perf_data.get("peak_memory_usage", 0.0),
compression_failures=perf_data.get("compression_failures", 0),
hardware_accel_failures=perf_data.get("hardware_accel_failures", 0),
peak_queue_size=perf_data.get("peak_queue_size", 0),
avg_queue_wait_time=perf_data.get("avg_queue_wait_time", 0.0)
)
# Restore history
history = metrics_data.get("history", {})
self.aggregator.hourly_metrics = history.get("hourly", [])
self.aggregator.daily_metrics = history.get("daily", [])
except Exception as e:
logger.error(f"Error restoring metrics: {e}")

View File

@@ -2,221 +2,365 @@
import asyncio
import logging
import psutil
import time
from enum import Enum
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List, Set
from datetime import datetime, timedelta
from .health_checker import HealthChecker, HealthStatus, HealthCategory
from .recovery_manager import RecoveryManager, RecoveryStrategy
logger = logging.getLogger("QueueMonitoring")
class MonitoringLevel(Enum):
"""Monitoring intensity levels"""
LIGHT = "light" # Basic monitoring
NORMAL = "normal" # Standard monitoring
INTENSIVE = "intensive" # Detailed monitoring
DEBUG = "debug" # Debug-level monitoring
class AlertSeverity(Enum):
"""Alert severity levels"""
INFO = "info"
WARNING = "warning"
ERROR = "error"
CRITICAL = "critical"
@dataclass
class MonitoringEvent:
"""Represents a monitoring event"""
timestamp: datetime
category: HealthCategory
severity: AlertSeverity
message: str
details: Dict[str, Any] = field(default_factory=dict)
resolved: bool = False
resolution_time: Optional[datetime] = None
@dataclass
class MonitoringThresholds:
"""Monitoring thresholds configuration"""
check_interval: int = 15 # 15 seconds
deadlock_threshold: int = 60 # 1 minute
memory_threshold: int = 512 # 512MB
max_retries: int = 3
alert_threshold: int = 5 # Max alerts before escalation
recovery_timeout: int = 300 # 5 minutes
intensive_threshold: int = 0.8 # 80% resource usage triggers intensive
class AlertManager:
"""Manages monitoring alerts"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.active_alerts: Dict[str, MonitoringEvent] = {}
self.alert_history: List[MonitoringEvent] = []
self.alert_counts: Dict[AlertSeverity, int] = {
severity: 0 for severity in AlertSeverity
}
def create_alert(
self,
category: HealthCategory,
severity: AlertSeverity,
message: str,
details: Dict[str, Any] = None
) -> MonitoringEvent:
"""Create a new alert"""
event = MonitoringEvent(
timestamp=datetime.utcnow(),
category=category,
severity=severity,
message=message,
details=details or {}
)
alert_id = f"{category.value}_{event.timestamp.timestamp()}"
self.active_alerts[alert_id] = event
self.alert_counts[severity] += 1
self.alert_history.append(event)
if len(self.alert_history) > self.max_history:
self.alert_history.pop(0)
return event
def resolve_alert(self, alert_id: str) -> None:
"""Mark an alert as resolved"""
if alert_id in self.active_alerts:
event = self.active_alerts[alert_id]
event.resolved = True
event.resolution_time = datetime.utcnow()
self.active_alerts.pop(alert_id)
def get_active_alerts(self) -> List[MonitoringEvent]:
"""Get currently active alerts"""
return list(self.active_alerts.values())
def get_alert_stats(self) -> Dict[str, Any]:
"""Get alert statistics"""
return {
"active_alerts": len(self.active_alerts),
"total_alerts": len(self.alert_history),
"alert_counts": {
severity.value: count
for severity, count in self.alert_counts.items()
},
"recent_alerts": [
{
"timestamp": event.timestamp.isoformat(),
"category": event.category.value,
"severity": event.severity.value,
"message": event.message,
"resolved": event.resolved
}
for event in self.alert_history[-10:] # Last 10 alerts
]
}
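# Minimal usage sketch (illustrative only, not part of the module): how the
# monitor loop typically drives AlertManager. All names below are the classes
# defined in this file plus the imported HealthCategory enum.
#
#     alerts = AlertManager(max_history=100)
#     alerts.create_alert(
#         category=HealthCategory.MEMORY,
#         severity=AlertSeverity.WARNING,
#         message="Memory usage above 400MB",
#         details={"memory_usage": 412.5},
#     )
#     alerts.get_alert_stats()  # counts by severity plus the last 10 alerts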
class MonitoringStrategy:
"""Determines monitoring behavior"""
def __init__(
self,
level: MonitoringLevel = MonitoringLevel.NORMAL,
thresholds: Optional[MonitoringThresholds] = None
):
self.level = level
self.thresholds = thresholds or MonitoringThresholds()
self._last_intensive_check = datetime.utcnow()
def should_check_health(self, metrics: Dict[str, Any]) -> bool:
"""Determine if health check should be performed"""
if self.level == MonitoringLevel.INTENSIVE:
return True
elif self.level == MonitoringLevel.LIGHT:
return metrics.get("queue_size", 0) > 0
else: # NORMAL or DEBUG
return True
def get_check_interval(self) -> float:
"""Get the current check interval"""
if self.level == MonitoringLevel.INTENSIVE:
return self.thresholds.check_interval / 2
elif self.level == MonitoringLevel.LIGHT:
return self.thresholds.check_interval * 2
else: # NORMAL or DEBUG
return self.thresholds.check_interval
def should_escalate(self, alert_count: int) -> bool:
"""Determine if monitoring should be escalated"""
return (
self.level != MonitoringLevel.INTENSIVE and
alert_count >= self.thresholds.alert_threshold
)
def should_deescalate(self, alert_count: int) -> bool:
"""Determine if monitoring can be deescalated"""
return (
self.level == MonitoringLevel.INTENSIVE and
alert_count == 0 and
(datetime.utcnow() - self._last_intensive_check).total_seconds() > 300
)
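# Interval sketch: with the default 15s check_interval, LIGHT polls every 30s,
# NORMAL/DEBUG every 15s, and INTENSIVE every 7.5s. Escalation flips NORMAL to
# INTENSIVE once active alerts reach alert_threshold (5 by default), and
# de-escalation requires zero active alerts plus 5 minutes at INTENSIVE.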
class QueueMonitor:
"""Monitors queue health and performance"""
def __init__(
self,
        strategy: Optional[MonitoringStrategy] = None,
        thresholds: Optional[MonitoringThresholds] = None
    ):
self.strategy = strategy or MonitoringStrategy()
self.thresholds = thresholds or MonitoringThresholds()
# Initialize components
self.health_checker = HealthChecker(
memory_threshold=self.thresholds.memory_threshold,
deadlock_threshold=self.thresholds.deadlock_threshold
)
self.recovery_manager = RecoveryManager(max_retries=self.thresholds.max_retries)
self.alert_manager = AlertManager()
self._shutdown = False
self._last_active_time = time.time()
        self._monitoring_task: Optional[asyncio.Task] = None
    async def start(self, state_manager, metrics_manager) -> None:
        """Start monitoring queue health"""
        if self._monitoring_task is not None:
            logger.warning("Monitoring task already running")
            return
        logger.info(f"Starting queue monitoring with level: {self.strategy.level.value}")
        self._monitoring_task = asyncio.create_task(
            self._monitor_loop(state_manager, metrics_manager)
        )
    async def _monitor_loop(self, state_manager, metrics_manager) -> None:
"""Main monitoring loop"""
while not self._shutdown:
try:
# Get current metrics
metrics = metrics_manager.get_metrics()
# Check if health check should be performed
if self.strategy.should_check_health(metrics):
await self._perform_health_check(
state_manager,
metrics_manager,
metrics
)
# Check for strategy adjustment
self._adjust_monitoring_strategy(metrics)
# Wait for next check
await asyncio.sleep(self.strategy.get_check_interval())
except asyncio.CancelledError:
logger.info("Queue monitoring cancelled")
break
except Exception as e:
logger.error(f"Error in health monitor: {str(e)}")
await asyncio.sleep(1) # Reduced sleep on error
logger.error(f"Error in monitoring loop: {str(e)}")
await asyncio.sleep(1)
    async def stop(self) -> None:
"""Stop the monitoring process"""
logger.info("Stopping queue monitoring...")
self._shutdown = True
if self._monitoring_task and not self._monitoring_task.done():
self._monitoring_task.cancel()
try:
await self._monitoring_task
except asyncio.CancelledError:
pass
self._monitoring_task = None
def update_activity(self) -> None:
"""Update the last active time"""
self._last_active_time = time.time()
async def _check_health(
async def _perform_health_check(
self,
queue: List[QueueItem],
processing: Dict[str, QueueItem],
metrics: QueueMetrics,
queue_lock: asyncio.Lock
state_manager,
metrics_manager,
current_metrics: Dict[str, Any]
) -> None:
"""Check queue health and performance
Args:
queue: Reference to the queue list
processing: Reference to processing dict
metrics: Reference to queue metrics
queue_lock: Lock for queue operations
"""
"""Perform health check and recovery if needed"""
        try:
            # Check memory usage
            memory_usage, is_critical = await self.health_checker.check_memory_usage()
            metrics_manager.update_memory_usage(memory_usage)
            if is_critical:
                self.alert_manager.create_alert(
                    category=HealthCategory.MEMORY,
                    severity=AlertSeverity.CRITICAL,
                    message=f"Critical memory usage: {memory_usage:.1f}MB",
                    details={"memory_usage": memory_usage}
                )
            # Get current queue state
            queue_stats = await state_manager.get_queue_stats()
            processing_items = await state_manager.get_all_processing_items()
            # Check for stuck items
            stuck_items = []
            for item in processing_items:
                if self.recovery_manager.should_recover_item(item):
                    stuck_items.append((item.url, item))
            # Handle stuck items if found
            if stuck_items:
                self.alert_manager.create_alert(
                    category=HealthCategory.DEADLOCKS,
                    severity=AlertSeverity.WARNING,
                    message=f"Potential deadlock: {len(stuck_items)} items stuck",
                    details={"stuck_items": [item[0] for item in stuck_items]}
                )
                await self.recovery_manager.recover_stuck_items(
                    stuck_items,
                    state_manager,
                    metrics_manager
                )
            # Check overall queue activity
            if processing_items and self.health_checker.check_queue_activity(
                self._last_active_time,
                bool(processing_items)
            ):
                self.alert_manager.create_alert(
                    category=HealthCategory.ACTIVITY,
                    severity=AlertSeverity.ERROR,
                    message="Queue appears to be hung",
                    details={"last_active": self._last_active_time}
                )
                await self.recovery_manager.perform_emergency_recovery(
                    state_manager,
                    metrics_manager
                )
                # Update activity timestamp after recovery
                self.update_activity()
            # Check error rates
            error_rate = current_metrics.get("error_rate", 0)
            if error_rate > 0.2:  # 20% error rate
                self.alert_manager.create_alert(
                    category=HealthCategory.ERRORS,
                    severity=AlertSeverity.ERROR,
                    message=f"High error rate: {error_rate:.1%}",
                    details={"error_rate": error_rate}
                )
            # Log health report
            if self.strategy.level in (MonitoringLevel.INTENSIVE, MonitoringLevel.DEBUG):
                health_report = self.health_checker.format_health_report(
                    memory_usage=memory_usage,
                    queue_size=queue_stats["queue_size"],
                    processing_count=queue_stats["processing_count"],
                    success_rate=metrics_manager.success_rate,
                    avg_processing_time=metrics_manager.avg_processing_time,
                    peak_memory=metrics_manager.peak_memory_usage,
                    error_distribution=metrics_manager.errors_by_type,
                    last_activity_delta=time.time() - self._last_active_time
                )
                logger.info(health_report)
        except Exception as e:
            logger.error(f"Error performing health check: {str(e)}")
            self.alert_manager.create_alert(
                category=HealthCategory.SYSTEM,
                severity=AlertSeverity.ERROR,
                message=f"Health check error: {str(e)}"
            )
def _adjust_monitoring_strategy(self, metrics: Dict[str, Any]) -> None:
"""Adjust monitoring strategy based on current state"""
active_alerts = self.alert_manager.get_active_alerts()
# Check for escalation
if self.strategy.should_escalate(len(active_alerts)):
logger.warning("Escalating to intensive monitoring")
self.strategy.level = MonitoringLevel.INTENSIVE
self.strategy._last_intensive_check = datetime.utcnow()
# Check for de-escalation
elif self.strategy.should_deescalate(len(active_alerts)):
logger.info("De-escalating to normal monitoring")
self.strategy.level = MonitoringLevel.NORMAL
def get_monitoring_stats(self) -> Dict[str, Any]:
"""Get comprehensive monitoring statistics"""
return {
"monitoring_level": self.strategy.level.value,
"last_active": self._last_active_time,
"alerts": self.alert_manager.get_alert_stats(),
"recovery": self.recovery_manager.get_recovery_stats(),
"health": self.health_checker.get_health_stats()
}
class MonitoringError(Exception):
"""Base exception for monitoring-related errors"""

View File

@@ -0,0 +1,351 @@
"""Module for processing queue items"""
import asyncio
import logging
import time
from enum import Enum
from dataclasses import dataclass, field
from typing import Callable, Optional, Tuple, List, Set, Dict, Any
from datetime import datetime, timedelta
from .models import QueueItem
from .state_manager import QueueStateManager, ItemState
from .monitoring import QueueMonitor
logger = logging.getLogger("QueueProcessor")
class ProcessingStrategy(Enum):
"""Processing strategies"""
SEQUENTIAL = "sequential" # Process items one at a time
CONCURRENT = "concurrent" # Process multiple items concurrently
BATCHED = "batched" # Process items in batches
PRIORITY = "priority" # Process based on priority
@dataclass
class ProcessingMetrics:
"""Metrics for processing operations"""
total_processed: int = 0
successful: int = 0
failed: int = 0
retried: int = 0
avg_processing_time: float = 0.0
peak_concurrent_tasks: int = 0
last_processed: Optional[datetime] = None
    error_counts: Dict[str, int] = field(default_factory=dict)
def record_success(self, processing_time: float) -> None:
"""Record successful processing"""
self.total_processed += 1
self.successful += 1
self._update_avg_time(processing_time)
self.last_processed = datetime.utcnow()
def record_failure(self, error: str) -> None:
"""Record processing failure"""
self.total_processed += 1
self.failed += 1
self.error_counts[error] = self.error_counts.get(error, 0) + 1
self.last_processed = datetime.utcnow()
def record_retry(self) -> None:
"""Record processing retry"""
self.retried += 1
def _update_avg_time(self, new_time: float) -> None:
"""Update average processing time"""
if self.total_processed == 1:
self.avg_processing_time = new_time
else:
self.avg_processing_time = (
(self.avg_processing_time * (self.total_processed - 1) + new_time)
/ self.total_processed
)
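        # The update above is the standard incremental mean:
        #     avg_n = (avg_{n-1} * (n - 1) + x_n) / n
        # e.g. processing times 2.0s, 4.0s, 6.0s give 2.0 -> 3.0 -> 4.0.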
def get_stats(self) -> Dict[str, Any]:
"""Get processing statistics"""
return {
"total_processed": self.total_processed,
"successful": self.successful,
"failed": self.failed,
"retried": self.retried,
"success_rate": (
self.successful / self.total_processed
if self.total_processed > 0
else 0
),
"avg_processing_time": self.avg_processing_time,
"peak_concurrent_tasks": self.peak_concurrent_tasks,
"last_processed": (
self.last_processed.isoformat()
if self.last_processed
else None
),
"error_distribution": self.error_counts
}
class BatchManager:
"""Manages processing batches"""
def __init__(
self,
batch_size: int,
max_concurrent: int,
timeout: float = 30.0
):
self.batch_size = batch_size
self.max_concurrent = max_concurrent
self.timeout = timeout
self.current_batch: List[QueueItem] = []
self.processing_start: Optional[datetime] = None
async def process_batch(
self,
items: List[QueueItem],
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]]
) -> List[Tuple[QueueItem, bool, Optional[str]]]:
"""Process a batch of items"""
self.current_batch = items
self.processing_start = datetime.utcnow()
tasks = [
asyncio.create_task(self._process_item(processor, item))
for item in items
]
try:
results = await asyncio.gather(*tasks, return_exceptions=True)
return [
(item, *self._handle_result(result))
for item, result in zip(items, results)
]
finally:
self.current_batch = []
self.processing_start = None
async def _process_item(
self,
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]],
item: QueueItem
) -> Tuple[bool, Optional[str]]:
"""Process a single item with timeout"""
try:
return await asyncio.wait_for(
processor(item),
timeout=self.timeout
)
except asyncio.TimeoutError:
return False, "Processing timeout"
except Exception as e:
return False, str(e)
def _handle_result(
self,
result: Any
) -> Tuple[bool, Optional[str]]:
"""Handle processing result"""
if isinstance(result, tuple) and len(result) == 2:
return result
if isinstance(result, Exception):
return False, str(result)
return False, "Unknown error"
def get_batch_status(self) -> Dict[str, Any]:
"""Get current batch status"""
return {
"batch_size": len(self.current_batch),
"processing_time": (
(datetime.utcnow() - self.processing_start).total_seconds()
if self.processing_start
else 0
),
"items": [item.url for item in self.current_batch]
}
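# Minimal usage sketch (illustrative only; the processor coroutine below is a
# stand-in — real processors download/compress the item):
#
#     async def processor(item: QueueItem) -> Tuple[bool, Optional[str]]:
#         return True, None
#
#     batches = BatchManager(batch_size=5, max_concurrent=3, timeout=30.0)
#     results = await batches.process_batch(items, processor)
#     # -> [(item, success, error), ...] in the same order as the input items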
class QueueProcessor:
"""Handles the processing of queue items"""
def __init__(
self,
state_manager: QueueStateManager,
monitor: QueueMonitor,
strategy: ProcessingStrategy = ProcessingStrategy.CONCURRENT,
max_retries: int = 3,
retry_delay: int = 5,
batch_size: int = 5,
max_concurrent: int = 3
):
self.state_manager = state_manager
self.monitor = monitor
self.strategy = strategy
self.max_retries = max_retries
self.retry_delay = retry_delay
self.batch_manager = BatchManager(batch_size, max_concurrent)
self.metrics = ProcessingMetrics()
self._shutdown = False
self._active_tasks: Set[asyncio.Task] = set()
self._processing_lock = asyncio.Lock()
async def start_processing(
self,
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]]
) -> None:
"""Start processing items in the queue"""
logger.info(f"Queue processor started with strategy: {self.strategy.value}")
while not self._shutdown:
try:
if self.strategy == ProcessingStrategy.BATCHED:
await self._process_batch(processor)
elif self.strategy == ProcessingStrategy.CONCURRENT:
await self._process_concurrent(processor)
else: # SEQUENTIAL or PRIORITY
await self._process_sequential(processor)
except asyncio.CancelledError:
logger.info("Queue processing cancelled")
break
except Exception as e:
logger.error(f"Critical error in queue processor: {e}")
await asyncio.sleep(1) # Delay before retry
await asyncio.sleep(0)
async def _process_batch(
self,
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]]
) -> None:
"""Process items in batches"""
items = await self.state_manager.get_next_items(self.batch_manager.batch_size)
if not items:
await asyncio.sleep(0.1)
return
start_time = time.time()
results = await self.batch_manager.process_batch(items, processor)
for item, success, error in results:
await self._handle_result(
item,
success,
error,
time.time() - start_time
)
async def _process_concurrent(
self,
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]]
) -> None:
"""Process items concurrently"""
if len(self._active_tasks) >= self.batch_manager.max_concurrent:
await asyncio.sleep(0.1)
return
items = await self.state_manager.get_next_items(
self.batch_manager.max_concurrent - len(self._active_tasks)
)
for item in items:
task = asyncio.create_task(self._process_item(processor, item))
self._active_tasks.add(task)
task.add_done_callback(self._active_tasks.discard)
self.metrics.peak_concurrent_tasks = max(
self.metrics.peak_concurrent_tasks,
len(self._active_tasks)
)
async def _process_sequential(
self,
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]]
) -> None:
"""Process items sequentially"""
items = await self.state_manager.get_next_items(1)
if not items:
await asyncio.sleep(0.1)
return
await self._process_item(processor, items[0])
async def _process_item(
self,
processor: Callable[[QueueItem], Tuple[bool, Optional[str]]],
item: QueueItem
) -> None:
"""Process a single queue item"""
try:
logger.info(f"Processing queue item: {item.url}")
start_time = time.time()
async with self._processing_lock:
item.start_processing()
self.monitor.update_activity()
success, error = await processor(item)
processing_time = time.time() - start_time
await self._handle_result(item, success, error, processing_time)
except Exception as e:
logger.error(f"Error processing {item.url}: {e}")
await self._handle_result(item, False, str(e), 0)
async def _handle_result(
self,
item: QueueItem,
success: bool,
error: Optional[str],
processing_time: float
) -> None:
"""Handle processing result"""
item.finish_processing(success, error)
if success:
await self.state_manager.mark_completed(item, True)
self.metrics.record_success(processing_time)
logger.info(f"Successfully processed: {item.url}")
else:
if item.retry_count < self.max_retries:
item.retry_count += 1
await self.state_manager.retry_item(item)
self.metrics.record_retry()
logger.warning(f"Retrying: {item.url} (attempt {item.retry_count})")
await asyncio.sleep(self.retry_delay)
else:
await self.state_manager.mark_completed(item, False, error)
self.metrics.record_failure(error or "Unknown error")
logger.error(f"Failed after {self.max_retries} attempts: {item.url}")
async def stop_processing(self) -> None:
"""Stop processing queue items"""
self._shutdown = True
# Cancel all active tasks
for task in self._active_tasks:
if not task.done():
task.cancel()
# Wait for tasks to complete
if self._active_tasks:
await asyncio.gather(*self._active_tasks, return_exceptions=True)
self._active_tasks.clear()
logger.info("Queue processor stopped")
def is_processing(self) -> bool:
"""Check if the processor is currently processing items"""
return bool(self._active_tasks)
def get_processor_stats(self) -> Dict[str, Any]:
"""Get processor statistics"""
return {
"strategy": self.strategy.value,
"active_tasks": len(self._active_tasks),
"metrics": self.metrics.get_stats(),
"batch_status": self.batch_manager.get_batch_status(),
"is_processing": self.is_processing()
}

View File

@@ -0,0 +1,359 @@
"""Module for handling queue item recovery operations"""
import asyncio
import logging
from enum import Enum
from dataclasses import dataclass, field
from typing import List, Tuple, Dict, Optional, Any, Set
from datetime import datetime, timedelta
from .models import QueueItem
logger = logging.getLogger("QueueRecoveryManager")
class RecoveryStrategy(Enum):
"""Recovery strategies"""
RETRY = "retry" # Retry the item
FAIL = "fail" # Mark as failed
REQUEUE = "requeue" # Add back to queue
EMERGENCY = "emergency" # Emergency recovery
class RecoveryPolicy(Enum):
"""Recovery policies"""
AGGRESSIVE = "aggressive" # Recover quickly, more retries
CONSERVATIVE = "conservative" # Recover slowly, fewer retries
BALANCED = "balanced" # Balance between speed and reliability
@dataclass
class RecoveryThresholds:
"""Thresholds for recovery operations"""
max_retries: int = 3
deadlock_threshold: int = 300 # 5 minutes
emergency_threshold: int = 600 # 10 minutes
backoff_base: int = 5 # Base delay for exponential backoff
max_concurrent_recoveries: int = 5
@dataclass
class RecoveryResult:
"""Result of a recovery operation"""
item_url: str
strategy: RecoveryStrategy
success: bool
error: Optional[str] = None
retry_count: int = 0
timestamp: datetime = field(default_factory=datetime.utcnow)
class RecoveryTracker:
"""Tracks recovery operations"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.history: List[RecoveryResult] = []
self.active_recoveries: Set[str] = set()
self.recovery_counts: Dict[str, int] = {}
self.success_counts: Dict[str, int] = {}
self.error_counts: Dict[str, int] = {}
def record_recovery(self, result: RecoveryResult) -> None:
"""Record a recovery operation"""
self.history.append(result)
if len(self.history) > self.max_history:
self.history.pop(0)
self.recovery_counts[result.item_url] = (
self.recovery_counts.get(result.item_url, 0) + 1
)
if result.success:
self.success_counts[result.item_url] = (
self.success_counts.get(result.item_url, 0) + 1
)
else:
self.error_counts[result.item_url] = (
self.error_counts.get(result.item_url, 0) + 1
)
def start_recovery(self, url: str) -> None:
"""Start tracking a recovery operation"""
self.active_recoveries.add(url)
def end_recovery(self, url: str) -> None:
"""End tracking a recovery operation"""
self.active_recoveries.discard(url)
def get_stats(self) -> Dict[str, Any]:
"""Get recovery statistics"""
return {
"total_recoveries": len(self.history),
"active_recoveries": len(self.active_recoveries),
"success_rate": (
sum(self.success_counts.values()) /
len(self.history) if self.history else 0
),
"recovery_counts": self.recovery_counts.copy(),
"error_counts": self.error_counts.copy(),
"recent_recoveries": [
{
"url": r.item_url,
"strategy": r.strategy.value,
"success": r.success,
"error": r.error,
"timestamp": r.timestamp.isoformat()
}
for r in self.history[-10:] # Last 10 recoveries
]
}
class RecoveryManager:
"""Handles recovery of stuck or failed queue items"""
def __init__(
self,
thresholds: Optional[RecoveryThresholds] = None,
policy: RecoveryPolicy = RecoveryPolicy.BALANCED
):
self.thresholds = thresholds or RecoveryThresholds()
self.policy = policy
self.tracker = RecoveryTracker()
self._recovery_lock = asyncio.Lock()
async def recover_stuck_items(
self,
stuck_items: List[Tuple[str, QueueItem]],
state_manager,
metrics_manager
) -> Tuple[int, int]:
"""Recover stuck items"""
recovered = 0
failed = 0
try:
async with self._recovery_lock:
for url, item in stuck_items:
if len(self.tracker.active_recoveries) >= self.thresholds.max_concurrent_recoveries:
logger.warning("Max concurrent recoveries reached, waiting...")
await asyncio.sleep(1)
continue
try:
self.tracker.start_recovery(url)
strategy = self._determine_strategy(item)
success = await self._execute_recovery(
url,
item,
strategy,
state_manager,
metrics_manager
)
if success:
recovered += 1
else:
failed += 1
except Exception as e:
logger.error(f"Error recovering item {url}: {str(e)}")
failed += 1
finally:
self.tracker.end_recovery(url)
logger.info(f"Recovery complete - Recovered: {recovered}, Failed: {failed}")
return recovered, failed
except Exception as e:
logger.error(f"Error in recovery process: {str(e)}")
return 0, len(stuck_items)
def _determine_strategy(self, item: QueueItem) -> RecoveryStrategy:
"""Determine recovery strategy based on item state"""
if item.retry_count >= self.thresholds.max_retries:
return RecoveryStrategy.FAIL
processing_time = (
datetime.utcnow().timestamp() - item.start_time
if item.start_time
else 0
)
if processing_time > self.thresholds.emergency_threshold:
return RecoveryStrategy.EMERGENCY
elif self.policy == RecoveryPolicy.AGGRESSIVE:
return RecoveryStrategy.RETRY
elif self.policy == RecoveryPolicy.CONSERVATIVE:
return RecoveryStrategy.REQUEUE
else: # BALANCED
return (
RecoveryStrategy.RETRY
if item.retry_count < self.thresholds.max_retries // 2
else RecoveryStrategy.REQUEUE
)
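    # Illustrative selection with the defaults (BALANCED policy, max_retries=3,
    # emergency_threshold=600s): retry_count >= 3 -> FAIL; stuck longer than
    # 600s -> EMERGENCY; retry_count 0 -> RETRY; retry_count 1..2 -> REQUEUE.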
async def _execute_recovery(
self,
url: str,
item: QueueItem,
strategy: RecoveryStrategy,
state_manager,
metrics_manager
) -> bool:
"""Execute recovery strategy"""
try:
if strategy == RecoveryStrategy.FAIL:
await self._handle_failed_item(url, item, state_manager, metrics_manager)
success = False
elif strategy == RecoveryStrategy.RETRY:
await self._handle_retry_item(url, item, state_manager)
success = True
elif strategy == RecoveryStrategy.REQUEUE:
await self._handle_requeue_item(url, item, state_manager)
success = True
else: # EMERGENCY
await self._handle_emergency_recovery(url, item, state_manager, metrics_manager)
success = True
self.tracker.record_recovery(RecoveryResult(
item_url=url,
strategy=strategy,
success=success,
retry_count=item.retry_count
))
return success
except Exception as e:
self.tracker.record_recovery(RecoveryResult(
item_url=url,
strategy=strategy,
success=False,
error=str(e),
retry_count=item.retry_count
))
raise
async def _handle_failed_item(
self,
url: str,
item: QueueItem,
state_manager,
metrics_manager
) -> None:
"""Handle an item that has exceeded retry attempts"""
logger.warning(f"Moving stuck item to failed: {url}")
item.status = "failed"
item.error = "Exceeded maximum retries after being stuck"
item.last_error = item.error
item.last_error_time = datetime.utcnow()
await state_manager.mark_completed(item, False, item.error)
metrics_manager.update(
processing_time=item.processing_time or 0,
success=False,
error=item.error
)
async def _handle_retry_item(
self,
url: str,
item: QueueItem,
state_manager
) -> None:
"""Handle an item that will be retried"""
logger.info(f"Recovering stuck item for retry: {url}")
item.retry_count += 1
item.start_time = None
item.processing_time = 0
item.last_retry = datetime.utcnow()
item.status = "pending"
item.priority = max(0, item.priority - 2)
await state_manager.retry_item(item)
async def _handle_requeue_item(
self,
url: str,
item: QueueItem,
state_manager
) -> None:
"""Handle an item that will be requeued"""
logger.info(f"Requeuing stuck item: {url}")
item.retry_count += 1
item.start_time = None
item.processing_time = 0
item.last_retry = datetime.utcnow()
item.status = "pending"
item.priority = 0 # Reset priority
# Calculate backoff delay
backoff = self.thresholds.backoff_base * (2 ** (item.retry_count - 1))
await asyncio.sleep(min(backoff, 60)) # Cap at 60 seconds
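        # With the default backoff_base of 5, successive retries wait
        # 5s, 10s, 20s, 40s, then hit the 60s cap (capped exponential backoff).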
await state_manager.retry_item(item)
async def _handle_emergency_recovery(
self,
url: str,
item: QueueItem,
state_manager,
metrics_manager
) -> None:
"""Handle emergency recovery of an item"""
logger.warning(f"Emergency recovery for item: {url}")
# Force item cleanup
await state_manager.force_cleanup_item(item)
# Reset item state
item.retry_count = 0
item.start_time = None
item.processing_time = 0
item.status = "pending"
item.priority = 10 # High priority
# Add back to queue
await state_manager.retry_item(item)
async def perform_emergency_recovery(
self,
state_manager,
metrics_manager
) -> None:
"""Perform emergency recovery of all processing items"""
try:
logger.warning("Performing emergency recovery of all processing items")
processing_items = await state_manager.get_all_processing_items()
recovered, failed = await self.recover_stuck_items(
[(item.url, item) for item in processing_items],
state_manager,
metrics_manager
)
logger.info(f"Emergency recovery complete - Recovered: {recovered}, Failed: {failed}")
except Exception as e:
logger.error(f"Error during emergency recovery: {str(e)}")
def should_recover_item(self, item: QueueItem) -> bool:
"""Check if an item should be recovered"""
if not hasattr(item, 'start_time') or not item.start_time:
return False
processing_time = datetime.utcnow().timestamp() - item.start_time
return processing_time > self.thresholds.deadlock_threshold
def get_recovery_stats(self) -> Dict[str, Any]:
"""Get recovery statistics"""
return {
"policy": self.policy.value,
"thresholds": {
"max_retries": self.thresholds.max_retries,
"deadlock_threshold": self.thresholds.deadlock_threshold,
"emergency_threshold": self.thresholds.emergency_threshold,
"max_concurrent": self.thresholds.max_concurrent_recoveries
},
"tracker": self.tracker.get_stats()
}

View File

@@ -0,0 +1,366 @@
"""Module for managing queue state"""
import logging
import asyncio
from enum import Enum
from dataclasses import dataclass
from typing import Dict, Set, List, Optional, Any
from datetime import datetime
from .models import QueueItem, QueueMetrics
logger = logging.getLogger("QueueStateManager")
class ItemState(Enum):
"""Possible states for queue items"""
PENDING = "pending"
PROCESSING = "processing"
COMPLETED = "completed"
FAILED = "failed"
RETRYING = "retrying"
@dataclass
class StateTransition:
"""Records a state transition"""
item_url: str
from_state: ItemState
to_state: ItemState
timestamp: datetime
reason: Optional[str] = None
class StateSnapshot:
"""Represents a point-in-time snapshot of queue state"""
def __init__(self):
self.timestamp = datetime.utcnow()
self.queue: List[QueueItem] = []
self.processing: Dict[str, QueueItem] = {}
self.completed: Dict[str, QueueItem] = {}
self.failed: Dict[str, QueueItem] = {}
self.guild_queues: Dict[int, Set[str]] = {}
self.channel_queues: Dict[int, Set[str]] = {}
def to_dict(self) -> Dict[str, Any]:
"""Convert snapshot to dictionary"""
return {
"timestamp": self.timestamp.isoformat(),
"queue": [item.__dict__ for item in self.queue],
"processing": {url: item.__dict__ for url, item in self.processing.items()},
"completed": {url: item.__dict__ for url, item in self.completed.items()},
"failed": {url: item.__dict__ for url, item in self.failed.items()},
"guild_queues": {gid: list(urls) for gid, urls in self.guild_queues.items()},
"channel_queues": {cid: list(urls) for cid, urls in self.channel_queues.items()}
}
class StateValidator:
"""Validates queue state"""
@staticmethod
def validate_item(item: QueueItem) -> bool:
"""Validate a queue item"""
return all([
isinstance(item.url, str) and item.url,
isinstance(item.guild_id, int) and item.guild_id > 0,
isinstance(item.channel_id, int) and item.channel_id > 0,
isinstance(item.priority, int) and 0 <= item.priority <= 10,
isinstance(item.added_at, datetime),
isinstance(item.status, str)
])
@staticmethod
def validate_transition(
item: QueueItem,
from_state: ItemState,
to_state: ItemState
) -> bool:
"""Validate a state transition"""
valid_transitions = {
ItemState.PENDING: {ItemState.PROCESSING, ItemState.FAILED},
ItemState.PROCESSING: {ItemState.COMPLETED, ItemState.FAILED, ItemState.RETRYING},
ItemState.FAILED: {ItemState.RETRYING},
ItemState.RETRYING: {ItemState.PENDING},
ItemState.COMPLETED: set() # No transitions from completed
}
return to_state in valid_transitions.get(from_state, set())
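# The allowed transitions form a simple lifecycle: PENDING -> PROCESSING ->
# COMPLETED/FAILED/RETRYING, FAILED -> RETRYING -> PENDING. For example,
# validate_transition(item, ItemState.PENDING, ItemState.COMPLETED) is False:
# an item must pass through PROCESSING before it can complete.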
class StateTracker:
"""Tracks state changes and transitions"""
def __init__(self, max_history: int = 1000):
self.max_history = max_history
self.transitions: List[StateTransition] = []
self.snapshots: List[StateSnapshot] = []
self.state_counts: Dict[ItemState, int] = {state: 0 for state in ItemState}
def record_transition(
self,
transition: StateTransition
) -> None:
"""Record a state transition"""
self.transitions.append(transition)
if len(self.transitions) > self.max_history:
self.transitions.pop(0)
        # A self-transition (e.g. the initial PENDING -> PENDING add) only
        # increments the target count; decrementing too would net to zero on
        # add and drive counts negative on later transitions.
        if transition.from_state != transition.to_state:
            self.state_counts[transition.from_state] -= 1
        self.state_counts[transition.to_state] += 1
def take_snapshot(self, state_manager: 'QueueStateManager') -> None:
"""Take a snapshot of current state"""
snapshot = StateSnapshot()
snapshot.queue = state_manager._queue.copy()
snapshot.processing = state_manager._processing.copy()
snapshot.completed = state_manager._completed.copy()
snapshot.failed = state_manager._failed.copy()
snapshot.guild_queues = {
gid: urls.copy() for gid, urls in state_manager._guild_queues.items()
}
snapshot.channel_queues = {
cid: urls.copy() for cid, urls in state_manager._channel_queues.items()
}
self.snapshots.append(snapshot)
if len(self.snapshots) > self.max_history:
self.snapshots.pop(0)
def get_state_history(self) -> Dict[str, Any]:
"""Get state history statistics"""
return {
"transitions": len(self.transitions),
"snapshots": len(self.snapshots),
"state_counts": {
state.value: count
for state, count in self.state_counts.items()
},
"latest_snapshot": (
self.snapshots[-1].to_dict()
if self.snapshots
else None
)
}
class QueueStateManager:
"""Manages the state of the queue system"""
def __init__(self, max_queue_size: int = 1000):
self.max_queue_size = max_queue_size
# Queue storage
self._queue: List[QueueItem] = []
self._processing: Dict[str, QueueItem] = {}
self._completed: Dict[str, QueueItem] = {}
self._failed: Dict[str, QueueItem] = {}
# Tracking
self._guild_queues: Dict[int, Set[str]] = {}
self._channel_queues: Dict[int, Set[str]] = {}
# State management
self._lock = asyncio.Lock()
self.validator = StateValidator()
self.tracker = StateTracker()
async def add_item(self, item: QueueItem) -> bool:
"""Add an item to the queue"""
if not self.validator.validate_item(item):
logger.error(f"Invalid queue item: {item}")
return False
async with self._lock:
if len(self._queue) >= self.max_queue_size:
return False
# Record transition
self.tracker.record_transition(StateTransition(
item_url=item.url,
from_state=ItemState.PENDING,
to_state=ItemState.PENDING,
timestamp=datetime.utcnow(),
reason="Initial add"
))
# Add to main queue
self._queue.append(item)
self._queue.sort(key=lambda x: (-x.priority, x.added_at))
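            # Sort key: higher priority first, then FIFO (earlier added_at)
            # among items of equal priority.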
# Update tracking
if item.guild_id not in self._guild_queues:
self._guild_queues[item.guild_id] = set()
self._guild_queues[item.guild_id].add(item.url)
if item.channel_id not in self._channel_queues:
self._channel_queues[item.channel_id] = set()
self._channel_queues[item.channel_id].add(item.url)
# Take snapshot periodically
if len(self._queue) % 100 == 0:
self.tracker.take_snapshot(self)
return True
async def get_next_items(self, count: int = 5) -> List[QueueItem]:
"""Get the next batch of items to process"""
items = []
async with self._lock:
while len(items) < count and self._queue:
item = self._queue.pop(0)
items.append(item)
self._processing[item.url] = item
# Record transition
self.tracker.record_transition(StateTransition(
item_url=item.url,
from_state=ItemState.PENDING,
to_state=ItemState.PROCESSING,
timestamp=datetime.utcnow()
))
return items
async def mark_completed(
self,
item: QueueItem,
success: bool,
error: Optional[str] = None
) -> None:
"""Mark an item as completed or failed"""
async with self._lock:
self._processing.pop(item.url, None)
to_state = ItemState.COMPLETED if success else ItemState.FAILED
self.tracker.record_transition(StateTransition(
item_url=item.url,
from_state=ItemState.PROCESSING,
to_state=to_state,
timestamp=datetime.utcnow(),
reason=error if error else None
))
if success:
self._completed[item.url] = item
else:
self._failed[item.url] = item
async def retry_item(self, item: QueueItem) -> None:
"""Add an item back to the queue for retry"""
if not self.validator.validate_transition(
item,
ItemState.FAILED,
ItemState.RETRYING
):
logger.error(f"Invalid retry transition for item: {item}")
return
async with self._lock:
self._processing.pop(item.url, None)
item.status = ItemState.PENDING.value
item.last_retry = datetime.utcnow()
item.priority = max(0, item.priority - 1)
# Record transitions
self.tracker.record_transition(StateTransition(
item_url=item.url,
from_state=ItemState.FAILED,
to_state=ItemState.RETRYING,
timestamp=datetime.utcnow()
))
self.tracker.record_transition(StateTransition(
item_url=item.url,
from_state=ItemState.RETRYING,
to_state=ItemState.PENDING,
timestamp=datetime.utcnow()
))
self._queue.append(item)
self._queue.sort(key=lambda x: (-x.priority, x.added_at))
async def get_guild_status(self, guild_id: int) -> Dict[str, int]:
"""Get queue status for a specific guild"""
async with self._lock:
return {
"pending": len([
item for item in self._queue
if item.guild_id == guild_id
]),
"processing": len([
item for item in self._processing.values()
if item.guild_id == guild_id
]),
"completed": len([
item for item in self._completed.values()
if item.guild_id == guild_id
]),
"failed": len([
item for item in self._failed.values()
if item.guild_id == guild_id
])
}
async def clear_state(self) -> None:
"""Clear all state data"""
async with self._lock:
self._queue.clear()
self._processing.clear()
self._completed.clear()
self._failed.clear()
self._guild_queues.clear()
self._channel_queues.clear()
# Take final snapshot before clearing
self.tracker.take_snapshot(self)
async def get_state_for_persistence(self) -> Dict[str, Any]:
"""Get current state for persistence"""
async with self._lock:
# Take snapshot before persistence
self.tracker.take_snapshot(self)
return {
"queue": self._queue,
"processing": self._processing,
"completed": self._completed,
"failed": self._failed,
"history": self.tracker.get_state_history()
}
async def restore_state(self, state: Dict[str, Any]) -> None:
"""Restore state from persisted data"""
async with self._lock:
self._queue = state.get("queue", [])
self._processing = state.get("processing", {})
self._completed = state.get("completed", {})
self._failed = state.get("failed", {})
# Validate restored items
            # Iterate over a copy so removal doesn't skip items
            for item in list(self._queue):
                if not self.validator.validate_item(item):
                    logger.warning(f"Removing invalid restored item: {item}")
                    self._queue.remove(item)
# Rebuild tracking
self._rebuild_tracking()
def _rebuild_tracking(self) -> None:
"""Rebuild guild and channel tracking from queue data"""
self._guild_queues.clear()
self._channel_queues.clear()
for item in self._queue:
if item.guild_id not in self._guild_queues:
self._guild_queues[item.guild_id] = set()
self._guild_queues[item.guild_id].add(item.url)
if item.channel_id not in self._channel_queues:
self._channel_queues[item.channel_id] = set()
self._channel_queues[item.channel_id].add(item.url)
def get_state_stats(self) -> Dict[str, Any]:
"""Get comprehensive state statistics"""
return {
"queue_size": len(self._queue),
"processing_count": len(self._processing),
"completed_count": len(self._completed),
"failed_count": len(self._failed),
"guild_count": len(self._guild_queues),
"channel_count": len(self._channel_queues),
"history": self.tracker.get_state_history()
}

View File

@@ -0,0 +1,330 @@
"""Module for managing video compression"""
import os
import logging
import asyncio
import json
import subprocess
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Callable, Set
from .exceptions import CompressionError, VideoVerificationError
logger = logging.getLogger("CompressionManager")
class CompressionManager:
"""Manages video compression operations"""
def __init__(self, ffmpeg_mgr, max_file_size: int):
self.ffmpeg_mgr = ffmpeg_mgr
self.max_file_size = max_file_size * 1024 * 1024 # Convert to bytes
        self._active_processes: Set[asyncio.subprocess.Process] = set()
self._processes_lock = asyncio.Lock()
self._shutting_down = False
async def compress_video(
self,
input_file: str,
output_file: str,
progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str]:
"""Compress a video file
Args:
input_file: Path to input video file
output_file: Path to output video file
progress_callback: Optional callback for compression progress
Returns:
Tuple[bool, str]: (Success status, Error message if any)
"""
if self._shutting_down:
return False, "Compression manager is shutting down"
try:
# Get optimal compression parameters
compression_params = self.ffmpeg_mgr.get_compression_params(
input_file,
self.max_file_size // (1024 * 1024) # Convert to MB
)
# Try hardware acceleration first
success, error = await self._try_compression(
input_file,
output_file,
compression_params,
progress_callback,
use_hardware=True
)
# Fall back to CPU if hardware acceleration fails
if not success:
logger.warning(f"Hardware acceleration failed: {error}, falling back to CPU encoding")
success, error = await self._try_compression(
input_file,
output_file,
compression_params,
progress_callback,
use_hardware=False
)
if not success:
return False, f"Compression failed: {error}"
# Verify output file
if not await self._verify_output(input_file, output_file):
return False, "Output file verification failed"
return True, ""
except Exception as e:
logger.error(f"Error during compression: {e}")
return False, str(e)
async def _try_compression(
self,
input_file: str,
output_file: str,
params: Dict[str, str],
progress_callback: Optional[Callable[[float], None]],
use_hardware: bool
) -> Tuple[bool, str]:
"""Attempt video compression with given parameters"""
if self._shutting_down:
return False, "Compression manager is shutting down"
try:
# Build FFmpeg command
cmd = await self._build_ffmpeg_command(
input_file,
output_file,
params,
use_hardware
)
# Get video duration for progress calculation
duration = await self._get_video_duration(input_file)
# Initialize compression progress tracking
await self._init_compression_progress(
input_file,
params,
use_hardware,
duration
)
# Run compression
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
# Track the process
async with self._processes_lock:
self._active_processes.add(process)
try:
success = await self._monitor_compression(
process,
input_file,
output_file,
duration,
progress_callback
)
return success, ""
finally:
async with self._processes_lock:
self._active_processes.discard(process)
except Exception as e:
return False, str(e)
async def _build_ffmpeg_command(
self,
input_file: str,
output_file: str,
params: Dict[str, str],
use_hardware: bool
) -> List[str]:
"""Build FFmpeg command with appropriate parameters"""
ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
cmd = [ffmpeg_path, "-y", "-i", input_file, "-progress", "pipe:1"]
# Modify parameters for hardware acceleration
if use_hardware:
gpu_info = self.ffmpeg_mgr.gpu_info
if gpu_info["nvidia"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_nvenc"
elif gpu_info["amd"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_amf"
elif gpu_info["intel"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_qsv"
else:
params["c:v"] = "libx264"
# Add parameters to command
for key, value in params.items():
cmd.extend([f"-{key}", str(value)])
cmd.append(output_file)
return cmd
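    # Illustrative command produced above (exact flags depend on what
    # get_compression_params returns and which GPU, if any, is detected):
    #   ffmpeg -y -i in.mp4 -progress pipe:1 -c:v h264_nvenc -preset medium \
    #          -b:v 2M -c:a aac -b:a 128k out.mp4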
async def _monitor_compression(
self,
process: asyncio.subprocess.Process,
input_file: str,
output_file: str,
duration: float,
progress_callback: Optional[Callable[[float], None]]
) -> bool:
"""Monitor compression progress"""
start_time = datetime.utcnow()
while True:
if self._shutting_down:
process.terminate()
return False
line = await process.stdout.readline()
if not line:
break
try:
await self._update_progress(
line.decode().strip(),
input_file,
output_file,
duration,
start_time,
progress_callback
)
except Exception as e:
logger.error(f"Error updating progress: {e}")
await process.wait()
return os.path.exists(output_file)
async def _verify_output(
self,
input_file: str,
output_file: str
) -> bool:
"""Verify compressed output file"""
try:
# Check file exists and is not empty
if not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
return False
# Check file size is within limit
if os.path.getsize(output_file) > self.max_file_size:
return False
# Verify video integrity
return await self.ffmpeg_mgr.verify_video_file(output_file)
except Exception as e:
logger.error(f"Error verifying output file: {e}")
return False
async def cleanup(self) -> None:
"""Clean up resources"""
self._shutting_down = True
await self._terminate_processes()
async def force_cleanup(self) -> None:
"""Force cleanup of resources"""
self._shutting_down = True
await self._kill_processes()
async def _terminate_processes(self) -> None:
"""Terminate active processes gracefully"""
async with self._processes_lock:
for process in self._active_processes:
try:
process.terminate()
await asyncio.sleep(0.1)
if process.returncode is None:
process.kill()
except Exception as e:
logger.error(f"Error terminating process: {e}")
self._active_processes.clear()
async def _kill_processes(self) -> None:
"""Kill active processes immediately"""
async with self._processes_lock:
for process in self._active_processes:
try:
process.kill()
except Exception as e:
logger.error(f"Error killing process: {e}")
self._active_processes.clear()
async def _get_video_duration(self, file_path: str) -> float:
"""Get video duration in seconds"""
try:
return await self.ffmpeg_mgr.get_video_duration(file_path)
except Exception as e:
logger.error(f"Error getting video duration: {e}")
return 0
async def _init_compression_progress(
self,
input_file: str,
params: Dict[str, str],
use_hardware: bool,
duration: float
) -> None:
"""Initialize compression progress tracking"""
from videoarchiver.processor import _compression_progress
_compression_progress[input_file] = {
"active": True,
"filename": os.path.basename(input_file),
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": os.path.getsize(input_file),
"current_size": 0,
"target_size": self.max_file_size,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
async def _update_progress(
self,
line: str,
input_file: str,
output_file: str,
duration: float,
start_time: datetime,
progress_callback: Optional[Callable[[float], None]]
) -> None:
"""Update compression progress"""
if line.startswith("out_time_ms="):
current_time = int(line.split("=")[1]) / 1000000
if duration > 0:
progress = min(100, (current_time / duration) * 100)
# Update compression progress
from videoarchiver.processor import _compression_progress
if input_file in _compression_progress:
elapsed = datetime.utcnow() - start_time
_compression_progress[input_file].update({
"percent": progress,
"elapsed_time": str(elapsed).split(".")[0],
"current_size": os.path.getsize(output_file) if os.path.exists(output_file) else 0,
"current_time": current_time,
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
})
if progress_callback:
progress_callback(progress)

View File

@@ -0,0 +1,177 @@
"""Module for directory management operations"""
import os
import logging
import asyncio
from pathlib import Path
from typing import List, Optional, Tuple
from .exceptions import FileCleanupError
from .file_deletion import SecureFileDeleter
logger = logging.getLogger("DirectoryManager")
class DirectoryManager:
"""Handles directory operations and cleanup"""
def __init__(self):
self.file_deleter = SecureFileDeleter()
async def cleanup_directory(
self,
directory_path: str,
recursive: bool = True,
delete_empty: bool = True
) -> Tuple[int, List[str]]:
"""Clean up a directory by removing files and optionally empty subdirectories
Args:
directory_path: Path to the directory to clean
recursive: Whether to clean subdirectories
delete_empty: Whether to delete empty directories
Returns:
Tuple[int, List[str]]: (Number of files deleted, List of errors)
Raises:
FileCleanupError: If cleanup fails critically
"""
if not os.path.exists(directory_path):
return 0, []
deleted_count = 0
errors = []
try:
# Process files and directories
deleted, errs = await self._process_directory_contents(
directory_path,
recursive,
delete_empty
)
deleted_count += deleted
errors.extend(errs)
# Clean up empty directories if requested
if delete_empty:
dir_errs = await self._cleanup_empty_directories(directory_path)
errors.extend(dir_errs)
if errors:
logger.warning(f"Cleanup completed with {len(errors)} errors")
else:
logger.info(f"Successfully cleaned directory: {directory_path}")
return deleted_count, errors
except Exception as e:
logger.error(f"Error during cleanup of {directory_path}: {e}")
raise FileCleanupError(f"Directory cleanup failed: {str(e)}")
async def _process_directory_contents(
self,
directory_path: str,
recursive: bool,
delete_empty: bool
) -> Tuple[int, List[str]]:
"""Process contents of a directory"""
deleted_count = 0
errors = []
try:
for entry in os.scandir(directory_path):
try:
if entry.is_file():
# Delete file
if await self.file_deleter.delete_file(entry.path):
deleted_count += 1
else:
errors.append(f"Failed to delete file: {entry.path}")
elif entry.is_dir() and recursive:
# Process subdirectory
subdir_deleted, subdir_errors = await self.cleanup_directory(
entry.path,
recursive=True,
delete_empty=delete_empty
)
deleted_count += subdir_deleted
errors.extend(subdir_errors)
except Exception as e:
errors.append(f"Error processing {entry.path}: {str(e)}")
except Exception as e:
errors.append(f"Error scanning directory {directory_path}: {str(e)}")
return deleted_count, errors
async def _cleanup_empty_directories(self, start_path: str) -> List[str]:
"""Remove empty directories recursively"""
errors = []
try:
for root, dirs, files in os.walk(start_path, topdown=False):
for name in dirs:
try:
dir_path = os.path.join(root, name)
if not os.listdir(dir_path): # Check if directory is empty
await self._remove_directory(dir_path)
except Exception as e:
errors.append(f"Error removing directory {name}: {str(e)}")
except Exception as e:
errors.append(f"Error walking directory tree: {str(e)}")
return errors
async def _remove_directory(self, dir_path: str) -> None:
"""Remove a directory safely"""
try:
await asyncio.to_thread(os.rmdir, dir_path)
except Exception as e:
logger.error(f"Failed to remove directory {dir_path}: {e}")
raise
async def ensure_directory(self, directory_path: str) -> None:
"""Ensure a directory exists and is accessible
Args:
directory_path: Path to the directory to ensure
Raises:
FileCleanupError: If directory cannot be created or accessed
"""
try:
path = Path(directory_path)
path.mkdir(parents=True, exist_ok=True)
# Verify directory is writable
if not os.access(directory_path, os.W_OK):
raise FileCleanupError(f"Directory {directory_path} is not writable")
except Exception as e:
logger.error(f"Error ensuring directory {directory_path}: {e}")
raise FileCleanupError(f"Failed to ensure directory: {str(e)}")
async def get_directory_size(self, directory_path: str) -> int:
"""Get total size of a directory in bytes
Args:
directory_path: Path to the directory
Returns:
int: Total size in bytes
"""
total_size = 0
try:
for entry in os.scandir(directory_path):
try:
if entry.is_file():
total_size += entry.stat().st_size
elif entry.is_dir():
total_size += await self.get_directory_size(entry.path)
except Exception as e:
logger.warning(f"Error getting size for {entry.path}: {e}")
except Exception as e:
logger.error(f"Error calculating directory size: {e}")
return total_size

View File

@@ -0,0 +1,207 @@
"""Module for managing video downloads"""
import os
import logging
import asyncio
import yt_dlp
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Tuple, Callable, Any
from pathlib import Path
from .verification_manager import VideoVerificationManager
from .compression_manager import CompressionManager
from .progress_tracker import ProgressTracker
logger = logging.getLogger("DownloadManager")
class CancellableYTDLLogger:
"""Custom yt-dlp logger that can be cancelled"""
def __init__(self):
self.cancelled = False
def debug(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.debug(msg)
def warning(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.warning(msg)
def error(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.error(msg)
class DownloadManager:
"""Manages video downloads and processing"""
MAX_RETRIES = 5
RETRY_DELAY = 10
FILE_OP_RETRIES = 3
FILE_OP_RETRY_DELAY = 1
SHUTDOWN_TIMEOUT = 15 # seconds
def __init__(
self,
download_path: str,
video_format: str,
max_quality: int,
max_file_size: int,
enabled_sites: Optional[List[str]] = None,
concurrent_downloads: int = 2,
ffmpeg_mgr = None
):
self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True)
os.chmod(str(self.download_path), 0o755)
# Initialize components
self.verification_manager = VideoVerificationManager(ffmpeg_mgr)
self.compression_manager = CompressionManager(ffmpeg_mgr, max_file_size)
self.progress_tracker = ProgressTracker()
# Create thread pool
self.download_pool = ThreadPoolExecutor(
max_workers=max(1, min(3, concurrent_downloads)),
thread_name_prefix="videoarchiver_download"
)
# Initialize state
self._shutting_down = False
self.ytdl_logger = CancellableYTDLLogger()
# Configure yt-dlp options
self.ydl_opts = self._configure_ydl_opts(
video_format,
max_quality,
max_file_size,
ffmpeg_mgr
)
def _configure_ydl_opts(
self,
video_format: str,
max_quality: int,
max_file_size: int,
ffmpeg_mgr
) -> Dict[str, Any]:
"""Configure yt-dlp options"""
return {
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best",
"outtmpl": "%(title)s.%(ext)s",
"merge_output_format": video_format,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"concurrent_fragment_downloads": 1,
"retries": self.MAX_RETRIES,
"fragment_retries": self.MAX_RETRIES,
"file_access_retries": self.FILE_OP_RETRIES,
"extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook],
"ffmpeg_location": str(ffmpeg_mgr.get_ffmpeg_path()),
"ffprobe_location": str(ffmpeg_mgr.get_ffprobe_path()),
"paths": {"home": str(self.download_path)},
"logger": self.ytdl_logger,
"ignoreerrors": True,
"no_color": True,
"geo_bypass": True,
"socket_timeout": 60,
"http_chunk_size": 1048576,
"external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]},
"max_sleep_interval": 5,
"sleep_interval": 1,
"max_filesize": max_file_size * 1024 * 1024,
}
def _check_file_size(self, info: Dict[str, Any]) -> None:
"""Check if file size is within limits"""
if info.get("filepath") and os.path.exists(info["filepath"]):
try:
size = os.path.getsize(info["filepath"])
if size > self.compression_manager.max_file_size:
logger.info(f"File exceeds size limit, will compress: {info['filepath']}")
except OSError as e:
logger.error(f"Error checking file size: {str(e)}")
def _progress_hook(self, d: Dict[str, Any]) -> None:
"""Handle download progress"""
if d["status"] == "finished":
logger.info(f"Download completed: {d['filename']}")
elif d["status"] == "downloading":
try:
self.progress_tracker.update_download_progress(d)
except Exception as e:
logger.debug(f"Error logging progress: {str(e)}")
async def cleanup(self) -> None:
"""Clean up resources"""
self._shutting_down = True
self.ytdl_logger.cancelled = True
self.download_pool.shutdown(wait=False, cancel_futures=True)
await self.compression_manager.cleanup()
self.progress_tracker.clear_progress()
async def force_cleanup(self) -> None:
"""Force cleanup of all resources"""
self._shutting_down = True
self.ytdl_logger.cancelled = True
self.download_pool.shutdown(wait=False, cancel_futures=True)
await self.compression_manager.force_cleanup()
self.progress_tracker.clear_progress()
async def download_video(
self,
url: str,
progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str, str]:
"""Download and process a video"""
if self._shutting_down:
return False, "", "Downloader is shutting down"
self.progress_tracker.start_download(url)
try:
# Download video
success, file_path, error = await self._safe_download(
url,
progress_callback
)
if not success:
return False, "", error
# Verify and compress if needed
return await self._process_downloaded_file(
file_path,
progress_callback
)
except Exception as e:
logger.error(f"Download error: {str(e)}")
return False, "", str(e)
finally:
self.progress_tracker.end_download(url)
async def _safe_download(
self,
url: str,
progress_callback: Optional[Callable[[float], None]]
) -> Tuple[bool, str, str]:
"""Safely download video with retries"""
        # Placeholder in this commit: the retry-driven download loop will be
        # implemented here on top of the new verification/compression components.
        pass
async def _process_downloaded_file(
self,
file_path: str,
progress_callback: Optional[Callable[[float], None]]
) -> Tuple[bool, str, str]:
"""Process a downloaded file (verify and compress if needed)"""
        # Placeholder in this commit: verification and optional compression of
        # the downloaded file will be implemented here using the new components.
        pass
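# Hedged sketch (not part of this commit): one way the stubbed _safe_download
# above could be realized with the public yt-dlp API (YoutubeDL.extract_info,
# prepare_filename) and asyncio's run_in_executor. Folding this helper into
# DownloadManager._safe_download is an assumption, not the author's final code.
async def _sketch_safe_download(
    manager: "DownloadManager",
    url: str
) -> Tuple[bool, str, str]:
    """Run a blocking yt-dlp download in the manager's thread pool with retries."""
    def _run() -> str:
        with yt_dlp.YoutubeDL(manager.ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            if info is None:
                raise RuntimeError("yt-dlp returned no info for this URL")
            # Map the info dict back to the output file path
            return ydl.prepare_filename(info)
    loop = asyncio.get_running_loop()
    last_error = ""
    for attempt in range(manager.MAX_RETRIES):
        if manager._shutting_down:
            return False, "", "Downloader is shutting down"
        try:
            file_path = await loop.run_in_executor(manager.download_pool, _run)
            return True, file_path, ""
        except Exception as e:
            last_error = str(e)
            if attempt < manager.MAX_RETRIES - 1:
                await asyncio.sleep(manager.RETRY_DELAY)
    return False, "", last_error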

View File

@@ -0,0 +1,117 @@
"""Module for secure file deletion operations"""
import os
import stat
import asyncio
import logging
from pathlib import Path
from typing import Optional
from .exceptions import FileCleanupError
logger = logging.getLogger("FileDeleter")
class SecureFileDeleter:
"""Handles secure file deletion operations"""
def __init__(self, max_size: int = 100 * 1024 * 1024):
"""Initialize the file deleter
Args:
max_size: Maximum file size in bytes for secure deletion (default: 100MB)
"""
self.max_size = max_size
async def delete_file(self, file_path: str) -> bool:
"""Delete a file securely
Args:
file_path: Path to the file to delete
Returns:
bool: True if file was successfully deleted
Raises:
FileCleanupError: If file deletion fails after all attempts
"""
if not os.path.exists(file_path):
return True
try:
file_size = await self._get_file_size(file_path)
# For large files, skip secure deletion
if file_size > self.max_size:
return await self._delete_large_file(file_path)
# Perform secure deletion
await self._ensure_writable(file_path)
if file_size > 0:
await self._zero_file_content(file_path, file_size)
return await self._delete_file(file_path)
except Exception as e:
logger.error(f"Error during deletion of {file_path}: {e}")
return await self._force_delete(file_path)
async def _get_file_size(self, file_path: str) -> int:
"""Get the size of a file"""
try:
return os.path.getsize(file_path)
except OSError as e:
logger.warning(f"Could not get size of {file_path}: {e}")
return 0
async def _delete_large_file(self, file_path: str) -> bool:
"""Delete a large file directly"""
try:
logger.debug(f"File {file_path} exceeds max size for secure deletion, performing direct removal")
os.remove(file_path)
return True
except OSError as e:
logger.error(f"Failed to remove large file {file_path}: {e}")
return False
async def _ensure_writable(self, file_path: str) -> None:
"""Ensure a file is writable"""
try:
current_mode = os.stat(file_path).st_mode
os.chmod(file_path, current_mode | stat.S_IWRITE)
except OSError as e:
logger.warning(f"Could not modify permissions of {file_path}: {e}")
raise FileCleanupError(f"Permission error: {str(e)}")
async def _zero_file_content(self, file_path: str, file_size: int) -> None:
"""Zero out file content in chunks"""
try:
chunk_size = min(1024 * 1024, file_size) # 1MB chunks or file size if smaller
with open(file_path, "wb") as f:
for offset in range(0, file_size, chunk_size):
write_size = min(chunk_size, file_size - offset)
f.write(b'\0' * write_size)
await asyncio.sleep(0) # Allow other tasks to run
f.flush()
os.fsync(f.fileno())
except OSError as e:
logger.warning(f"Error zeroing file {file_path}: {e}")
raise
async def _delete_file(self, file_path: str) -> bool:
"""Delete a file"""
try:
Path(file_path).unlink(missing_ok=True)
return True
except OSError as e:
logger.error(f"Failed to delete file {file_path}: {e}")
return False
async def _force_delete(self, file_path: str) -> bool:
"""Force delete a file as last resort"""
try:
if os.path.exists(file_path):
os.chmod(file_path, stat.S_IWRITE | stat.S_IREAD)
Path(file_path).unlink(missing_ok=True)
except Exception as e:
logger.error(f"Force delete failed for {file_path}: {e}")
raise FileCleanupError(f"Force delete failed: {str(e)}")
return not os.path.exists(file_path)
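# Hedged usage sketch (not part of this commit): zero out and remove a file,
# skipping the overwrite pass for anything above 10 MB. The path is a placeholder.
async def _sketch_secure_delete(path: str = "downloads/old_video.mp4") -> bool:
    deleter = SecureFileDeleter(max_size=10 * 1024 * 1024)
    try:
        return await deleter.delete_file(path)
    except FileCleanupError as e:
        logger.error(f"Could not delete {path}: {e}")
        return False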

View File

@@ -1,135 +1,150 @@
"""File operation utilities"""
import os
import stat
import asyncio
import logging
from pathlib import Path
from typing import List, Tuple, Optional
from .exceptions import FileCleanupError
from .file_deletion import SecureFileDeleter
from .directory_manager import DirectoryManager
from .permission_manager import PermissionManager
logger = logging.getLogger("VideoArchiver")
class FileOperations:
"""Manages file and directory operations"""
def __init__(self):
"""Initialize file operation managers"""
self.file_deleter = SecureFileDeleter()
self.directory_manager = DirectoryManager()
self.permission_manager = PermissionManager()
async def secure_delete_file(
self,
file_path: str,
max_size: Optional[int] = None
) -> bool:
"""Delete a file securely
Args:
file_path: Path to the file to delete
        max_size: Optional maximum file size for secure deletion
Returns:
        bool: True if file was successfully deleted
Raises:
        FileCleanupError: If file deletion fails
"""
if not os.path.exists(file_path):
return True
try:
# Ensure file is writable before deletion
await self.permission_manager.ensure_writable(file_path)
# Perform secure deletion
if max_size:
self.file_deleter.max_size = max_size
return await self.file_deleter.delete_file(file_path)
except Exception as e:
logger.error(f"Error during secure file deletion: {e}")
raise FileCleanupError(f"Secure deletion failed: {str(e)}")
async def cleanup_downloads(
self,
download_path: str,
recursive: bool = True,
delete_empty: bool = True
) -> None:
"""Clean up the downloads directory
Args:
download_path: Path to the downloads directory
recursive: Whether to clean subdirectories
delete_empty: Whether to delete empty directories
Raises:
FileCleanupError: If cleanup fails
"""
try:
if not os.path.exists(download_path):
return
# Ensure we have necessary permissions
await self.permission_manager.ensure_writable(
download_path,
recursive=recursive
)
# Perform cleanup
deleted_count, errors = await self.directory_manager.cleanup_directory(
download_path,
recursive=recursive,
delete_empty=delete_empty
)
# Log results
if errors:
error_msg = "\n".join(errors)
logger.error(f"Cleanup completed with errors:\n{error_msg}")
raise FileCleanupError(f"Cleanup completed with {len(errors)} errors")
else:
logger.info(f"Successfully cleaned up {deleted_count} files")
except FileCleanupError:
raise
except Exception as e:
logger.error(f"Error during downloads cleanup: {e}")
raise FileCleanupError(f"Downloads cleanup failed: {str(e)}")
async def ensure_directory(self, directory_path: str) -> None:
"""Ensure a directory exists with proper permissions
Args:
directory_path: Path to the directory
Raises:
FileCleanupError: If directory cannot be created or accessed
"""
try:
# Create directory if needed
await self.directory_manager.ensure_directory(directory_path)
# Set proper permissions
await self.permission_manager.fix_permissions(directory_path)
# Verify it's writable
if not await self.permission_manager.check_permissions(
directory_path,
require_writable=True,
require_readable=True,
require_executable=True
):
raise FileCleanupError(f"Directory {directory_path} has incorrect permissions")
except Exception as e:
logger.error(f"Error ensuring directory: {e}")
raise FileCleanupError(f"Failed to ensure directory: {str(e)}")
async def get_directory_info(
self,
directory_path: str
) -> Tuple[int, List[str]]:
"""Get directory size and any permission issues
Args:
directory_path: Path to the directory
Returns:
Tuple[int, List[str]]: (Total size in bytes, List of permission issues)
"""
try:
# Get directory size
total_size = await self.directory_manager.get_directory_size(directory_path)
            # Attempt to fix permissions, collecting any issues encountered
permission_issues = await self.permission_manager.fix_permissions(
directory_path,
recursive=True
)
return total_size, permission_issues
except Exception as e:
logger.error(f"Error getting directory info: {e}")
return 0, [f"Error: {str(e)}"]
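# Hedged usage sketch (not part of this commit): a typical maintenance pass
# through the facade above. The "downloads" path is a placeholder.
async def _sketch_maintenance(download_path: str = "downloads") -> None:
    ops = FileOperations()
    await ops.ensure_directory(download_path)
    size, issues = await ops.get_directory_info(download_path)
    logger.info(f"{download_path}: {size} bytes, {len(issues)} permission issues")
    await ops.cleanup_downloads(download_path, recursive=True, delete_empty=True)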

View File

@@ -7,14 +7,166 @@ import stat
import logging
import contextlib
import time
from typing import AsyncIterator, List, Optional
from pathlib import Path
from .exceptions import FileCleanupError
from .permission_manager import PermissionManager
logger = logging.getLogger("PathManager")
class TempDirectoryManager:
"""Manages temporary directory creation and cleanup"""
def __init__(self):
self.permission_manager = PermissionManager()
self.max_retries = 3
self.retry_delay = 1
async def create_temp_dir(self, prefix: str = "videoarchiver_") -> str:
"""Create a temporary directory with proper permissions
Args:
prefix: Prefix for temporary directory name
Returns:
str: Path to temporary directory
Raises:
FileCleanupError: If directory creation fails
"""
try:
# Create temp directory
temp_dir = tempfile.mkdtemp(prefix=prefix)
logger.debug(f"Created temporary directory: {temp_dir}")
# Set proper permissions
await self.permission_manager.set_permissions(
temp_dir,
stat.S_IRWXU, # rwx for user only
recursive=False
)
# Verify directory
if not await self._verify_directory(temp_dir):
raise FileCleanupError(f"Failed to verify temporary directory: {temp_dir}")
return temp_dir
except Exception as e:
logger.error(f"Error creating temporary directory: {e}")
raise FileCleanupError(f"Failed to create temporary directory: {str(e)}")
async def cleanup_temp_dir(self, temp_dir: str) -> List[str]:
"""Clean up a temporary directory
Args:
temp_dir: Path to temporary directory
Returns:
List[str]: List of any cleanup errors
"""
if not temp_dir or not os.path.exists(temp_dir):
return []
cleanup_errors = []
try:
# Set permissions recursively
await self._prepare_for_cleanup(temp_dir, cleanup_errors)
# Attempt cleanup with retries
for attempt in range(self.max_retries):
try:
# Remove directory
shutil.rmtree(temp_dir, ignore_errors=True)
# Verify removal
if not os.path.exists(temp_dir):
logger.debug(f"Successfully cleaned up temporary directory: {temp_dir}")
break
if attempt < self.max_retries - 1:
await self._retry_delay()
except Exception as e:
if attempt == self.max_retries - 1:
cleanup_errors.append(
f"Failed to clean up temporary directory {temp_dir} "
f"after {self.max_retries} attempts: {e}"
)
elif attempt < self.max_retries - 1:
await self._retry_delay()
continue
except Exception as e:
cleanup_errors.append(f"Error during temp directory cleanup: {str(e)}")
return cleanup_errors
async def _prepare_for_cleanup(
self,
temp_dir: str,
cleanup_errors: List[str]
) -> None:
"""Prepare directory for cleanup by setting permissions"""
for root, dirs, files in os.walk(temp_dir):
# Set directory permissions
for d in dirs:
try:
dir_path = os.path.join(root, d)
await self.permission_manager.set_permissions(
dir_path,
stat.S_IRWXU
)
except Exception as e:
cleanup_errors.append(
f"Failed to set permissions on directory {dir_path}: {e}"
)
# Set file permissions
for f in files:
try:
file_path = os.path.join(root, f)
await self.permission_manager.set_permissions(
file_path,
stat.S_IRWXU
)
except Exception as e:
cleanup_errors.append(
f"Failed to set permissions on file {file_path}: {e}"
)
async def _verify_directory(self, directory: str) -> bool:
"""Verify a directory exists and is writable"""
if not os.path.exists(directory):
return False
return await self.permission_manager.check_permissions(
directory,
require_writable=True,
require_readable=True,
require_executable=True
)
async def _retry_delay(self) -> None:
"""Sleep between retry attempts"""
await asyncio.sleep(self.retry_delay)
class PathManager:
"""Manages path operations and validation"""
def __init__(self):
self.temp_dir_manager = TempDirectoryManager()
@contextlib.asynccontextmanager
async def temp_path_context(
self,
prefix: str = "videoarchiver_"
    ) -> AsyncIterator[str]:
"""Async context manager for temporary path creation and cleanup
Args:
prefix: Prefix for temporary directory name
Yields:
str: Path to temporary directory
@@ -24,22 +176,8 @@ def temp_path_context():
"""
temp_dir = None
try:
# Create temporary directory
temp_dir = await self.temp_dir_manager.create_temp_dir(prefix)
yield temp_dir
except FileCleanupError:
@@ -49,50 +187,37 @@ def temp_path_context():
raise FileCleanupError(f"Temporary directory error: {str(e)}")
finally:
if temp_dir:
# Clean up directory
cleanup_errors = await self.temp_dir_manager.cleanup_temp_dir(temp_dir)
if cleanup_errors:
error_msg = "\n".join(cleanup_errors)
logger.error(error_msg)
# Don't raise here as we're in finally block
async def ensure_directory(self, directory: str) -> None:
"""Ensure a directory exists with proper permissions
Args:
directory: Path to ensure exists
Raises:
FileCleanupError: If directory cannot be created or accessed
"""
try:
path = Path(directory)
path.mkdir(parents=True, exist_ok=True)
# Set proper permissions
await self.temp_dir_manager.permission_manager.set_permissions(
directory,
stat.S_IRWXU
)
# Verify directory
if not await self.temp_dir_manager._verify_directory(directory):
raise FileCleanupError(f"Failed to verify directory: {directory}")
except Exception as e:
logger.error(f"Error ensuring directory {directory}: {e}")
raise FileCleanupError(f"Failed to ensure directory: {str(e)}")
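# Hedged usage sketch (not part of this commit): the async context manager
# above guarantees cleanup (with retries) even if the body raises. The file
# name is a placeholder.
async def _sketch_temp_workspace() -> None:
    paths = PathManager()
    async with paths.temp_path_context(prefix="videoarchiver_") as temp_dir:
        scratch = Path(temp_dir) / "scratch.bin"
        scratch.write_bytes(b"\0" * 1024)
        logger.debug(f"Working in {temp_dir}")
    # By this point the directory has been removed by cleanup_temp_dir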

View File

@@ -0,0 +1,202 @@
"""Module for managing file and directory permissions"""
import os
import stat
import logging
from pathlib import Path
from typing import Optional, Union, List
from .exceptions import FileCleanupError
logger = logging.getLogger("PermissionManager")
class PermissionManager:
"""Handles file and directory permission operations"""
DEFAULT_FILE_MODE = 0o644 # rw-r--r--
DEFAULT_DIR_MODE = 0o755 # rwxr-xr-x
FULL_ACCESS_MODE = 0o777 # rwxrwxrwx
def __init__(self):
self._is_windows = os.name == 'nt'
async def ensure_writable(
self,
path: Union[str, Path],
recursive: bool = False
) -> None:
"""Ensure a path is writable
Args:
path: Path to make writable
recursive: Whether to apply recursively to directories
Raises:
FileCleanupError: If permissions cannot be modified
"""
try:
path = Path(path)
if not path.exists():
return
if path.is_file():
await self._make_file_writable(path)
elif path.is_dir():
await self._make_directory_writable(path, recursive)
except Exception as e:
logger.error(f"Error ensuring writable permissions for {path}: {e}")
raise FileCleanupError(f"Failed to set writable permissions: {str(e)}")
async def _make_file_writable(self, path: Path) -> None:
"""Make a file writable"""
try:
current_mode = path.stat().st_mode
if self._is_windows:
os.chmod(path, stat.S_IWRITE | stat.S_IREAD)
else:
os.chmod(path, current_mode | stat.S_IWRITE)
except Exception as e:
logger.error(f"Failed to make file {path} writable: {e}")
raise
async def _make_directory_writable(
self,
path: Path,
recursive: bool
) -> None:
"""Make a directory writable"""
try:
if self._is_windows:
os.chmod(path, stat.S_IWRITE | stat.S_IREAD | stat.S_IEXEC)
else:
current_mode = path.stat().st_mode
os.chmod(path, current_mode | stat.S_IWRITE | stat.S_IEXEC)
if recursive:
for item in path.rglob('*'):
if item.is_file():
await self._make_file_writable(item)
elif item.is_dir():
await self._make_directory_writable(item, False)
except Exception as e:
logger.error(f"Failed to make directory {path} writable: {e}")
raise
async def set_permissions(
self,
path: Union[str, Path],
mode: int,
recursive: bool = False
) -> None:
"""Set specific permissions on a path
Args:
path: Path to set permissions on
mode: Permission mode (e.g., 0o755)
recursive: Whether to apply recursively
Raises:
FileCleanupError: If permissions cannot be set
"""
try:
path = Path(path)
if not path.exists():
return
if not self._is_windows: # Skip on Windows
os.chmod(path, mode)
if recursive and path.is_dir():
file_mode = mode & ~stat.S_IXUSR & ~stat.S_IXGRP & ~stat.S_IXOTH
for item in path.rglob('*'):
if item.is_file():
os.chmod(item, file_mode)
elif item.is_dir():
os.chmod(item, mode)
except Exception as e:
logger.error(f"Error setting permissions for {path}: {e}")
raise FileCleanupError(f"Failed to set permissions: {str(e)}")
async def check_permissions(
self,
path: Union[str, Path],
require_writable: bool = True,
require_readable: bool = True,
require_executable: bool = False
) -> bool:
"""Check if a path has required permissions
Args:
path: Path to check
require_writable: Whether write permission is required
require_readable: Whether read permission is required
require_executable: Whether execute permission is required
Returns:
bool: True if path has required permissions
"""
try:
path = Path(path)
if not path.exists():
return False
if require_readable and not os.access(path, os.R_OK):
return False
if require_writable and not os.access(path, os.W_OK):
return False
if require_executable and not os.access(path, os.X_OK):
return False
return True
except Exception as e:
logger.error(f"Error checking permissions for {path}: {e}")
return False
async def fix_permissions(
self,
path: Union[str, Path],
recursive: bool = False
) -> List[str]:
"""Fix common permission issues on a path
Args:
path: Path to fix permissions on
recursive: Whether to apply recursively
Returns:
List[str]: List of errors encountered
"""
errors = []
try:
path = Path(path)
if not path.exists():
return errors
if path.is_file():
try:
await self.set_permissions(path, self.DEFAULT_FILE_MODE)
except Exception as e:
errors.append(f"Error fixing file permissions for {path}: {str(e)}")
elif path.is_dir():
try:
await self.set_permissions(path, self.DEFAULT_DIR_MODE)
if recursive:
for item in path.rglob('*'):
try:
if item.is_file():
await self.set_permissions(item, self.DEFAULT_FILE_MODE)
elif item.is_dir():
await self.set_permissions(item, self.DEFAULT_DIR_MODE)
except Exception as e:
errors.append(f"Error fixing permissions for {item}: {str(e)}")
except Exception as e:
errors.append(f"Error fixing directory permissions for {path}: {str(e)}")
except Exception as e:
errors.append(f"Error during permission fix: {str(e)}")
return errors
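# Hedged usage sketch (not part of this commit): normalize a tree to the
# default modes and verify the result. The path is a placeholder; execute
# permission is required so 0o755 directories remain traversable.
async def _sketch_normalize(path: str = "downloads") -> bool:
    manager = PermissionManager()
    for issue in await manager.fix_permissions(path, recursive=True):
        logger.warning(issue)
    return await manager.check_permissions(
        path,
        require_writable=True,
        require_readable=True,
        require_executable=True,
    )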

View File

@@ -0,0 +1,163 @@
"""Module for tracking download and compression progress"""
import logging
from typing import Dict, Any, Optional
from datetime import datetime
logger = logging.getLogger("ProgressTracker")
class ProgressTracker:
"""Tracks progress of downloads and compression operations"""
def __init__(self):
self._download_progress: Dict[str, Dict[str, Any]] = {}
self._compression_progress: Dict[str, Dict[str, Any]] = {}
def start_download(self, url: str) -> None:
"""Initialize progress tracking for a download"""
self._download_progress[url] = {
"active": True,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"speed": "N/A",
"eta": "N/A",
"downloaded_bytes": 0,
"total_bytes": 0,
"retries": 0,
"fragment_count": 0,
"fragment_index": 0,
"video_title": "Unknown",
"extractor": "Unknown",
"format": "Unknown",
"resolution": "Unknown",
"fps": "Unknown",
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
def update_download_progress(self, data: Dict[str, Any]) -> None:
"""Update download progress information"""
try:
# Get URL from info dict
url = data.get("info_dict", {}).get("webpage_url", "unknown")
if url not in self._download_progress:
return
if data["status"] == "downloading":
self._download_progress[url].update({
"active": True,
"percent": float(data.get("_percent_str", "0").replace("%", "")),
"speed": data.get("_speed_str", "N/A"),
"eta": data.get("_eta_str", "N/A"),
"downloaded_bytes": data.get("downloaded_bytes", 0),
"total_bytes": data.get("total_bytes", 0) or data.get("total_bytes_estimate", 0),
"retries": data.get("retry_count", 0),
"fragment_count": data.get("fragment_count", 0),
"fragment_index": data.get("fragment_index", 0),
"video_title": data.get("info_dict", {}).get("title", "Unknown"),
"extractor": data.get("info_dict", {}).get("extractor", "Unknown"),
"format": data.get("info_dict", {}).get("format", "Unknown"),
"resolution": data.get("info_dict", {}).get("resolution", "Unknown"),
"fps": data.get("info_dict", {}).get("fps", "Unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
})
logger.debug(
f"Download progress for {url}: "
f"{self._download_progress[url]['percent']}% at {self._download_progress[url]['speed']}, "
f"ETA: {self._download_progress[url]['eta']}"
)
except Exception as e:
logger.error(f"Error updating download progress: {e}")
def end_download(self, url: str) -> None:
"""Mark a download as completed"""
if url in self._download_progress:
self._download_progress[url]["active"] = False
def start_compression(
self,
input_file: str,
params: Dict[str, str],
use_hardware: bool,
duration: float,
input_size: int,
target_size: int
) -> None:
"""Initialize progress tracking for compression"""
self._compression_progress[input_file] = {
"active": True,
"filename": input_file,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": input_size,
"current_size": 0,
"target_size": target_size,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
def update_compression_progress(
self,
input_file: str,
progress: float,
elapsed_time: str,
current_size: int,
current_time: float
) -> None:
"""Update compression progress information"""
if input_file in self._compression_progress:
self._compression_progress[input_file].update({
"percent": progress,
"elapsed_time": elapsed_time,
"current_size": current_size,
"current_time": current_time,
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
})
logger.debug(
f"Compression progress for {input_file}: "
f"{progress:.1f}%, Size: {current_size}/{self._compression_progress[input_file]['target_size']} bytes"
)
def end_compression(self, input_file: str) -> None:
"""Mark a compression operation as completed"""
if input_file in self._compression_progress:
self._compression_progress[input_file]["active"] = False
def get_download_progress(self, url: str) -> Optional[Dict[str, Any]]:
"""Get progress information for a download"""
return self._download_progress.get(url)
def get_compression_progress(self, input_file: str) -> Optional[Dict[str, Any]]:
"""Get progress information for a compression operation"""
return self._compression_progress.get(input_file)
def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
"""Get all active downloads"""
return {
url: progress
for url, progress in self._download_progress.items()
if progress.get("active", False)
}
def get_active_compressions(self) -> Dict[str, Dict[str, Any]]:
"""Get all active compression operations"""
return {
input_file: progress
for input_file, progress in self._compression_progress.items()
if progress.get("active", False)
}
def clear_progress(self) -> None:
"""Clear all progress tracking"""
self._download_progress.clear()
self._compression_progress.clear()
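# Hedged usage sketch (not part of this commit): feeding this tracker from a
# yt-dlp progress hook. The hook dict keys read here ("status", "info_dict",
# "_percent_str", ...) match what update_download_progress expects above;
# start_download(url) must be called before the first hook fires.
tracker = ProgressTracker()

def _sketch_progress_hook(d: Dict[str, Any]) -> None:
    if d.get("status") == "downloading":
        tracker.update_download_progress(d)
    elif d.get("status") == "finished":
        url = d.get("info_dict", {}).get("webpage_url", "unknown")
        tracker.end_download(url)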