Mirror of https://github.com/pacnpal/Pac-cogs.git (synced 2025-12-21 19:31:06 -05:00)
Core Systems:
- Component-based architecture with lifecycle management
- Enhanced error handling and recovery mechanisms
- Comprehensive state management and tracking
- Event-driven architecture with monitoring

Queue Management:
- Multiple processing strategies for different scenarios
- Advanced state management with recovery
- Comprehensive metrics and health monitoring
- Sophisticated cleanup system with multiple strategies

Processing Pipeline:
- Enhanced message handling with validation
- Improved URL extraction and processing
- Better queue management and monitoring
- Advanced cleanup mechanisms

Overall Benefits:
- Better code organization and maintainability
- Improved error handling and recovery
- Enhanced monitoring and reporting
- More robust and reliable system
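
As a quick orientation (not part of the commit message), the sketch below shows the shape of the shared queue state that the new cleaner components operate on. The shapes are inferred from the cleaner method signatures added in the files below; the variable names and empty values are illustrative only.

from typing import Dict, List, Set

# Shared queue state handed to the cleaners (shapes inferred from the
# signatures in the files below; contents are illustrative).
queue: List["QueueItem"] = []             # pending items awaiting processing
processing: Dict[str, "QueueItem"] = {}   # url -> item currently being processed
completed: Dict[str, "QueueItem"] = {}    # url -> successfully processed item
failed: Dict[str, "QueueItem"] = {}       # url -> item that failed processing
guild_queues: Dict[int, Set[str]] = {}    # guild_id -> URLs tracked for that guild
channel_queues: Dict[int, Set[str]] = {}  # channel_id -> URLs tracked for that channel
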
videoarchiver/queue/cleaners/guild_cleaner.py (new file, 500 lines)
@@ -0,0 +1,500 @@
"""Module for cleaning guild-specific queue items"""

import asyncio  # needed for the batch yields (asyncio.sleep) used below
import logging
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, List, Set, Tuple, Any, Optional
from datetime import datetime

from ..models import QueueItem

logger = logging.getLogger("GuildCleaner")


class GuildCleanupStrategy(Enum):
    """Guild cleanup strategies"""
    FULL = "full"            # Clear all guild items
    SELECTIVE = "selective"  # Clear only specific categories
    GRACEFUL = "graceful"    # Clear with grace period


class CleanupCategory(Enum):
    """Categories for cleanup"""
    QUEUE = "queue"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    TRACKING = "tracking"


@dataclass
class GuildCleanupConfig:
    """Configuration for guild cleanup"""
    categories: Set[CleanupCategory] = field(default_factory=lambda: set(CleanupCategory))
    grace_period: int = 300  # 5 minutes
    preserve_completed: bool = False
    preserve_failed: bool = False
    batch_size: int = 100


@dataclass
class GuildCleanupResult:
    """Result of a guild cleanup operation"""
    guild_id: int
    timestamp: datetime
    strategy: GuildCleanupStrategy
    items_cleared: int
    categories_cleared: Set[CleanupCategory]
    initial_counts: Dict[str, int]
    final_counts: Dict[str, int]
    duration: float
    error: Optional[str] = None


class GuildCleanupTracker:
    """Tracks guild cleanup operations"""

    def __init__(self, max_history: int = 1000):
        self.max_history = max_history
        self.history: List[GuildCleanupResult] = []
        self.cleanup_counts: Dict[int, int] = {}  # guild_id -> count
        self.total_items_cleared = 0
        self.last_cleanup: Optional[datetime] = None

    def record_cleanup(self, result: GuildCleanupResult) -> None:
        """Record a cleanup operation"""
        self.history.append(result)
        if len(self.history) > self.max_history:
            self.history.pop(0)

        self.cleanup_counts[result.guild_id] = self.cleanup_counts.get(result.guild_id, 0) + 1
        self.total_items_cleared += result.items_cleared
        self.last_cleanup = result.timestamp

    def get_stats(self) -> Dict[str, Any]:
        """Get cleanup statistics"""
        return {
            "total_cleanups": len(self.history),
            "total_items_cleared": self.total_items_cleared,
            "guilds_cleaned": len(self.cleanup_counts),
            "last_cleanup": self.last_cleanup.isoformat() if self.last_cleanup else None,
            "recent_cleanups": [
                {
                    "guild_id": r.guild_id,
                    "timestamp": r.timestamp.isoformat(),
                    "strategy": r.strategy.value,
                    "items_cleared": r.items_cleared,
                    "categories": [c.value for c in r.categories_cleared],
                }
                for r in self.history[-5:]  # Last 5 cleanups
            ],
        }


class GuildCleaner:
    """Handles cleanup of guild-specific queue items"""

    def __init__(
        self,
        strategy: GuildCleanupStrategy = GuildCleanupStrategy.GRACEFUL,
        config: Optional[GuildCleanupConfig] = None,
    ):
        self.strategy = strategy
        self.config = config or GuildCleanupConfig()
        self.tracker = GuildCleanupTracker()

    async def clear_guild_items(
        self,
        guild_id: int,
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
        completed: Dict[str, QueueItem],
        failed: Dict[str, QueueItem],
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
    ) -> Tuple[int, Dict[str, int]]:
        """Clear all queue items for a specific guild"""
        start_time = datetime.utcnow()
        cleared_categories = set()

        try:
            # Get initial counts
            initial_counts = self._get_item_counts(guild_id, queue, processing, completed, failed)

            # Get URLs for this guild
            guild_urls = guild_queues.get(guild_id, set())

            # Clear items based on strategy
            cleared_count = 0
            if self.strategy == GuildCleanupStrategy.FULL:
                cleared_count = await self._full_cleanup(
                    guild_id, queue, processing, completed, failed,
                    guild_queues, channel_queues, cleared_categories,
                )
            elif self.strategy == GuildCleanupStrategy.SELECTIVE:
                cleared_count = await self._selective_cleanup(
                    guild_id, queue, processing, completed, failed,
                    guild_queues, channel_queues, cleared_categories,
                )
            else:  # GRACEFUL
                cleared_count = await self._graceful_cleanup(
                    guild_id, queue, processing, completed, failed,
                    guild_queues, channel_queues, cleared_categories,
                )

            # Get final counts
            final_counts = self._get_item_counts(guild_id, queue, processing, completed, failed)

            # Record cleanup result
            duration = (datetime.utcnow() - start_time).total_seconds()
            result = GuildCleanupResult(
                guild_id=guild_id,
                timestamp=datetime.utcnow(),
                strategy=self.strategy,
                items_cleared=cleared_count,
                categories_cleared=cleared_categories,
                initial_counts=initial_counts,
                final_counts=final_counts,
                duration=duration,
            )
            self.tracker.record_cleanup(result)

            logger.info(self.format_guild_cleanup_report(guild_id, initial_counts, final_counts, duration))
            return cleared_count, initial_counts

        except Exception as e:
            logger.error(f"Error clearing guild {guild_id} queue: {e}")
            self.tracker.record_cleanup(GuildCleanupResult(
                guild_id=guild_id,
                timestamp=datetime.utcnow(),
                strategy=self.strategy,
                items_cleared=0,
                categories_cleared=set(),
                initial_counts={},
                final_counts={},
                duration=0,
                error=str(e),
            ))
            raise

    async def _full_cleanup(
        self,
        guild_id: int,
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
        completed: Dict[str, QueueItem],
        failed: Dict[str, QueueItem],
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        cleared_categories: Set[CleanupCategory],
    ) -> int:
        """Perform full cleanup"""
        cleared_count = 0

        # Clear from pending queue (count the removed items, not the remainder)
        initial_queue_len = len(queue)
        queue[:] = [item for item in queue if item.guild_id != guild_id]
        cleared_count += initial_queue_len - len(queue)
        cleared_categories.add(CleanupCategory.QUEUE)

        # Clear from processing
        cleared_count += await self._clear_from_dict(processing, guild_id, 'processing')
        cleared_categories.add(CleanupCategory.PROCESSING)

        # Clear from completed
        cleared_count += await self._clear_from_dict(completed, guild_id, 'completed')
        cleared_categories.add(CleanupCategory.COMPLETED)

        # Clear from failed
        cleared_count += await self._clear_from_dict(failed, guild_id, 'failed')
        cleared_categories.add(CleanupCategory.FAILED)

        # Clear tracking
        cleared_count += await self._clear_tracking(guild_id, guild_queues, channel_queues)
        cleared_categories.add(CleanupCategory.TRACKING)

        return cleared_count

    async def _selective_cleanup(
        self,
        guild_id: int,
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
        completed: Dict[str, QueueItem],
        failed: Dict[str, QueueItem],
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        cleared_categories: Set[CleanupCategory],
    ) -> int:
        """Perform selective cleanup"""
        cleared_count = 0

        # Clear only configured categories
        if CleanupCategory.QUEUE in self.config.categories:
            initial_queue_len = len(queue)
            queue[:] = [item for item in queue if item.guild_id != guild_id]
            cleared_count += initial_queue_len - len(queue)
            cleared_categories.add(CleanupCategory.QUEUE)

        if CleanupCategory.PROCESSING in self.config.categories:
            cleared_count += await self._clear_from_dict(processing, guild_id, 'processing')
            cleared_categories.add(CleanupCategory.PROCESSING)

        if CleanupCategory.COMPLETED in self.config.categories and not self.config.preserve_completed:
            cleared_count += await self._clear_from_dict(completed, guild_id, 'completed')
            cleared_categories.add(CleanupCategory.COMPLETED)

        if CleanupCategory.FAILED in self.config.categories and not self.config.preserve_failed:
            cleared_count += await self._clear_from_dict(failed, guild_id, 'failed')
            cleared_categories.add(CleanupCategory.FAILED)

        if CleanupCategory.TRACKING in self.config.categories:
            cleared_count += await self._clear_tracking(guild_id, guild_queues, channel_queues)
            cleared_categories.add(CleanupCategory.TRACKING)

        return cleared_count

    async def _graceful_cleanup(
        self,
        guild_id: int,
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
        completed: Dict[str, QueueItem],
        failed: Dict[str, QueueItem],
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        cleared_categories: Set[CleanupCategory],
    ) -> int:
        """Perform graceful cleanup"""
        cleared_count = 0
        cutoff_time = datetime.utcnow().timestamp() - self.config.grace_period

        # Clear queue items beyond grace period (count the removed items, not the remainder)
        initial_queue_len = len(queue)
        queue[:] = [
            item for item in queue
            if not (item.guild_id == guild_id and item.added_at.timestamp() < cutoff_time)
        ]
        cleared_count += initial_queue_len - len(queue)
        cleared_categories.add(CleanupCategory.QUEUE)

        # Clear processing items beyond grace period
        for url in list(processing.keys()):
            item = processing[url]
            if item.guild_id == guild_id and item.added_at.timestamp() < cutoff_time:
                processing.pop(url)
                cleared_count += 1
                cleared_categories.add(CleanupCategory.PROCESSING)

        # Clear completed and failed based on config
        if not self.config.preserve_completed:
            cleared_count += await self._clear_from_dict(completed, guild_id, 'completed')
            cleared_categories.add(CleanupCategory.COMPLETED)

        if not self.config.preserve_failed:
            cleared_count += await self._clear_from_dict(failed, guild_id, 'failed')
            cleared_categories.add(CleanupCategory.FAILED)

        # Clear tracking
        cleared_count += await self._clear_tracking(guild_id, guild_queues, channel_queues)
        cleared_categories.add(CleanupCategory.TRACKING)

        return cleared_count

    async def _clear_from_dict(
        self,
        items_dict: Dict[str, QueueItem],
        guild_id: int,
        category: str,
    ) -> int:
        """Clear guild items from a dictionary"""
        cleared = 0
        batch_count = 0

        for url in list(items_dict.keys()):
            if items_dict[url].guild_id == guild_id:
                items_dict.pop(url)
                cleared += 1
                batch_count += 1

                # Process in batches
                if batch_count >= self.config.batch_size:
                    await asyncio.sleep(0)  # Yield to event loop
                    batch_count = 0

        logger.debug(f"Cleared {cleared} {category} items for guild {guild_id}")
        return cleared

    async def _clear_tracking(
        self,
        guild_id: int,
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
    ) -> int:
        """Clear guild tracking data"""
        cleared = 0
        guild_urls = guild_queues.get(guild_id, set())

        # Clear guild tracking
        if guild_id in guild_queues:
            cleared += len(guild_queues[guild_id])
            guild_queues.pop(guild_id)

        # Clear channel tracking
        await self._clear_channel_tracking(channel_queues, guild_urls)

        return cleared

    async def _clear_channel_tracking(
        self,
        channel_queues: Dict[int, Set[str]],
        guild_urls: Set[str],
    ) -> None:
        """Clear channel tracking for guild URLs"""
        batch_count = 0

        for channel_id in list(channel_queues.keys()):
            channel_queues[channel_id] = {
                url for url in channel_queues[channel_id] if url not in guild_urls
            }
            if not channel_queues[channel_id]:
                channel_queues.pop(channel_id)

            batch_count += 1
            if batch_count >= self.config.batch_size:
                await asyncio.sleep(0)  # Yield to event loop
                batch_count = 0

    def _get_item_counts(
        self,
        guild_id: int,
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
        completed: Dict[str, QueueItem],
        failed: Dict[str, QueueItem],
    ) -> Dict[str, int]:
        """Get item counts for a guild"""
        return {
            'queue': len([item for item in queue if item.guild_id == guild_id]),
            'processing': len([item for item in processing.values() if item.guild_id == guild_id]),
            'completed': len([item for item in completed.values() if item.guild_id == guild_id]),
            'failed': len([item for item in failed.values() if item.guild_id == guild_id]),
        }

    def format_guild_cleanup_report(
        self,
        guild_id: int,
        initial_counts: Dict[str, int],
        final_counts: Dict[str, int],
        duration: float,
    ) -> str:
        """Format a guild cleanup report"""
        return (
            f"Guild {guild_id} Cleanup Results:\n"
            f"Strategy: {self.strategy.value}\n"
            f"Duration: {duration:.2f}s\n"
            f"Items:\n"
            f"- Queue: {initial_counts['queue']} -> {final_counts['queue']}\n"
            f"- Processing: {initial_counts['processing']} -> {final_counts['processing']}\n"
            f"- Completed: {initial_counts['completed']} -> {final_counts['completed']}\n"
            f"- Failed: {initial_counts['failed']} -> {final_counts['failed']}\n"
            f"Total cleared: {sum(initial_counts.values()) - sum(final_counts.values())} items"
        )

    def get_cleaner_stats(self) -> Dict[str, Any]:
        """Get comprehensive cleaner statistics"""
        return {
            "strategy": self.strategy.value,
            "config": {
                "categories": [c.value for c in self.config.categories],
                "grace_period": self.config.grace_period,
                "preserve_completed": self.config.preserve_completed,
                "preserve_failed": self.config.preserve_failed,
                "batch_size": self.config.batch_size,
            },
            "tracker": self.tracker.get_stats(),
        }
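
Example (not part of the commit): a minimal sketch of driving GuildCleaner with a selective strategy, assuming the cog package is importable as videoarchiver (including queue.models) and using empty queue state purely for illustration.

import asyncio
from videoarchiver.queue.cleaners.guild_cleaner import (
    GuildCleaner, GuildCleanupStrategy, GuildCleanupConfig, CleanupCategory,
)

async def demo() -> None:
    cleaner = GuildCleaner(
        strategy=GuildCleanupStrategy.SELECTIVE,
        config=GuildCleanupConfig(categories={CleanupCategory.QUEUE, CleanupCategory.TRACKING}),
    )
    # Empty state for illustration; in the cog this comes from the queue manager.
    cleared, initial = await cleaner.clear_guild_items(
        guild_id=123456789,
        queue=[], processing={}, completed={}, failed={},
        guild_queues={}, channel_queues={},
    )
    print(f"cleared {cleared} items, initial counts: {initial}")

asyncio.run(demo())
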
videoarchiver/queue/cleaners/history_cleaner.py (new file, 336 lines)
@@ -0,0 +1,336 @@
"""Module for cleaning historical queue items"""

import logging
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, Optional, List, Any, Set
from datetime import datetime, timedelta

from ..models import QueueItem

logger = logging.getLogger("HistoryCleaner")


class CleanupStrategy(Enum):
    """Cleanup strategies"""
    AGGRESSIVE = "aggressive"      # Remove more aggressively
    CONSERVATIVE = "conservative"  # Remove conservatively
    BALANCED = "balanced"          # Balance between retention and cleanup


class CleanupPolicy(Enum):
    """Cleanup policies"""
    AGE = "age"        # Clean based on age
    SIZE = "size"      # Clean based on size
    HYBRID = "hybrid"  # Consider both age and size


@dataclass
class CleanupThresholds:
    """Thresholds for cleanup operations"""
    max_history_age: int = 43200  # 12 hours
    max_completed_items: int = 10000
    max_failed_items: int = 5000
    min_retention_time: int = 3600  # 1 hour
    size_threshold: int = 100 * 1024 * 1024  # 100MB


@dataclass
class CleanupResult:
    """Result of a cleanup operation"""
    timestamp: datetime
    items_cleaned: int
    space_freed: int
    duration: float
    strategy: CleanupStrategy
    policy: CleanupPolicy
    details: Dict[str, Any] = field(default_factory=dict)


class CleanupTracker:
    """Tracks cleanup operations"""

    def __init__(self, max_history: int = 1000):
        self.max_history = max_history
        self.history: List[CleanupResult] = []
        self.total_items_cleaned = 0
        self.total_space_freed = 0
        self.last_cleanup: Optional[datetime] = None

    def record_cleanup(self, result: CleanupResult) -> None:
        """Record a cleanup operation"""
        self.history.append(result)
        if len(self.history) > self.max_history:
            self.history.pop(0)

        self.total_items_cleaned += result.items_cleaned
        self.total_space_freed += result.space_freed
        self.last_cleanup = result.timestamp

    def get_stats(self) -> Dict[str, Any]:
        """Get cleanup statistics"""
        return {
            "total_cleanups": len(self.history),
            "total_items_cleaned": self.total_items_cleaned,
            "total_space_freed": self.total_space_freed,
            "last_cleanup": self.last_cleanup.isoformat() if self.last_cleanup else None,
            "recent_cleanups": [
                {
                    "timestamp": r.timestamp.isoformat(),
                    "items_cleaned": r.items_cleaned,
                    "space_freed": r.space_freed,
                    "strategy": r.strategy.value,
                    "policy": r.policy.value,
                }
                for r in self.history[-5:]  # Last 5 cleanups
            ],
        }


class HistoryCleaner:
    """Handles cleanup of historical queue items"""

    def __init__(
        self,
        strategy: CleanupStrategy = CleanupStrategy.BALANCED,
        policy: CleanupPolicy = CleanupPolicy.HYBRID,
        thresholds: Optional[CleanupThresholds] = None,
    ):
        self.strategy = strategy
        self.policy = policy
        self.thresholds = thresholds or CleanupThresholds()
        self.tracker = CleanupTracker()

    def _normalize_datetime(self, dt_value: Any) -> datetime:
        """Normalize a datetime value"""
        current_time = datetime.utcnow()

        if not isinstance(dt_value, datetime):
            try:
                if isinstance(dt_value, str):
                    return datetime.fromisoformat(dt_value)
                else:
                    return current_time
            except (ValueError, TypeError):
                return current_time
        return dt_value

    async def cleanup_completed(
        self,
        completed: Dict[str, QueueItem],
        cleanup_cutoff: datetime,
    ) -> int:
        """Clean up completed items"""
        start_time = datetime.utcnow()
        items_cleaned = 0
        space_freed = 0

        try:
            # Determine cleanup approach based on strategy and policy
            if self.policy == CleanupPolicy.SIZE:
                items_to_clean = self._get_items_by_size(completed)
            elif self.policy == CleanupPolicy.HYBRID:
                items_to_clean = self._get_items_hybrid(completed, cleanup_cutoff)
            else:  # AGE policy
                items_to_clean = self._get_items_by_age(completed, cleanup_cutoff)

            # Clean items
            for url in items_to_clean:
                try:
                    item = completed[url]
                    space_freed += self._estimate_item_size(item)
                    completed.pop(url)
                    items_cleaned += 1
                except Exception as e:
                    logger.error(f"Error cleaning completed item {url}: {e}")
                    completed.pop(url, None)  # Remove defensively; the item may already be gone
                    items_cleaned += 1

            # Record cleanup
            self._record_cleanup_result(items_cleaned, space_freed, start_time, "completed")

            logger.debug(f"Cleaned {items_cleaned} completed items")
            return items_cleaned

        except Exception as e:
            logger.error(f"Error during completed items cleanup: {e}")
            return 0

    async def cleanup_failed(
        self,
        failed: Dict[str, QueueItem],
        cleanup_cutoff: datetime,
    ) -> int:
        """Clean up failed items"""
        start_time = datetime.utcnow()
        items_cleaned = 0
        space_freed = 0

        try:
            # Determine cleanup approach
            if self.policy == CleanupPolicy.SIZE:
                items_to_clean = self._get_items_by_size(failed)
            elif self.policy == CleanupPolicy.HYBRID:
                items_to_clean = self._get_items_hybrid(failed, cleanup_cutoff)
            else:  # AGE policy
                items_to_clean = self._get_items_by_age(failed, cleanup_cutoff)

            # Clean items
            for url in items_to_clean:
                try:
                    item = failed[url]
                    space_freed += self._estimate_item_size(item)
                    failed.pop(url)
                    items_cleaned += 1
                except Exception as e:
                    logger.error(f"Error cleaning failed item {url}: {e}")
                    failed.pop(url, None)  # Remove defensively; the item may already be gone
                    items_cleaned += 1

            # Record cleanup
            self._record_cleanup_result(items_cleaned, space_freed, start_time, "failed")

            logger.debug(f"Cleaned {items_cleaned} failed items")
            return items_cleaned

        except Exception as e:
            logger.error(f"Error during failed items cleanup: {e}")
            return 0

    def _get_items_by_age(
        self,
        items: Dict[str, QueueItem],
        cutoff: datetime,
    ) -> Set[str]:
        """Get items to clean based on age"""
        to_clean = set()

        for url, item in items.items():
            item.added_at = self._normalize_datetime(item.added_at)
            if item.added_at < cutoff:
                to_clean.add(url)

        return to_clean

    def _get_items_by_size(self, items: Dict[str, QueueItem]) -> Set[str]:
        """Get items to clean based on size"""
        to_clean = set()
        total_size = 0

        # Sort items by size estimate
        sorted_items = sorted(
            items.items(),
            key=lambda x: self._estimate_item_size(x[1]),
            reverse=True,
        )

        for url, item in sorted_items:
            total_size += self._estimate_item_size(item)
            if total_size > self.thresholds.size_threshold:
                to_clean.add(url)

        return to_clean

    def _get_items_hybrid(
        self,
        items: Dict[str, QueueItem],
        cutoff: datetime,
    ) -> Set[str]:
        """Get items to clean using hybrid approach"""
        by_age = self._get_items_by_age(items, cutoff)
        by_size = self._get_items_by_size(items)

        if self.strategy == CleanupStrategy.AGGRESSIVE:
            return by_age.union(by_size)
        elif self.strategy == CleanupStrategy.CONSERVATIVE:
            return by_age.intersection(by_size)
        else:  # BALANCED
            return by_age

    def _estimate_item_size(self, item: QueueItem) -> int:
        """Estimate size of an item in bytes"""
        # This could be enhanced with actual file size tracking
        base_size = 1024  # 1KB base size
        return base_size * (item.retry_count + 1)

    def _record_cleanup_result(
        self,
        items_cleaned: int,
        space_freed: int,
        start_time: datetime,
        cleanup_type: str,
    ) -> None:
        """Record cleanup result"""
        duration = (datetime.utcnow() - start_time).total_seconds()

        result = CleanupResult(
            timestamp=datetime.utcnow(),
            items_cleaned=items_cleaned,
            space_freed=space_freed,
            duration=duration,
            strategy=self.strategy,
            policy=self.policy,
            details={"type": cleanup_type},
        )

        self.tracker.record_cleanup(result)

    def get_cleanup_cutoff(self) -> datetime:
        """Get the cutoff time for cleanup"""
        if self.strategy == CleanupStrategy.AGGRESSIVE:
            age = self.thresholds.max_history_age // 2
        elif self.strategy == CleanupStrategy.CONSERVATIVE:
            age = self.thresholds.max_history_age * 2
        else:  # BALANCED
            age = self.thresholds.max_history_age

        return datetime.utcnow() - timedelta(seconds=max(age, self.thresholds.min_retention_time))

    def format_cleanup_report(
        self,
        initial_completed: int,
        final_completed: int,
        initial_failed: int,
        final_failed: int,
    ) -> str:
        """Format a cleanup report"""
        stats = self.tracker.get_stats()

        return (
            f"History Cleanup Results:\n"
            f"- Completed items: {initial_completed} -> {final_completed}\n"
            f"- Failed items: {initial_failed} -> {final_failed}\n"
            f"- Total items cleaned: {(initial_completed - final_completed) + (initial_failed - final_failed)}\n"
            f"- Space freed: {stats['total_space_freed']} bytes\n"
            f"- Strategy: {self.strategy.value}\n"
            f"- Policy: {self.policy.value}\n"
            f"- Total cleanups: {stats['total_cleanups']}"
        )

    def get_cleaner_stats(self) -> Dict[str, Any]:
        """Get comprehensive cleaner statistics"""
        return {
            "strategy": self.strategy.value,
            "policy": self.policy.value,
            "thresholds": {
                "max_history_age": self.thresholds.max_history_age,
                "max_completed_items": self.thresholds.max_completed_items,
                "max_failed_items": self.thresholds.max_failed_items,
                "min_retention_time": self.thresholds.min_retention_time,
                "size_threshold": self.thresholds.size_threshold,
            },
            "tracker": self.tracker.get_stats(),
        }
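
Example (not part of the commit): a minimal sketch of a HistoryCleaner pass over empty completed/failed maps, assuming the cog package is importable as videoarchiver. The cutoff comes from get_cleanup_cutoff, which widens or narrows the retention window with the chosen strategy.

import asyncio
from videoarchiver.queue.cleaners.history_cleaner import (
    HistoryCleaner, CleanupStrategy, CleanupPolicy,
)

async def demo() -> None:
    cleaner = HistoryCleaner(strategy=CleanupStrategy.BALANCED, policy=CleanupPolicy.AGE)
    completed, failed = {}, {}  # url -> QueueItem; empty for illustration
    cutoff = cleaner.get_cleanup_cutoff()
    await cleaner.cleanup_completed(completed, cutoff)
    await cleaner.cleanup_failed(failed, cutoff)
    print(cleaner.format_cleanup_report(0, len(completed), 0, len(failed)))

asyncio.run(demo())
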
videoarchiver/queue/cleaners/tracking_cleaner.py (new file, 452 lines)
@@ -0,0 +1,452 @@
"""Module for cleaning queue tracking data"""

import logging
import asyncio
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, List, Set, Tuple, Any, Optional
from datetime import datetime

from ..models import QueueItem

logger = logging.getLogger("TrackingCleaner")


class TrackingCleanupStrategy(Enum):
    """Tracking cleanup strategies"""
    AGGRESSIVE = "aggressive"      # Remove all invalid entries
    CONSERVATIVE = "conservative"  # Keep recent invalid entries
    BALANCED = "balanced"          # Balance between cleanup and retention


class TrackingType(Enum):
    """Types of tracking data"""
    GUILD = "guild"
    CHANNEL = "channel"
    URL = "url"


@dataclass
class TrackingCleanupConfig:
    """Configuration for tracking cleanup"""
    batch_size: int = 100
    retention_period: int = 3600  # 1 hour
    validate_urls: bool = True
    cleanup_empty: bool = True
    max_invalid_ratio: float = 0.5  # 50% invalid threshold


@dataclass
class TrackingCleanupResult:
    """Result of a tracking cleanup operation"""
    timestamp: datetime
    strategy: TrackingCleanupStrategy
    items_cleaned: int
    guilds_cleaned: int
    channels_cleaned: int
    duration: float
    initial_counts: Dict[str, int]
    final_counts: Dict[str, int]
    error: Optional[str] = None


class TrackingValidator:
    """Validates tracking data"""

    @staticmethod
    def validate_url(url: str) -> bool:
        """Validate URL format"""
        try:
            return bool(url and isinstance(url, str) and "://" in url)
        except Exception:
            return False

    @staticmethod
    def validate_id(id_value: int) -> bool:
        """Validate ID format"""
        try:
            return bool(isinstance(id_value, int) and id_value > 0)
        except Exception:
            return False


class TrackingCleanupTracker:
    """Tracks cleanup operations"""

    def __init__(self, max_history: int = 1000):
        self.max_history = max_history
        self.history: List[TrackingCleanupResult] = []
        self.total_items_cleaned = 0
        self.total_guilds_cleaned = 0
        self.total_channels_cleaned = 0
        self.last_cleanup: Optional[datetime] = None

    def record_cleanup(self, result: TrackingCleanupResult) -> None:
        """Record a cleanup operation"""
        self.history.append(result)
        if len(self.history) > self.max_history:
            self.history.pop(0)

        self.total_items_cleaned += result.items_cleaned
        self.total_guilds_cleaned += result.guilds_cleaned
        self.total_channels_cleaned += result.channels_cleaned
        self.last_cleanup = result.timestamp

    def get_stats(self) -> Dict[str, Any]:
        """Get cleanup statistics"""
        return {
            "total_cleanups": len(self.history),
            "total_items_cleaned": self.total_items_cleaned,
            "total_guilds_cleaned": self.total_guilds_cleaned,
            "total_channels_cleaned": self.total_channels_cleaned,
            "last_cleanup": self.last_cleanup.isoformat() if self.last_cleanup else None,
            "recent_cleanups": [
                {
                    "timestamp": r.timestamp.isoformat(),
                    "strategy": r.strategy.value,
                    "items_cleaned": r.items_cleaned,
                    "guilds_cleaned": r.guilds_cleaned,
                    "channels_cleaned": r.channels_cleaned,
                    "duration": r.duration,
                }
                for r in self.history[-5:]  # Last 5 cleanups
            ],
        }


class TrackingCleaner:
    """Handles cleanup of queue tracking data"""

    def __init__(
        self,
        strategy: TrackingCleanupStrategy = TrackingCleanupStrategy.BALANCED,
        config: Optional[TrackingCleanupConfig] = None,
    ):
        self.strategy = strategy
        self.config = config or TrackingCleanupConfig()
        self.tracker = TrackingCleanupTracker()
        self.validator = TrackingValidator()

    async def cleanup_tracking(
        self,
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
    ) -> Tuple[int, Dict[str, int]]:
        """Clean up tracking data"""
        start_time = datetime.utcnow()

        try:
            # Get initial counts
            initial_counts = self._get_tracking_counts(guild_queues, channel_queues)

            # Get valid URLs
            valid_urls = self._get_valid_urls(queue, processing)

            # Clean tracking data based on strategy
            if self.strategy == TrackingCleanupStrategy.AGGRESSIVE:
                cleaned = await self._aggressive_cleanup(guild_queues, channel_queues, valid_urls)
            elif self.strategy == TrackingCleanupStrategy.CONSERVATIVE:
                cleaned = await self._conservative_cleanup(guild_queues, channel_queues, valid_urls)
            else:  # BALANCED
                cleaned = await self._balanced_cleanup(guild_queues, channel_queues, valid_urls)

            items_cleaned, guilds_cleaned, channels_cleaned = cleaned

            # Get final counts
            final_counts = self._get_tracking_counts(guild_queues, channel_queues)

            # Record cleanup result
            duration = (datetime.utcnow() - start_time).total_seconds()
            result = TrackingCleanupResult(
                timestamp=datetime.utcnow(),
                strategy=self.strategy,
                items_cleaned=items_cleaned,
                guilds_cleaned=guilds_cleaned,
                channels_cleaned=channels_cleaned,
                duration=duration,
                initial_counts=initial_counts,
                final_counts=final_counts,
            )
            self.tracker.record_cleanup(result)

            logger.info(self.format_tracking_cleanup_report(initial_counts, final_counts, duration))
            return items_cleaned, initial_counts

        except Exception as e:
            logger.error(f"Error cleaning tracking data: {e}")
            self.tracker.record_cleanup(TrackingCleanupResult(
                timestamp=datetime.utcnow(),
                strategy=self.strategy,
                items_cleaned=0,
                guilds_cleaned=0,
                channels_cleaned=0,
                duration=0,
                initial_counts={},
                final_counts={},
                error=str(e),
            ))
            raise

    async def _aggressive_cleanup(
        self,
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        valid_urls: Set[str],
    ) -> Tuple[int, int, int]:
        """Perform aggressive cleanup"""
        items_cleaned = 0
        guilds_cleaned = 0
        channels_cleaned = 0

        # Clean guild tracking
        guild_cleaned = await self._cleanup_guild_tracking(guild_queues, valid_urls, validate_all=True)
        items_cleaned += guild_cleaned[0]
        guilds_cleaned += guild_cleaned[1]

        # Clean channel tracking
        channel_cleaned = await self._cleanup_channel_tracking(channel_queues, valid_urls, validate_all=True)
        items_cleaned += channel_cleaned[0]
        channels_cleaned += channel_cleaned[1]

        return items_cleaned, guilds_cleaned, channels_cleaned

    async def _conservative_cleanup(
        self,
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        valid_urls: Set[str],
    ) -> Tuple[int, int, int]:
        """Perform conservative cleanup"""
        items_cleaned = 0
        guilds_cleaned = 0
        channels_cleaned = 0

        # Only clean if invalid ratio exceeds threshold
        for guild_id, urls in list(guild_queues.items()):
            invalid_ratio = len(urls - valid_urls) / len(urls) if urls else 0
            if invalid_ratio > self.config.max_invalid_ratio:
                # Clean via a single-guild view, then write the result back so the
                # real tracking dict is actually updated
                single = {guild_id: urls}
                cleaned = await self._cleanup_guild_tracking(single, valid_urls, validate_all=False)
                if guild_id in single:
                    guild_queues[guild_id] = single[guild_id]
                else:
                    guild_queues.pop(guild_id, None)
                items_cleaned += cleaned[0]
                guilds_cleaned += cleaned[1]

        for channel_id, urls in list(channel_queues.items()):
            invalid_ratio = len(urls - valid_urls) / len(urls) if urls else 0
            if invalid_ratio > self.config.max_invalid_ratio:
                single = {channel_id: urls}
                cleaned = await self._cleanup_channel_tracking(single, valid_urls, validate_all=False)
                if channel_id in single:
                    channel_queues[channel_id] = single[channel_id]
                else:
                    channel_queues.pop(channel_id, None)
                items_cleaned += cleaned[0]
                channels_cleaned += cleaned[1]

        return items_cleaned, guilds_cleaned, channels_cleaned

    async def _balanced_cleanup(
        self,
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
        valid_urls: Set[str],
    ) -> Tuple[int, int, int]:
        """Perform balanced cleanup"""
        items_cleaned = 0
        guilds_cleaned = 0
        channels_cleaned = 0

        # Clean guild tracking with validation
        guild_cleaned = await self._cleanup_guild_tracking(
            guild_queues, valid_urls, validate_all=self.config.validate_urls
        )
        items_cleaned += guild_cleaned[0]
        guilds_cleaned += guild_cleaned[1]

        # Clean channel tracking with validation
        channel_cleaned = await self._cleanup_channel_tracking(
            channel_queues, valid_urls, validate_all=self.config.validate_urls
        )
        items_cleaned += channel_cleaned[0]
        channels_cleaned += channel_cleaned[1]

        return items_cleaned, guilds_cleaned, channels_cleaned

    async def _cleanup_guild_tracking(
        self,
        guild_queues: Dict[int, Set[str]],
        valid_urls: Set[str],
        validate_all: bool,
    ) -> Tuple[int, int]:
        """Clean up guild tracking data"""
        items_cleaned = 0
        guilds_cleaned = 0
        batch_count = 0

        for guild_id in list(guild_queues.keys()):
            if not self.validator.validate_id(guild_id):
                guild_queues.pop(guild_id)
                guilds_cleaned += 1
                continue

            original_size = len(guild_queues[guild_id])
            guild_queues[guild_id] = {
                url for url in guild_queues[guild_id]
                if (not validate_all or self.validator.validate_url(url)) and url in valid_urls
            }
            items_cleaned += original_size - len(guild_queues[guild_id])

            if self.config.cleanup_empty and not guild_queues[guild_id]:
                guild_queues.pop(guild_id)
                guilds_cleaned += 1

            batch_count += 1
            if batch_count >= self.config.batch_size:
                await asyncio.sleep(0)  # Yield to event loop
                batch_count = 0

        logger.debug(f"Cleaned {items_cleaned} guild tracking items")
        return items_cleaned, guilds_cleaned

    async def _cleanup_channel_tracking(
        self,
        channel_queues: Dict[int, Set[str]],
        valid_urls: Set[str],
        validate_all: bool,
    ) -> Tuple[int, int]:
        """Clean up channel tracking data"""
        items_cleaned = 0
        channels_cleaned = 0
        batch_count = 0

        for channel_id in list(channel_queues.keys()):
            if not self.validator.validate_id(channel_id):
                channel_queues.pop(channel_id)
                channels_cleaned += 1
                continue

            original_size = len(channel_queues[channel_id])
            channel_queues[channel_id] = {
                url for url in channel_queues[channel_id]
                if (not validate_all or self.validator.validate_url(url)) and url in valid_urls
            }
            items_cleaned += original_size - len(channel_queues[channel_id])

            if self.config.cleanup_empty and not channel_queues[channel_id]:
                channel_queues.pop(channel_id)
                channels_cleaned += 1

            batch_count += 1
            if batch_count >= self.config.batch_size:
                await asyncio.sleep(0)  # Yield to event loop
                batch_count = 0

        logger.debug(f"Cleaned {items_cleaned} channel tracking items")
        return items_cleaned, channels_cleaned

    def _get_valid_urls(
        self,
        queue: List[QueueItem],
        processing: Dict[str, QueueItem],
    ) -> Set[str]:
        """Get set of valid URLs"""
        valid_urls = {item.url for item in queue}
        valid_urls.update(processing.keys())
        return valid_urls

    def _get_tracking_counts(
        self,
        guild_queues: Dict[int, Set[str]],
        channel_queues: Dict[int, Set[str]],
    ) -> Dict[str, int]:
        """Get tracking data counts"""
        return {
            'guilds': len(guild_queues),
            'channels': len(channel_queues),
            'guild_urls': sum(len(urls) for urls in guild_queues.values()),
            'channel_urls': sum(len(urls) for urls in channel_queues.values()),
        }

    def format_tracking_cleanup_report(
        self,
        initial_counts: Dict[str, int],
        final_counts: Dict[str, int],
        duration: float,
    ) -> str:
        """Format a tracking cleanup report"""
        total_cleaned = (
            (initial_counts['guild_urls'] - final_counts['guild_urls'])
            + (initial_counts['channel_urls'] - final_counts['channel_urls'])
        )

        return (
            f"Tracking Cleanup Results:\n"
            f"Strategy: {self.strategy.value}\n"
            f"Duration: {duration:.2f}s\n"
            f"Items:\n"
            f"- Guild Queues: {initial_counts['guilds']} -> {final_counts['guilds']}\n"
            f"- Channel Queues: {initial_counts['channels']} -> {final_counts['channels']}\n"
            f"- Guild URLs: {initial_counts['guild_urls']} -> {final_counts['guild_urls']}\n"
            f"- Channel URLs: {initial_counts['channel_urls']} -> {final_counts['channel_urls']}\n"
            f"Total items cleaned: {total_cleaned}"
        )

    def get_cleaner_stats(self) -> Dict[str, Any]:
        """Get comprehensive cleaner statistics"""
        return {
            "strategy": self.strategy.value,
            "config": {
                "batch_size": self.config.batch_size,
                "retention_period": self.config.retention_period,
                "validate_urls": self.config.validate_urls,
                "cleanup_empty": self.config.cleanup_empty,
                "max_invalid_ratio": self.config.max_invalid_ratio,
            },
            "tracker": self.tracker.get_stats(),
        }
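
Example (not part of the commit): a minimal sketch of a balanced TrackingCleaner pass over empty tracking maps, assuming the cog package is importable as videoarchiver; in the cog the tracking dicts and queue state come from the queue manager.

import asyncio
from videoarchiver.queue.cleaners.tracking_cleaner import (
    TrackingCleaner, TrackingCleanupStrategy,
)

async def demo() -> None:
    cleaner = TrackingCleaner(strategy=TrackingCleanupStrategy.BALANCED)
    items_cleaned, initial = await cleaner.cleanup_tracking(
        guild_queues={}, channel_queues={}, queue=[], processing={},
    )
    print(f"cleaned {items_cleaned} tracked URLs, initial counts: {initial}")

asyncio.run(demo())
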