Mirror of https://github.com/pacnpal/Pac-cogs.git, synced 2025-12-20 10:51:05 -05:00
Eliminating duplicate queue processing that was causing race conditions

- Adding proper processing state tracking and timing
- Implementing more aggressive monitoring (1-minute intervals)
- Adding activity tracking to detect and recover from hung states
- Improving error handling and logging throughout the system
- Reducing timeouts and deadlock thresholds for faster recovery
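As a rough illustration of the pattern the commit moves to (hypothetical names, not the cog's actual API): a single consumer task owns the queue, each item records when processing started, and a periodic watchdog re-queues anything that has been in flight longer than the deadlock threshold.

import asyncio
import time

DEADLOCK_THRESHOLD = 300  # seconds, the new 5-minute threshold
CHECK_INTERVAL = 60       # seconds, the new 1-minute monitor interval

async def worker(queue: asyncio.Queue, in_progress: dict) -> None:
    """Single consumer: only one of these runs, so items are never processed twice."""
    while True:
        url = await queue.get()
        in_progress[url] = time.time()  # per-item start time, like QueueItem.start_processing
        try:
            await asyncio.sleep(0.1)    # stand-in for the real download/archive work
        finally:
            in_progress.pop(url, None)  # like QueueItem.finish_processing
            queue.task_done()

async def watchdog(queue: asyncio.Queue, in_progress: dict) -> None:
    """Recover items that have been 'processing' longer than the deadlock threshold."""
    while True:
        await asyncio.sleep(CHECK_INTERVAL)
        now = time.time()
        for url, started in list(in_progress.items()):
            if now - started > DEADLOCK_THRESHOLD:
                in_progress.pop(url, None)
                queue.put_nowait(url)  # re-queue the hung item for another attempt

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    in_progress: dict = {}
    tasks = [
        asyncio.create_task(worker(queue, in_progress)),
        asyncio.create_task(watchdog(queue, in_progress)),
    ]
    queue.put_nowait("https://example.com/video")
    await queue.join()
    for task in tasks:
        task.cancel()

if __name__ == "__main__":
    asyncio.run(main())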
@@ -40,11 +40,11 @@ from .events import setup_events
 logger = logging.getLogger("VideoArchiver")

-# Constants for timeouts - extremely aggressive timeouts
-UNLOAD_TIMEOUT = 2  # seconds
-CLEANUP_TIMEOUT = 1  # seconds
-INIT_TIMEOUT = 5  # seconds
-COMPONENT_INIT_TIMEOUT = 2  # seconds
+# Constants for timeouts - more reasonable timeouts
+UNLOAD_TIMEOUT = 30  # seconds
+CLEANUP_TIMEOUT = 15  # seconds
+INIT_TIMEOUT = 60  # seconds
+COMPONENT_INIT_TIMEOUT = 30  # seconds

 class VideoArchiver(GroupCog):
     """Archive videos from Discord channels"""
@@ -73,6 +73,9 @@ class VideoArchiver(GroupCog):
         self._cleanup_task: Optional[asyncio.Task] = None
         self._unloading = False
         self.db = None
+        self.queue_manager = None
+        self.processor = None
+        self.components = {}

         # Start initialization
         self._init_task = asyncio.create_task(self._initialize())
@@ -335,10 +338,12 @@ class VideoArchiver(GroupCog):
         """Handle initialization task completion"""
         try:
             task.result()
+            logger.info("Initialization completed successfully")
         except asyncio.CancelledError:
-            pass
+            logger.warning("Initialization was cancelled")
+            asyncio.create_task(self._cleanup())
         except Exception as e:
-            logger.error(f"Initialization failed: {str(e)}")
+            logger.error(f"Initialization failed: {str(e)}\n{traceback.format_exc()}")
             asyncio.create_task(self._cleanup())

     async def _initialize(self) -> None:
@@ -348,11 +353,13 @@ class VideoArchiver(GroupCog):
             config = Config.get_conf(self, identifier=855847, force_registration=True)
             config.register_guild(**self.default_guild_settings)
             self.config_manager = ConfigManager(config)
+            logger.info("Config manager initialized")

             # Set up paths
             self.data_path = Path(data_manager.cog_data_path(self))
             self.download_path = self.data_path / "downloads"
             self.download_path.mkdir(parents=True, exist_ok=True)
+            logger.info("Paths initialized")

             # Clean existing downloads with timeout
             try:
@@ -360,31 +367,15 @@ class VideoArchiver(GroupCog):
                     cleanup_downloads(str(self.download_path)),
                     timeout=CLEANUP_TIMEOUT
                 )
+                logger.info("Downloads cleaned up")
             except asyncio.TimeoutError:
                 logger.warning("Download cleanup timed out, continuing initialization")

             # Initialize shared FFmpeg manager
             self.ffmpeg_mgr = FFmpegManager()
-            logger.info("Initialized shared FFmpeg manager")
+            logger.info("FFmpeg manager initialized")

-            # Initialize components dict first
-            self.components: Dict[int, Dict[str, Any]] = {}
-
-            # Initialize components for existing guilds with timeout
-            for guild in self.bot.guilds:
-                try:
-                    await asyncio.wait_for(
-                        initialize_guild_components(self, guild.id),
-                        timeout=COMPONENT_INIT_TIMEOUT
-                    )
-                except asyncio.TimeoutError:
-                    logger.error(f"Guild {guild.id} initialization timed out")
-                    continue
-                except Exception as e:
-                    logger.error(f"Failed to initialize guild {guild.id}: {str(e)}")
-                    continue
-
-            # Initialize queue manager after components are ready
+            # Initialize queue manager before components
             queue_path = self.data_path / "queue_state.json"
             queue_path.parent.mkdir(parents=True, exist_ok=True)
             self.queue_manager = EnhancedVideoQueueManager(
@@ -395,9 +386,7 @@ class VideoArchiver(GroupCog):
                 max_history_age=86400,
                 persistence_path=str(queue_path),
             )
-
-            # Initialize update checker
-            self.update_checker = UpdateChecker(self.bot, self.config_manager)
+            logger.info("Queue manager initialized")

             # Initialize processor with queue manager and shared FFmpeg manager
             self.processor = VideoProcessor(
@@ -408,6 +397,26 @@ class VideoArchiver(GroupCog):
                 ffmpeg_mgr=self.ffmpeg_mgr,
                 db=self.db,  # Pass database to processor (None by default)
             )
+            logger.info("Video processor initialized")
+
+            # Initialize components for existing guilds with timeout
+            for guild in self.bot.guilds:
+                try:
+                    await asyncio.wait_for(
+                        initialize_guild_components(self, guild.id),
+                        timeout=COMPONENT_INIT_TIMEOUT
+                    )
+                    logger.info(f"Guild {guild.id} components initialized")
+                except asyncio.TimeoutError:
+                    logger.error(f"Guild {guild.id} initialization timed out")
+                    continue
+                except Exception as e:
+                    logger.error(f"Failed to initialize guild {guild.id}: {str(e)}")
+                    continue
+
+            # Initialize update checker
+            self.update_checker = UpdateChecker(self.bot, self.config_manager)
+            logger.info("Update checker initialized")

             # Start update checker with timeout
             try:
@@ -415,15 +424,20 @@ class VideoArchiver(GroupCog):
                     self.update_checker.start(),
                     timeout=INIT_TIMEOUT
                 )
+                logger.info("Update checker started")
             except asyncio.TimeoutError:
                 logger.warning("Update checker start timed out")

+            # Start queue processing
+            await self.queue_manager.process_queue(self.processor.process_video)
+            logger.info("Queue processing started")
+
             # Set ready flag
             self.ready.set()
             logger.info("VideoArchiver initialization completed successfully")

         except Exception as e:
-            logger.error(f"Critical error during initialization: {str(e)}")
+            logger.error(f"Critical error during initialization: {str(e)}\n{traceback.format_exc()}")
             # Force cleanup on initialization error
             try:
                 await asyncio.wait_for(
@@ -435,22 +449,24 @@ class VideoArchiver(GroupCog):
             raise

     async def cog_load(self) -> None:
-        """Handle cog loading with aggressive timeout"""
+        """Handle cog loading with proper timeout"""
         try:
             # Create initialization task
             init_task = asyncio.create_task(self._initialize())
             try:
                 # Wait for initialization with timeout
                 await asyncio.wait_for(init_task, timeout=INIT_TIMEOUT)
+                logger.info("Initialization completed within timeout")
             except asyncio.TimeoutError:
                 logger.error("Initialization timed out, forcing cleanup")
                 init_task.cancel()
                 await force_cleanup_resources(self)
                 raise ProcessingError("Cog initialization timed out")

-            # Wait for ready flag with short timeout
+            # Wait for ready flag with timeout
             try:
                 await asyncio.wait_for(self.ready.wait(), timeout=INIT_TIMEOUT)
+                logger.info("Ready flag set within timeout")
             except asyncio.TimeoutError:
                 await force_cleanup_resources(self)
                 raise ProcessingError("Ready flag wait timed out")
@@ -467,34 +483,36 @@ class VideoArchiver(GroupCog):
             raise ProcessingError(f"Error during cog load: {str(e)}")

     async def cog_unload(self) -> None:
-        """Clean up when cog is unloaded with extremely aggressive timeout handling"""
+        """Clean up when cog is unloaded with proper timeout handling"""
         self._unloading = True
         try:
-            # Cancel any pending tasks immediately
+            # Cancel any pending tasks
             if self._init_task and not self._init_task.done():
                 self._init_task.cancel()

             if self._cleanup_task and not self._cleanup_task.done():
                 self._cleanup_task.cancel()

-            # Try normal cleanup first with very short timeout
+            # Try normal cleanup first
             cleanup_task = asyncio.create_task(cleanup_resources(self))
             try:
-                await asyncio.wait_for(cleanup_task, timeout=CLEANUP_TIMEOUT)
+                await asyncio.wait_for(cleanup_task, timeout=UNLOAD_TIMEOUT)
+                logger.info("Normal cleanup completed")
             except (asyncio.TimeoutError, Exception) as e:
                 if isinstance(e, asyncio.TimeoutError):
                     logger.warning("Normal cleanup timed out, forcing cleanup")
                 else:
                     logger.error(f"Error during normal cleanup: {str(e)}")

-                # Cancel normal cleanup and force cleanup immediately
+                # Cancel normal cleanup and force cleanup
                 cleanup_task.cancel()
                 try:
-                    # Force cleanup with very short timeout
+                    # Force cleanup with timeout
                     await asyncio.wait_for(
                         force_cleanup_resources(self),
                         timeout=CLEANUP_TIMEOUT
                     )
+                    logger.info("Force cleanup completed")
                 except asyncio.TimeoutError:
                     logger.error("Force cleanup timed out")
                 except Exception as e:
@@ -506,16 +524,14 @@ class VideoArchiver(GroupCog):
             self._unloading = False
             # Ensure ready flag is cleared
             self.ready.clear()
-            # Aggressively clear all references
+            # Clear all references
             self.bot = None
             self.processor = None
             self.queue_manager = None
             self.update_checker = None
             self.ffmpeg_mgr = None
-            if hasattr(self, 'components'):
-                self.components.clear()
+            self.components.clear()
             self.db = None
-            # Clear any other potential references
             self._init_task = None
             self._cleanup_task = None

@@ -526,6 +542,7 @@ class VideoArchiver(GroupCog):
                 cleanup_resources(self),
                 timeout=CLEANUP_TIMEOUT
             )
+            logger.info("Cleanup completed successfully")
         except asyncio.TimeoutError:
             logger.warning("Cleanup timed out, forcing cleanup")
             try:
@@ -533,5 +550,6 @@ class VideoArchiver(GroupCog):
                     force_cleanup_resources(self),
                     timeout=CLEANUP_TIMEOUT
                 )
+                logger.info("Force cleanup completed")
             except asyncio.TimeoutError:
                 logger.error("Force cleanup timed out")
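The VideoArchiver changes above bound every load/unload stage with asyncio.wait_for and fall back to a forced cleanup instead of hanging. A minimal sketch of that shape, with illustrative setup/cleanup/force_cleanup stand-ins rather than the cog's real helpers:

import asyncio
import logging

logger = logging.getLogger(__name__)

INIT_TIMEOUT = 60      # matches the new INIT_TIMEOUT above
CLEANUP_TIMEOUT = 15   # matches the new CLEANUP_TIMEOUT above

async def setup() -> None:
    await asyncio.sleep(0.1)  # stand-in for component initialization

async def cleanup() -> None:
    await asyncio.sleep(0.1)  # stand-in for graceful cleanup

async def force_cleanup() -> None:
    await asyncio.sleep(0)    # stand-in for best-effort resource release

async def load() -> None:
    try:
        await asyncio.wait_for(setup(), timeout=INIT_TIMEOUT)
    except asyncio.TimeoutError:
        logger.error("Initialization timed out, forcing cleanup")
        await force_cleanup()
        raise

async def unload() -> None:
    try:
        await asyncio.wait_for(cleanup(), timeout=CLEANUP_TIMEOUT)
    except asyncio.TimeoutError:
        logger.warning("Normal cleanup timed out, forcing cleanup")
        await asyncio.wait_for(force_cleanup(), timeout=CLEANUP_TIMEOUT)

if __name__ == "__main__":
    asyncio.run(load())
    asyncio.run(unload())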
@@ -43,14 +43,10 @@ class VideoProcessor:
         if self.db:
             self.queue_handler.db = self.db

-        # Start queue processing
-        logger.info("Starting video processing queue...")
+        # Store queue task reference but don't start processing here
+        # Queue processing is managed by VideoArchiver class
         self._queue_task = None
-        if queue_manager:
-            self._queue_task = self.bot.loop.create_task(
-                queue_manager.process_queue(self.queue_handler.process_video)
-            )
-            logger.info("Video processing queue started successfully")
+        logger.info("VideoProcessor initialized successfully")

     async def process_message(self, message: discord.Message) -> None:
         """Process a message for video content"""
@@ -74,7 +70,7 @@ class VideoProcessor:
         except Exception as e:
             logger.error(f"Error cleaning up FFmpeg manager: {e}")

-        # Cancel queue processing task
+        # Cancel queue processing task if we have one
         if self._queue_task and not self._queue_task.done():
             self._queue_task.cancel()
             try:
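The VideoProcessor now only holds a _queue_task reference and cancels it during cleanup; queue processing itself is started once by the VideoArchiver class, which removes the duplicate consumer. A small sketch of that own-a-handle-and-cancel pattern (illustrative class, not the real VideoProcessor):

import asyncio
from typing import Optional

class TaskOwner:
    def __init__(self) -> None:
        self._queue_task: Optional[asyncio.Task] = None

    def start(self) -> None:
        # Only start if nobody else owns the processing loop.
        if self._queue_task is None:
            self._queue_task = asyncio.create_task(self._run())

    async def _run(self) -> None:
        while True:
            await asyncio.sleep(1)  # stand-in for queue processing

    async def cleanup(self) -> None:
        # Cancel the task if we have one, and swallow the cancellation.
        if self._queue_task and not self._queue_task.done():
            self._queue_task.cancel()
            try:
                await self._queue_task
            except asyncio.CancelledError:
                pass

async def main() -> None:
    owner = TaskOwner()
    owner.start()
    await asyncio.sleep(0.1)
    await owner.cleanup()

if __name__ == "__main__":
    asyncio.run(main())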
@@ -35,6 +35,10 @@ class QueueHandler:
         download_task = None

         try:
+            # Start processing
+            item.start_processing()
+            logger.info(f"Started processing video: {item.url}")
+
             # Check if video is already archived
             if self.db and self.db.is_url_archived(item.url):
                 logger.info(f"Video already archived: {item.url}")
@@ -43,18 +47,23 @@ class QueueHandler:
                 archived_info = self.db.get_archived_video(item.url)
                 if archived_info:
                     await original_message.reply(f"This video was already archived. You can find it here: {archived_info[0]}")
+                item.finish_processing(True)
                 return True, None

             guild_id = item.guild_id
             if guild_id not in self.components:
-                return False, f"No components found for guild {guild_id}"
+                error = f"No components found for guild {guild_id}"
+                item.finish_processing(False, error)
+                return False, error

             components = self.components[guild_id]
             downloader = components.get("downloader")
             message_manager = components.get("message_manager")

             if not downloader or not message_manager:
-                return False, f"Missing required components for guild {guild_id}"
+                error = f"Missing required components for guild {guild_id}"
+                item.finish_processing(False, error)
+                return False, error

             # Get original message and update reactions
             original_message = await self._get_original_message(item)
@@ -74,19 +83,21 @@ class QueueHandler:
                 if original_message:
                     await original_message.add_reaction(REACTIONS["error"])
                 logger.error(f"Download failed for message {item.message_id}: {error}")
+                item.finish_processing(False, f"Failed to download video: {error}")
                 return False, f"Failed to download video: {error}"

             # Archive video
             success, error = await self._archive_video(
                 guild_id, original_message, message_manager, item.url, file_path
             )
-            if not success:
-                return False, error

-            return True, None
+            # Finish processing
+            item.finish_processing(success, error if not success else None)
+            return success, error

         except Exception as e:
             logger.error(f"Error processing video: {str(e)}", exc_info=True)
+            item.finish_processing(False, str(e))
             return False, str(e)
         finally:
             # Clean up downloaded file
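In the QueueHandler, every exit path now calls item.finish_processing, so the monitor never mistakes a finished item for a stuck one. A simplified stand-in for QueueItem showing the pattern of marking a terminal state before each early return:

import time
from dataclasses import dataclass
from typing import Optional, Tuple

@dataclass
class QueueItem:
    url: str
    status: str = "pending"
    start_time: Optional[float] = None
    error: Optional[str] = None

    def start_processing(self) -> None:
        self.status = "processing"
        self.start_time = time.time()

    def finish_processing(self, success: bool, error: Optional[str] = None) -> None:
        self.status = "completed" if success else "failed"
        self.error = error
        self.start_time = None

def process(item: QueueItem, downloader_available: bool) -> Tuple[bool, Optional[str]]:
    item.start_processing()
    try:
        if not downloader_available:
            error = "Missing required components"
            item.finish_processing(False, error)  # mark failed before the early return
            return False, error
        item.finish_processing(True)
        return True, None
    except Exception as e:                        # unexpected failure still gets a terminal state
        item.finish_processing(False, str(e))
        return False, str(e)

if __name__ == "__main__":
    print(process(QueueItem("https://example.com/a"), downloader_available=False))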
@@ -2,6 +2,7 @@

 import asyncio
 import logging
+import time
 from typing import Dict, Optional, Set, Tuple, Callable, Any, List
 from datetime import datetime

@@ -28,7 +29,8 @@ class EnhancedVideoQueueManager:
         max_history_age: int = 86400,  # 24 hours
         persistence_path: Optional[str] = None,
         backup_interval: int = 300,  # 5 minutes
-        deadlock_threshold: int = 900,  # 15 minutes
+        deadlock_threshold: int = 300,  # 5 minutes
+        check_interval: int = 60,  # 1 minute
     ):
         # Configuration
         self.max_retries = max_retries
@@ -58,7 +60,8 @@ class EnhancedVideoQueueManager:
         self.persistence = QueuePersistenceManager(persistence_path) if persistence_path else None
         self.monitor = QueueMonitor(
             deadlock_threshold=deadlock_threshold,
-            max_retries=max_retries
+            max_retries=max_retries,
+            check_interval=check_interval
         )
         self.cleaner = QueueCleaner(
             cleanup_interval=cleanup_interval,
@@ -80,6 +83,7 @@ class EnhancedVideoQueueManager:
             )
         )
         self._active_tasks.add(monitor_task)
+        logger.info("Queue monitoring started")

         # Start cleanup
         cleanup_task = asyncio.create_task(
@@ -95,6 +99,7 @@ class EnhancedVideoQueueManager:
             )
         )
         self._active_tasks.add(cleanup_task)
+        logger.info("Queue cleanup started")

         # Load persisted state if available
         if self.persistence:
@@ -120,6 +125,7 @@ class EnhancedVideoQueueManager:
                 self.metrics.compression_failures = metrics_data.get("compression_failures", 0)
                 self.metrics.hardware_accel_failures = metrics_data.get("hardware_accel_failures", 0)

+                logger.info("Loaded persisted queue state")
             except Exception as e:
                 logger.error(f"Failed to load persisted state: {e}")

@@ -141,8 +147,6 @@ class EnhancedVideoQueueManager:
                 if self._queue:
                     item = self._queue.pop(0)
                     self._processing[item.url] = item
-                    item.status = "processing"
-                    item.processing_time = 0.0

             if not item:
                 await asyncio.sleep(1)
@@ -151,20 +155,19 @@ class EnhancedVideoQueueManager:
             try:
                 # Process the item
                 logger.info(f"Processing queue item: {item.url}")
+                item.start_processing()  # Start processing tracking
+                self.metrics.last_activity_time = time.time()  # Update activity time
+
                 success, error = await processor(item)

                 # Update metrics and status
                 async with self._processing_lock:
+                    item.finish_processing(success, error)  # Update item status
+
                     if success:
-                        item.status = "completed"
                         self._completed[item.url] = item
                         logger.info(f"Successfully processed: {item.url}")
                     else:
-                        item.status = "failed"
-                        item.error = error
-                        item.last_error = error
-                        item.last_error_time = datetime.utcnow()
-
                         if item.retry_count < self.max_retries:
                             item.retry_count += 1
                             item.status = "pending"
@@ -177,16 +180,25 @@ class EnhancedVideoQueueManager:
                             logger.error(f"Failed after {self.max_retries} attempts: {item.url}")

                     self._processing.pop(item.url, None)

+                    # Update metrics
+                    self.metrics.update(
+                        processing_time=item.processing_time,
+                        success=success,
+                        error=error
+                    )
+
             except Exception as e:
                 logger.error(f"Error processing {item.url}: {e}")
                 async with self._processing_lock:
-                    item.status = "failed"
-                    item.error = str(e)
-                    item.last_error = str(e)
-                    item.last_error_time = datetime.utcnow()
+                    item.finish_processing(False, str(e))
                     self._failed[item.url] = item
                     self._processing.pop(item.url, None)
+                    self.metrics.update(
+                        processing_time=item.processing_time,
+                        success=False,
+                        error=str(e)
+                    )

             # Persist state if enabled
             if self.persistence:
@@ -215,22 +227,7 @@ class EnhancedVideoQueueManager:
         author_id: int,
         priority: int = 0,
     ) -> bool:
-        """Add a video to the processing queue
-
-        Args:
-            url: Video URL
-            message_id: Discord message ID
-            channel_id: Discord channel ID
-            guild_id: Discord guild ID
-            author_id: Discord author ID
-            priority: Queue priority (higher = higher priority)
-
-        Returns:
-            True if added successfully
-
-        Raises:
-            QueueError: If queue is full or shutting down
-        """
+        """Add a video to the processing queue"""
         if self._shutdown:
             raise QueueError("Queue manager is shutting down")

@@ -262,6 +259,9 @@ class EnhancedVideoQueueManager:
                 self._queue.append(item)
                 self._queue.sort(key=lambda x: (-x.priority, x.added_at))

+                # Update activity time
+                self.metrics.last_activity_time = time.time()
+
                 if self.persistence:
                     await self.persistence.persist_queue_state(
                         self._queue,
@@ -279,14 +279,7 @@ class EnhancedVideoQueueManager:
             raise QueueError(f"Failed to add to queue: {str(e)}")

     def get_queue_status(self, guild_id: int) -> dict:
-        """Get current queue status for a guild
-
-        Args:
-            guild_id: Discord guild ID
-
-        Returns:
-            Dict containing queue status and metrics
-        """
+        """Get current queue status for a guild"""
         try:
             pending = len([item for item in self._queue if item.guild_id == guild_id])
             processing = len([item for item in self._processing.values() if item.guild_id == guild_id])
@@ -308,6 +301,7 @@ class EnhancedVideoQueueManager:
                     "errors_by_type": self.metrics.errors_by_type,
                     "compression_failures": self.metrics.compression_failures,
                     "hardware_accel_failures": self.metrics.hardware_accel_failures,
+                    "last_activity": time.time() - self.metrics.last_activity_time,
                 },
             }

@@ -328,21 +322,12 @@ class EnhancedVideoQueueManager:
                     "errors_by_type": {},
                     "compression_failures": 0,
                     "hardware_accel_failures": 0,
+                    "last_activity": 0,
                 },
             }

     async def clear_guild_queue(self, guild_id: int) -> int:
-        """Clear all queue items for a guild
-
-        Args:
-            guild_id: Discord guild ID
-
-        Returns:
-            Number of items cleared
-
-        Raises:
-            QueueError: If queue is shutting down
-        """
+        """Clear all queue items for a guild"""
         if self._shutdown:
             raise QueueError("Queue manager is shutting down")

@@ -377,6 +362,7 @@ class EnhancedVideoQueueManager:
         """Clean up resources and stop queue processing"""
         try:
             self._shutdown = True
+            logger.info("Starting queue manager cleanup...")

             # Stop monitoring and cleanup tasks
             self.monitor.stop_monitoring()
@@ -428,6 +414,7 @@ class EnhancedVideoQueueManager:
     def force_stop(self) -> None:
         """Force stop all queue operations immediately"""
         self._shutdown = True
+        logger.info("Force stopping queue manager...")

         # Stop monitoring and cleanup
         self.monitor.stop_monitoring()
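For reference, a small sketch of the ordering the queue manager applies when an item is added: sort by priority descending, then by added_at ascending, so higher priority runs first and equal priorities stay first-in, first-out (the Item class here is a simplified stand-in for QueueItem):

from dataclasses import dataclass, field
from datetime import datetime

@dataclass
class Item:
    url: str
    priority: int = 0
    added_at: datetime = field(default_factory=datetime.utcnow)

queue = [Item("a", priority=0), Item("b", priority=5), Item("c", priority=5)]
queue.sort(key=lambda x: (-x.priority, x.added_at))
print([i.url for i in queue])  # ['b', 'c', 'a'] - 'b' was queued before 'c'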
@@ -1,6 +1,7 @@
 """Data models for the queue system"""

 import logging
+import time
 from dataclasses import dataclass, field, asdict
 from datetime import datetime
 from typing import Dict, Optional, List, Any
@@ -22,14 +23,16 @@ class QueueItem:
     guild_id: int  # Discord ID
     added_at: datetime = field(default_factory=datetime.utcnow)
     status: str = "pending"
-    retry_count: int = 0  # Changed from retries to retry_count
-    priority: int = 0  # Added priority field with default value 0
+    retry_count: int = 0
+    priority: int = 0
     last_retry: Optional[datetime] = None
     last_error: Optional[str] = None
     last_error_time: Optional[datetime] = None
+    start_time: Optional[float] = None  # Added start_time for processing tracking
     processing_time: float = 0.0
     output_path: Optional[str] = None
     metadata: Dict[str, Any] = field(default_factory=dict)
+    error: Optional[str] = None  # Added error field for current error

     def __post_init__(self):
         """Convert string dates to datetime objects after initialization"""
@@ -57,6 +60,29 @@ class QueueItem:
         elif not isinstance(self.last_error_time, datetime):
             self.last_error_time = None

+    def start_processing(self) -> None:
+        """Mark item as started processing"""
+        self.status = "processing"
+        self.start_time = time.time()
+        self.processing_time = 0.0
+        self.error = None
+
+    def finish_processing(self, success: bool, error: Optional[str] = None) -> None:
+        """Mark item as finished processing"""
+        end_time = time.time()
+        if self.start_time:
+            self.processing_time = end_time - self.start_time
+
+        if success:
+            self.status = "completed"
+        else:
+            self.status = "failed"
+            self.error = error
+            self.last_error = error
+            self.last_error_time = datetime.utcnow()
+
+        self.start_time = None
+
     def to_dict(self) -> dict:
         """Convert to dictionary with datetime handling"""
         data = asdict(self)
@@ -91,6 +117,7 @@ class QueueMetrics:
     processing_times: List[float] = field(default_factory=list)
     compression_failures: int = 0
     hardware_accel_failures: int = 0
+    last_activity_time: float = field(default_factory=time.time)  # Added activity tracking

     def __post_init__(self):
         """Convert string dates to datetime objects after initialization"""
@@ -115,6 +142,8 @@ class QueueMetrics:
     def update(self, processing_time: float, success: bool, error: str = None):
         """Update metrics with new processing information"""
         self.total_processed += 1
+        self.last_activity_time = time.time()  # Update activity timestamp
+
         if not success:
             self.total_failed += 1
             if error:
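The new last_activity_time field uses dataclasses.field(default_factory=time.time) so each metrics instance gets its own timestamp, and update() refreshes it. A minimal sketch of how a reader can turn that into the "seconds since last activity" figure the queue status now reports (simplified Metrics class, not the real QueueMetrics):

import time
from dataclasses import dataclass, field

@dataclass
class Metrics:
    total_processed: int = 0
    last_activity_time: float = field(default_factory=time.time)

    def update(self) -> None:
        self.total_processed += 1
        self.last_activity_time = time.time()

    def idle_seconds(self) -> float:
        return time.time() - self.last_activity_time

m = Metrics()
m.update()
print(f"idle for {m.idle_seconds():.3f}s")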
@@ -19,14 +19,17 @@ class QueueMonitor:

     def __init__(
         self,
-        deadlock_threshold: int = 900,  # 15 minutes
-        memory_threshold: int = 1024,  # 1GB
-        max_retries: int = 3
+        deadlock_threshold: int = 300,  # 5 minutes
+        memory_threshold: int = 512,  # 512MB
+        max_retries: int = 3,
+        check_interval: int = 60  # Check every minute
     ):
         self.deadlock_threshold = deadlock_threshold
         self.memory_threshold = memory_threshold
         self.max_retries = max_retries
+        self.check_interval = check_interval
         self._shutdown = False
+        self._last_active_time = time.time()

     async def start_monitoring(
         self,
@@ -43,21 +46,28 @@ class QueueMonitor:
             metrics: Reference to queue metrics
             processing_lock: Lock for processing dict
         """
+        logger.info("Starting queue monitoring...")
         while not self._shutdown:
             try:
                 await self._check_health(queue, processing, metrics, processing_lock)
-                await asyncio.sleep(300)  # Check every 5 minutes
+                await asyncio.sleep(self.check_interval)
+
             except asyncio.CancelledError:
+                logger.info("Queue monitoring cancelled")
                 break
             except Exception as e:
                 logger.error(f"Error in health monitor: {str(e)}")
-                await asyncio.sleep(60)
+                await asyncio.sleep(30)  # Shorter sleep on error

     def stop_monitoring(self) -> None:
         """Stop the monitoring process"""
+        logger.info("Stopping queue monitoring...")
         self._shutdown = True

+    def update_activity(self) -> None:
+        """Update the last active time"""
+        self._last_active_time = time.time()
+
     async def _check_health(
         self,
         queue: List[QueueItem],
@@ -74,6 +84,8 @@ class QueueMonitor:
             processing_lock: Lock for processing dict
         """
         try:
+            current_time = time.time()
+
             # Check memory usage
             process = psutil.Process()
             memory_usage = process.memory_info().rss / 1024 / 1024  # MB
@@ -83,18 +95,22 @@ class QueueMonitor:
                 # Force garbage collection
                 import gc
                 gc.collect()
+                memory_after = process.memory_info().rss / 1024 / 1024
+                logger.info(f"Memory after GC: {memory_after:.2f}MB")

             # Check for potential deadlocks
-            current_time = time.time()
             processing_times = []
             stuck_items = []

-            for url, item in processing.items():
-                if isinstance(item.processing_time, (int, float)) and item.processing_time > 0:
-                    processing_time = current_time - item.processing_time
-                    processing_times.append(processing_time)
-                    if processing_time > self.deadlock_threshold:
-                        stuck_items.append((url, item))
+            async with processing_lock:
+                for url, item in processing.items():
+                    # Check if item has started processing
+                    if hasattr(item, 'start_time') and item.start_time:
+                        processing_time = current_time - item.start_time
+                        processing_times.append(processing_time)
+                        if processing_time > self.deadlock_threshold:
+                            stuck_items.append((url, item))
+                            logger.warning(f"Item stuck in processing: {url} for {processing_time:.1f}s")

             if stuck_items:
                 logger.warning(
@@ -104,6 +120,17 @@ class QueueMonitor:
                     stuck_items, queue, processing, processing_lock
                 )

+            # Check overall queue activity
+            if processing and current_time - self._last_active_time > self.deadlock_threshold:
+                logger.warning("Queue appears to be hung - no activity detected")
+                # Force recovery of all processing items
+                async with processing_lock:
+                    all_items = list(processing.items())
+                await self._recover_stuck_items(
+                    all_items, queue, processing, processing_lock
+                )
+                self._last_active_time = current_time
+
             # Calculate and log metrics
             success_rate = metrics.success_rate
             error_distribution = metrics.errors_by_type
@@ -112,14 +139,17 @@ class QueueMonitor:
             # Update peak memory usage
             metrics.peak_memory_usage = max(metrics.peak_memory_usage, memory_usage)

+            # Log detailed metrics
             logger.info(
                 f"Queue Health Metrics:\n"
                 f"- Success Rate: {success_rate:.2%}\n"
                 f"- Avg Processing Time: {avg_processing_time:.2f}s\n"
                 f"- Memory Usage: {memory_usage:.2f}MB\n"
+                f"- Peak Memory: {metrics.peak_memory_usage:.2f}MB\n"
                 f"- Error Distribution: {error_distribution}\n"
                 f"- Queue Size: {len(queue)}\n"
-                f"- Processing Items: {len(processing)}"
+                f"- Processing Items: {len(processing)}\n"
+                f"- Last Activity: {(current_time - self._last_active_time):.1f}s ago"
             )

         except Exception as e:
@@ -142,26 +172,37 @@ class QueueMonitor:
             processing_lock: Lock for processing dict
         """
         try:
+            recovered = 0
+            failed = 0
+
             async with processing_lock:
                 for url, item in stuck_items:
-                    # Move to failed if max retries reached
-                    if item.retry_count >= self.max_retries:
-                        logger.warning(f"Moving stuck item to failed: {url}")
-                        item.status = "failed"
-                        item.error = "Exceeded maximum retries after being stuck"
-                        item.last_error = item.error
-                        item.last_error_time = datetime.utcnow()
-                        processing.pop(url)
-                    else:
-                        # Reset for retry
-                        logger.info(f"Recovering stuck item for retry: {url}")
-                        item.retry_count += 1
-                        item.processing_time = 0
-                        item.last_retry = datetime.utcnow()
-                        item.status = "pending"
-                        item.priority = max(0, item.priority - 2)  # Lower priority
-                        queue.append(item)
-                        processing.pop(url)
+                    try:
+                        # Move to failed if max retries reached
+                        if item.retry_count >= self.max_retries:
+                            logger.warning(f"Moving stuck item to failed: {url}")
+                            item.status = "failed"
+                            item.error = "Exceeded maximum retries after being stuck"
+                            item.last_error = item.error
+                            item.last_error_time = datetime.utcnow()
+                            processing.pop(url)
+                            failed += 1
+                        else:
+                            # Reset for retry
+                            logger.info(f"Recovering stuck item for retry: {url}")
+                            item.retry_count += 1
+                            item.start_time = None
+                            item.processing_time = 0
+                            item.last_retry = datetime.utcnow()
+                            item.status = "pending"
+                            item.priority = max(0, item.priority - 2)  # Lower priority
+                            queue.append(item)
+                            processing.pop(url)
+                            recovered += 1
+                    except Exception as e:
+                        logger.error(f"Error recovering item {url}: {str(e)}")
+
+            logger.info(f"Recovery complete - Recovered: {recovered}, Failed: {failed}")
+
         except Exception as e:
             logger.error(f"Error recovering stuck items: {str(e)}")
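A hedged sketch of what the monitor's health check now does each check_interval: sample process memory with psutil, force a garbage-collection pass when over the threshold, and flag any item whose start_time is older than the deadlock threshold (print calls stand in for the real logger, and the names here are illustrative):

import gc
import time
import psutil  # third-party; the monitor in this repo already depends on it

MEMORY_THRESHOLD_MB = 512
DEADLOCK_THRESHOLD = 300

def check_health(processing: dict) -> list:
    """Return the URLs that look stuck, given a mapping of url -> processing start time."""
    memory_mb = psutil.Process().memory_info().rss / 1024 / 1024
    if memory_mb > MEMORY_THRESHOLD_MB:
        gc.collect()
        print(f"Memory after GC: {psutil.Process().memory_info().rss / 1024 / 1024:.2f}MB")

    now = time.time()
    stuck = [url for url, started in processing.items() if now - started > DEADLOCK_THRESHOLD]
    for url in stuck:
        print(f"Item stuck in processing: {url} for {now - processing[url]:.1f}s")
    return stuck

if __name__ == "__main__":
    print(check_health({"https://example.com/video": time.time() - 600}))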