Tightened timeouts to extremely aggressive values:

- CLEANUP_TIMEOUT = 1s (reduced from 3s)
- UNLOAD_TIMEOUT = 2s (reduced from 5s)
- INIT_TIMEOUT = 5s (new)
- COMPONENT_INIT_TIMEOUT = 2s (new)

Added timeout protection for the following async operations:

- Download cleanup
- Guild component initialization
- Update checker start
- Cleanup processes
- Force cleanup fallbacks

Improved cleanup process:

- Immediate task cancellation
- Aggressive reference clearing
- Force cleanup fallbacks
- Comprehensive error logging
This commit is contained in:
pacnpal
2024-11-15 21:49:58 +00:00
parent 2025c4e8f0
commit 87605ad7f1

View File

@@ -40,9 +40,11 @@ from .events import setup_events
logger = logging.getLogger("VideoArchiver") logger = logging.getLogger("VideoArchiver")
# Constants for timeouts - reduced for faster cleanup # Constants for timeouts - extremely aggressive timeouts
UNLOAD_TIMEOUT = 5 # seconds UNLOAD_TIMEOUT = 2 # seconds
CLEANUP_TIMEOUT = 3 # seconds CLEANUP_TIMEOUT = 1 # seconds
INIT_TIMEOUT = 5 # seconds
COMPONENT_INIT_TIMEOUT = 2 # seconds
class VideoArchiver(GroupCog): class VideoArchiver(GroupCog):
"""Archive videos from Discord channels""" """Archive videos from Discord channels"""
@@ -340,7 +342,7 @@ class VideoArchiver(GroupCog):
asyncio.create_task(self._cleanup()) asyncio.create_task(self._cleanup())
async def _initialize(self) -> None: async def _initialize(self) -> None:
"""Initialize all components with proper error handling""" """Initialize all components with proper error handling and timeouts"""
try: try:
# Initialize config first as other components depend on it # Initialize config first as other components depend on it
config = Config.get_conf(self, identifier=855847, force_registration=True) config = Config.get_conf(self, identifier=855847, force_registration=True)
@@ -352,8 +354,14 @@ class VideoArchiver(GroupCog):
self.download_path = self.data_path / "downloads" self.download_path = self.data_path / "downloads"
self.download_path.mkdir(parents=True, exist_ok=True) self.download_path.mkdir(parents=True, exist_ok=True)
# Clean existing downloads # Clean existing downloads with timeout
await cleanup_downloads(str(self.download_path)) try:
await asyncio.wait_for(
cleanup_downloads(str(self.download_path)),
timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.warning("Download cleanup timed out, continuing initialization")
# Initialize shared FFmpeg manager # Initialize shared FFmpeg manager
self.ffmpeg_mgr = FFmpegManager() self.ffmpeg_mgr = FFmpegManager()
@@ -362,10 +370,16 @@ class VideoArchiver(GroupCog):
# Initialize components dict first # Initialize components dict first
self.components: Dict[int, Dict[str, Any]] = {} self.components: Dict[int, Dict[str, Any]] = {}
# Initialize components for existing guilds # Initialize components for existing guilds with timeout
for guild in self.bot.guilds: for guild in self.bot.guilds:
try: try:
await initialize_guild_components(self, guild.id) await asyncio.wait_for(
initialize_guild_components(self, guild.id),
timeout=COMPONENT_INIT_TIMEOUT
)
except asyncio.TimeoutError:
logger.error(f"Guild {guild.id} initialization timed out")
continue
except Exception as e: except Exception as e:
logger.error(f"Failed to initialize guild {guild.id}: {str(e)}") logger.error(f"Failed to initialize guild {guild.id}: {str(e)}")
continue continue
@@ -395,32 +409,65 @@ class VideoArchiver(GroupCog):
db=self.db, # Pass database to processor (None by default) db=self.db, # Pass database to processor (None by default)
) )
# Start update checker # Start update checker with timeout
await self.update_checker.start() try:
await asyncio.wait_for(
self.update_checker.start(),
timeout=INIT_TIMEOUT
)
except asyncio.TimeoutError:
logger.warning("Update checker start timed out")
# Set ready flag # Set ready flag
self.ready.set() self.ready.set()
logger.info("VideoArchiver initialization completed successfully") logger.info("VideoArchiver initialization completed successfully")
except Exception as e: except Exception as e:
logger.error(f"Critical error during initialization: {str(e)}") logger.error(f"Critical error during initialization: {str(e)}")
await self._cleanup() # Force cleanup on initialization error
try:
await asyncio.wait_for(
force_cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup during initialization timed out")
raise raise
async def cog_load(self) -> None: async def cog_load(self) -> None:
"""Handle cog loading""" """Handle cog loading with aggressive timeout"""
try: try:
await asyncio.wait_for(self.ready.wait(), timeout=30) # Create initialization task
except asyncio.TimeoutError: init_task = asyncio.create_task(self._initialize())
await self._cleanup() try:
raise ProcessingError("Cog initialization timed out") # Wait for initialization with timeout
await asyncio.wait_for(init_task, timeout=INIT_TIMEOUT)
except asyncio.TimeoutError:
logger.error("Initialization timed out, forcing cleanup")
init_task.cancel()
await force_cleanup_resources(self)
raise ProcessingError("Cog initialization timed out")
# Wait for ready flag with short timeout
try:
await asyncio.wait_for(self.ready.wait(), timeout=INIT_TIMEOUT)
except asyncio.TimeoutError:
await force_cleanup_resources(self)
raise ProcessingError("Ready flag wait timed out")
except Exception as e: except Exception as e:
await self._cleanup() # Ensure cleanup on any error
try:
await asyncio.wait_for(
force_cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup during load error timed out")
raise ProcessingError(f"Error during cog load: {str(e)}") raise ProcessingError(f"Error during cog load: {str(e)}")
async def cog_unload(self) -> None: async def cog_unload(self) -> None:
"""Clean up when cog is unloaded with aggressive timeout handling""" """Clean up when cog is unloaded with extremely aggressive timeout handling"""
self._unloading = True self._unloading = True
try: try:
# Cancel any pending tasks immediately # Cancel any pending tasks immediately
@@ -430,34 +477,61 @@ class VideoArchiver(GroupCog):
if self._cleanup_task and not self._cleanup_task.done(): if self._cleanup_task and not self._cleanup_task.done():
self._cleanup_task.cancel() self._cleanup_task.cancel()
# Try normal cleanup first with short timeout # Try normal cleanup first with very short timeout
cleanup_task = asyncio.create_task(cleanup_resources(self)) cleanup_task = asyncio.create_task(cleanup_resources(self))
try: try:
await asyncio.wait_for(cleanup_task, timeout=CLEANUP_TIMEOUT) await asyncio.wait_for(cleanup_task, timeout=CLEANUP_TIMEOUT)
except asyncio.TimeoutError: except (asyncio.TimeoutError, Exception) as e:
logger.warning("Normal cleanup timed out, forcing cleanup") if isinstance(e, asyncio.TimeoutError):
# Immediately cancel the normal cleanup task logger.warning("Normal cleanup timed out, forcing cleanup")
else:
logger.error(f"Error during normal cleanup: {str(e)}")
# Cancel normal cleanup and force cleanup immediately
cleanup_task.cancel() cleanup_task.cancel()
# Force cleanup without waiting try:
await force_cleanup_resources(self) # Force cleanup with very short timeout
except Exception as e: await asyncio.wait_for(
logger.error(f"Error during normal cleanup: {str(e)}") force_cleanup_resources(self),
await force_cleanup_resources(self) timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup timed out")
except Exception as e:
logger.error(f"Error during force cleanup: {str(e)}")
except Exception as e: except Exception as e:
logger.error(f"Error during cog unload: {str(e)}") logger.error(f"Error during cog unload: {str(e)}")
finally: finally:
self._unloading = False self._unloading = False
# Ensure ready flag is cleared # Ensure ready flag is cleared
self.ready.clear() self.ready.clear()
# Clear all references # Aggressively clear all references
self.bot = None self.bot = None
self.processor = None self.processor = None
self.queue_manager = None self.queue_manager = None
self.update_checker = None self.update_checker = None
self.ffmpeg_mgr = None self.ffmpeg_mgr = None
self.components.clear() if hasattr(self, 'components'):
self.components.clear()
self.db = None self.db = None
# Clear any other potential references
self._init_task = None
self._cleanup_task = None
async def _cleanup(self) -> None: async def _cleanup(self) -> None:
"""Clean up all resources with proper handling""" """Clean up all resources with proper handling"""
await cleanup_resources(self) try:
await asyncio.wait_for(
cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.warning("Cleanup timed out, forcing cleanup")
try:
await asyncio.wait_for(
force_cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup timed out")