Removed the problematic singleton pattern from the queue manager

Added proper activity tracking in the monitoring system
Reduced resource-cleanup timeouts (CLEANUP_TIMEOUT 15 s -> 5 s, new FORCE_CLEANUP_TIMEOUT of 3 s) and deadlock thresholds
Implemented more aggressive cleanup procedures
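Added system-wide FFmpeg process cleanup (force-kills every ffmpeg process on the host via `pkill -9 ffmpeg` / `taskkill /F /IM ffmpeg.exe`, not only those started by this cog)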
This commit is contained in:
pacnpal
2024-11-16 00:24:28 +00:00
parent 39061cbf3e
commit 32c63deeff
4 changed files with 208 additions and 119 deletions

View File

@@ -7,11 +7,11 @@ import traceback
from redbot.core import Config, data_manager
from redbot.core.bot import Red
from redbot.core.commands import (
GroupCog,
Context,
hybrid_command,
GroupCog,
Context,
hybrid_command,
hybrid_group,
guild_only
guild_only,
)
from redbot.core import checks
from discord import app_commands
@@ -43,9 +43,10 @@ logger = logging.getLogger("VideoArchiver")
# Constants for timeouts - more reasonable timeouts
UNLOAD_TIMEOUT = 30 # seconds
CLEANUP_TIMEOUT = 15 # seconds
INIT_TIMEOUT = 60 # seconds
INIT_TIMEOUT = 60 # seconds
COMPONENT_INIT_TIMEOUT = 30 # seconds
class VideoArchiver(GroupCog):
"""Archive videos from Discord channels"""
@@ -135,7 +136,9 @@ class VideoArchiver(GroupCog):
self.processor.db = None
self.processor.queue_handler.db = None
await self.config_manager.update_setting(ctx.guild.id, "use_database", False)
await self.config_manager.update_setting(
ctx.guild.id, "use_database", False
)
await ctx.send("Video archive database has been disabled.")
except Exception as e:
@@ -364,8 +367,7 @@ class VideoArchiver(GroupCog):
# Clean existing downloads with timeout
try:
await asyncio.wait_for(
cleanup_downloads(str(self.download_path)),
timeout=CLEANUP_TIMEOUT
cleanup_downloads(str(self.download_path)), timeout=CLEANUP_TIMEOUT
)
logger.info("Downloads cleaned up")
except asyncio.TimeoutError:
@@ -386,12 +388,11 @@ class VideoArchiver(GroupCog):
max_history_age=86400,
persistence_path=str(queue_path),
)
# Initialize queue manager with timeout
try:
await asyncio.wait_for(
self.queue_manager.initialize(),
timeout=INIT_TIMEOUT
self.queue_manager.initialize(), timeout=INIT_TIMEOUT
)
logger.info("Queue manager initialized successfully")
except asyncio.TimeoutError:
@@ -417,7 +418,7 @@ class VideoArchiver(GroupCog):
try:
await asyncio.wait_for(
initialize_guild_components(self, guild.id),
timeout=COMPONENT_INIT_TIMEOUT
timeout=COMPONENT_INIT_TIMEOUT,
)
logger.info(f"Guild {guild.id} components initialized")
except asyncio.TimeoutError:
@@ -434,8 +435,7 @@ class VideoArchiver(GroupCog):
# Start update checker with timeout
try:
await asyncio.wait_for(
self.update_checker.start(),
timeout=INIT_TIMEOUT
self.update_checker.start(), timeout=INIT_TIMEOUT
)
logger.info("Update checker started")
except asyncio.TimeoutError:
@@ -453,12 +453,13 @@ class VideoArchiver(GroupCog):
logger.info("VideoArchiver initialization completed successfully")
except Exception as e:
logger.error(f"Critical error during initialization: {str(e)}\n{traceback.format_exc()}")
logger.error(
f"Critical error during initialization: {str(e)}\n{traceback.format_exc()}"
)
# Force cleanup on initialization error
try:
await asyncio.wait_for(
force_cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
force_cleanup_resources(self), timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup during initialization timed out")
@@ -491,8 +492,7 @@ class VideoArchiver(GroupCog):
# Ensure cleanup on any error
try:
await asyncio.wait_for(
force_cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
force_cleanup_resources(self), timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("Force cleanup during load error timed out")
@@ -510,7 +510,11 @@ class VideoArchiver(GroupCog):
self._cleanup_task.cancel()
# Cancel queue processing task if it exists
if hasattr(self, '_queue_task') and self._queue_task and not self._queue_task.done():
if (
hasattr(self, "_queue_task")
and self._queue_task
and not self._queue_task.done()
):
self._queue_task.cancel()
try:
await self._queue_task
@@ -535,8 +539,7 @@ class VideoArchiver(GroupCog):
try:
# Force cleanup with timeout
await asyncio.wait_for(
force_cleanup_resources(self),
timeout=CLEANUP_TIMEOUT
force_cleanup_resources(self), timeout=CLEANUP_TIMEOUT
)
logger.info("Force cleanup completed")
except asyncio.TimeoutError:
@@ -560,7 +563,7 @@ class VideoArchiver(GroupCog):
self.db = None
self._init_task = None
self._cleanup_task = None
if hasattr(self, '_queue_task'):
if hasattr(self, "_queue_task"):
self._queue_task = None
async def _cleanup(self) -> None:

View File

@@ -2,7 +2,10 @@
import logging
import asyncio
import signal
import os
from typing import TYPE_CHECKING
from pathlib import Path
from ..utils.file_ops import cleanup_downloads
@@ -11,48 +14,61 @@ if TYPE_CHECKING:
logger = logging.getLogger("VideoArchiver")
CLEANUP_TIMEOUT = 15 # seconds
CLEANUP_TIMEOUT = 5 # Reduced timeout to 5 seconds
FORCE_CLEANUP_TIMEOUT = 3 # Even shorter timeout for force cleanup
async def cleanup_resources(cog: "VideoArchiver") -> None:
"""Clean up all resources with proper handling"""
try:
logger.info("Starting resource cleanup...")
# Cancel initialization if still running
if cog._init_task and not cog._init_task.done():
logger.info("Cancelling initialization task")
cog._init_task.cancel()
try:
await asyncio.wait_for(cog._init_task, timeout=CLEANUP_TIMEOUT)
except (asyncio.TimeoutError, asyncio.CancelledError):
pass
logger.warning("Initialization task cancellation timed out")
# Stop update checker
if hasattr(cog, "update_checker"):
if hasattr(cog, "update_checker") and cog.update_checker:
logger.info("Stopping update checker")
try:
await asyncio.wait_for(
cog.update_checker.stop(), timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
pass
logger.warning("Update checker stop timed out")
cog.update_checker = None
# Clean up processor
if hasattr(cog, "processor"):
if hasattr(cog, "processor") and cog.processor:
logger.info("Cleaning up processor")
try:
await asyncio.wait_for(
cog.processor.cleanup(), timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.warning("Processor cleanup timed out, forcing cleanup")
await cog.processor.force_cleanup()
cog.processor = None
# Clean up queue manager
if hasattr(cog, "queue_manager"):
if hasattr(cog, "queue_manager") and cog.queue_manager:
logger.info("Cleaning up queue manager")
try:
await asyncio.wait_for(
cog.queue_manager.cleanup(), timeout=CLEANUP_TIMEOUT
)
except asyncio.TimeoutError:
logger.warning("Queue manager cleanup timed out, forcing stop")
cog.queue_manager.force_stop()
cog.queue_manager = None
# Clean up components for each guild
if hasattr(cog, "components"):
logger.info("Cleaning up guild components")
for guild_id, components in cog.components.items():
try:
if "message_manager" in components:
@@ -66,44 +82,106 @@ async def cleanup_resources(cog: "VideoArchiver") -> None:
cog.components.clear()
# Kill any FFmpeg processes
if hasattr(cog, "ffmpeg_mgr") and cog.ffmpeg_mgr:
logger.info("Killing FFmpeg processes")
cog.ffmpeg_mgr.kill_all_processes()
cog.ffmpeg_mgr = None
# Clean up download directory
if hasattr(cog, "download_path") and cog.download_path.exists():
logger.info("Cleaning up download directory")
try:
await cleanup_downloads(str(cog.download_path))
cog.download_path.rmdir()
await asyncio.wait_for(
cleanup_downloads(str(cog.download_path)),
timeout=CLEANUP_TIMEOUT
)
if cog.download_path.exists():
cog.download_path.rmdir()
except Exception as e:
logger.error(f"Error cleaning up download directory: {str(e)}")
# Kill any remaining FFmpeg processes system-wide
try:
if os.name != 'nt': # Unix-like systems
os.system("pkill -9 ffmpeg")
else: # Windows
os.system("taskkill /F /IM ffmpeg.exe")
except Exception as e:
logger.error(f"Error killing FFmpeg processes: {str(e)}")
except Exception as e:
logger.error(f"Error during cleanup: {str(e)}")
raise
finally:
logger.info("Clearing ready flag")
cog.ready.clear()
async def force_cleanup_resources(cog: "VideoArchiver") -> None:
"""Force cleanup of resources when timeout occurs"""
try:
# Cancel all tasks
if hasattr(cog, "processor"):
logger.info("Starting force cleanup...")
# Cancel all tasks immediately
if hasattr(cog, "processor") and cog.processor:
logger.info("Force cleaning processor")
await cog.processor.force_cleanup()
cog.processor = None
# Force stop queue manager
if hasattr(cog, "queue_manager"):
if hasattr(cog, "queue_manager") and cog.queue_manager:
logger.info("Force stopping queue manager")
cog.queue_manager.force_stop()
cog.queue_manager = None
# Kill any remaining FFmpeg processes
if hasattr(cog, "ffmpeg_mgr"):
# Kill FFmpeg processes
if hasattr(cog, "ffmpeg_mgr") and cog.ffmpeg_mgr:
logger.info("Force killing FFmpeg processes")
cog.ffmpeg_mgr.kill_all_processes()
cog.ffmpeg_mgr = None
# Force kill any remaining FFmpeg processes system-wide
try:
if os.name != 'nt': # Unix-like systems
os.system("pkill -9 ffmpeg")
else: # Windows
os.system("taskkill /F /IM ffmpeg.exe")
except Exception as e:
logger.error(f"Error force killing FFmpeg processes: {str(e)}")
# Clean up download directory
if hasattr(cog, "download_path") and cog.download_path.exists():
logger.info("Force cleaning download directory")
try:
await cleanup_downloads(str(cog.download_path))
cog.download_path.rmdir()
await asyncio.wait_for(
cleanup_downloads(str(cog.download_path)),
timeout=FORCE_CLEANUP_TIMEOUT
)
if cog.download_path.exists():
cog.download_path.rmdir()
except Exception as e:
logger.error(f"Error force cleaning download directory: {str(e)}")
# Clear all components
if hasattr(cog, "components"):
logger.info("Force clearing components")
cog.components.clear()
except Exception as e:
logger.error(f"Error during force cleanup: {str(e)}")
finally:
logger.info("Clearing ready flag")
cog.ready.clear()
# Clear all references
cog.bot = None
cog.processor = None
cog.queue_manager = None
cog.update_checker = None
cog.ffmpeg_mgr = None
cog.components = {}
cog.db = None
cog._init_task = None
cog._cleanup_task = None
if hasattr(cog, '_queue_task'):
cog._queue_task = None