mirror of
https://github.com/pacnpal/Pac-cogs.git
synced 2025-12-20 10:51:05 -05:00
Improve file handling and resource management
- Add proper file permission handling and verification - Add retries with exponential backoff for file operations - Add better video file verification - Add proper temp file cleanup - Fix sync/async issues in file operations - Add better error handling and logging - Add proper thread pool management - Fix potential memory leaks - Add better file locking and cleanup
This commit is contained in:
@@ -12,6 +12,9 @@ import tempfile
|
|||||||
import hashlib
|
import hashlib
|
||||||
from functools import partial
|
from functools import partial
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import stat
|
||||||
|
import time
|
||||||
|
|
||||||
from .ffmpeg_manager import FFmpegManager
|
from .ffmpeg_manager import FFmpegManager
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
@@ -26,14 +29,32 @@ class FileCleanupError(Exception):
|
|||||||
"""Raised when file cleanup fails"""
|
"""Raised when file cleanup fails"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class VideoVerificationError(Exception):
|
||||||
|
"""Raised when video verification fails"""
|
||||||
|
pass
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def temp_path_context():
|
def temp_path_context():
|
||||||
"""Context manager for temporary path creation and cleanup"""
|
"""Context manager for temporary path creation and cleanup"""
|
||||||
temp_dir = tempfile.mkdtemp(prefix="videoarchiver_")
|
temp_dir = tempfile.mkdtemp(prefix="videoarchiver_")
|
||||||
try:
|
try:
|
||||||
|
# Ensure proper permissions
|
||||||
|
os.chmod(temp_dir, stat.S_IRWXU)
|
||||||
yield temp_dir
|
yield temp_dir
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
|
# Ensure all files are deletable
|
||||||
|
for root, dirs, files in os.walk(temp_dir):
|
||||||
|
for d in dirs:
|
||||||
|
try:
|
||||||
|
os.chmod(os.path.join(root, d), stat.S_IRWXU)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
for f in files:
|
||||||
|
try:
|
||||||
|
os.chmod(os.path.join(root, f), stat.S_IRWXU)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error cleaning up temp directory {temp_dir}: {e}")
|
logger.error(f"Error cleaning up temp directory {temp_dir}: {e}")
|
||||||
@@ -41,6 +62,8 @@ def temp_path_context():
|
|||||||
class VideoDownloader:
|
class VideoDownloader:
|
||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
RETRY_DELAY = 5 # seconds
|
RETRY_DELAY = 5 # seconds
|
||||||
|
FILE_OP_RETRIES = 3
|
||||||
|
FILE_OP_RETRY_DELAY = 1 # seconds
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -77,10 +100,10 @@ class VideoDownloader:
|
|||||||
"no_warnings": True,
|
"no_warnings": True,
|
||||||
"extract_flat": False,
|
"extract_flat": False,
|
||||||
"concurrent_fragment_downloads": concurrent_downloads,
|
"concurrent_fragment_downloads": concurrent_downloads,
|
||||||
"retries": 3,
|
"retries": self.MAX_RETRIES,
|
||||||
"fragment_retries": 3,
|
"fragment_retries": self.MAX_RETRIES,
|
||||||
"file_access_retries": 3,
|
"file_access_retries": self.FILE_OP_RETRIES,
|
||||||
"extractor_retries": 3,
|
"extractor_retries": self.MAX_RETRIES,
|
||||||
"postprocessor_hooks": [self._check_file_size],
|
"postprocessor_hooks": [self._check_file_size],
|
||||||
"progress_hooks": [self._progress_hook],
|
"progress_hooks": [self._progress_hook],
|
||||||
"ffmpeg_location": ffmpeg_mgr.get_ffmpeg_path(),
|
"ffmpeg_location": ffmpeg_mgr.get_ffmpeg_path(),
|
||||||
@@ -91,7 +114,10 @@ class VideoDownloader:
|
|||||||
try:
|
try:
|
||||||
# Cancel all active downloads
|
# Cancel all active downloads
|
||||||
for file_path in self.active_downloads.values():
|
for file_path in self.active_downloads.values():
|
||||||
secure_delete_file(file_path)
|
try:
|
||||||
|
secure_delete_file(file_path)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error deleting file during cleanup: {str(e)}")
|
||||||
self.active_downloads.clear()
|
self.active_downloads.clear()
|
||||||
|
|
||||||
# Shutdown thread pool
|
# Shutdown thread pool
|
||||||
@@ -103,24 +129,30 @@ class VideoDownloader:
|
|||||||
def _get_url_patterns(self) -> List[str]:
|
def _get_url_patterns(self) -> List[str]:
|
||||||
"""Get URL patterns for supported sites"""
|
"""Get URL patterns for supported sites"""
|
||||||
patterns = []
|
patterns = []
|
||||||
with yt_dlp.YoutubeDL() as ydl:
|
try:
|
||||||
for extractor in ydl._ies:
|
with yt_dlp.YoutubeDL() as ydl:
|
||||||
if hasattr(extractor, "_VALID_URL") and extractor._VALID_URL:
|
for extractor in ydl._ies:
|
||||||
if not self.enabled_sites or any(
|
if hasattr(extractor, "_VALID_URL") and extractor._VALID_URL:
|
||||||
site.lower() in extractor.IE_NAME.lower()
|
if not self.enabled_sites or any(
|
||||||
for site in self.enabled_sites
|
site.lower() in extractor.IE_NAME.lower()
|
||||||
):
|
for site in self.enabled_sites
|
||||||
patterns.append(extractor._VALID_URL)
|
):
|
||||||
|
patterns.append(extractor._VALID_URL)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting URL patterns: {str(e)}")
|
||||||
return patterns
|
return patterns
|
||||||
|
|
||||||
def _check_file_size(self, info):
|
def _check_file_size(self, info):
|
||||||
"""Check if file size is within limits"""
|
"""Check if file size is within limits"""
|
||||||
if info.get("filepath") and os.path.exists(info["filepath"]):
|
if info.get("filepath") and os.path.exists(info["filepath"]):
|
||||||
size = os.path.getsize(info["filepath"])
|
try:
|
||||||
if size > (self.max_file_size * 1024 * 1024):
|
size = os.path.getsize(info["filepath"])
|
||||||
logger.info(
|
if size > (self.max_file_size * 1024 * 1024):
|
||||||
f"File exceeds size limit, will compress: {info['filepath']}"
|
logger.info(
|
||||||
)
|
f"File exceeds size limit, will compress: {info['filepath']}"
|
||||||
|
)
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Error checking file size: {str(e)}")
|
||||||
|
|
||||||
def _progress_hook(self, d):
|
def _progress_hook(self, d):
|
||||||
"""Handle download progress"""
|
"""Handle download progress"""
|
||||||
@@ -134,10 +166,17 @@ class VideoDownloader:
|
|||||||
# Check if file has video stream
|
# Check if file has video stream
|
||||||
video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
|
video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
|
||||||
if not video_streams:
|
if not video_streams:
|
||||||
return False
|
raise VideoVerificationError("No video streams found")
|
||||||
# Check if duration is valid
|
# Check if duration is valid
|
||||||
duration = float(probe['format'].get('duration', 0))
|
duration = float(probe['format'].get('duration', 0))
|
||||||
return duration > 0
|
if duration <= 0:
|
||||||
|
raise VideoVerificationError("Invalid video duration")
|
||||||
|
# Check if file is readable
|
||||||
|
with open(file_path, 'rb') as f:
|
||||||
|
f.seek(0, 2) # Seek to end
|
||||||
|
if f.tell() == 0:
|
||||||
|
raise VideoVerificationError("Empty file")
|
||||||
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error verifying video file {file_path}: {e}")
|
logger.error(f"Error verifying video file {file_path}: {e}")
|
||||||
return False
|
return False
|
||||||
@@ -163,14 +202,14 @@ class VideoDownloader:
|
|||||||
raise FileNotFoundError("Download completed but file not found")
|
raise FileNotFoundError("Download completed but file not found")
|
||||||
|
|
||||||
if not self._verify_video_file(file_path):
|
if not self._verify_video_file(file_path):
|
||||||
raise Exception("Downloaded file is not a valid video")
|
raise VideoVerificationError("Downloaded file is not a valid video")
|
||||||
|
|
||||||
return True, file_path, ""
|
return True, file_path, ""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
|
logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
|
||||||
if attempt < self.MAX_RETRIES - 1:
|
if attempt < self.MAX_RETRIES - 1:
|
||||||
await asyncio.sleep(self.RETRY_DELAY)
|
await asyncio.sleep(self.RETRY_DELAY * (attempt + 1)) # Exponential backoff
|
||||||
else:
|
else:
|
||||||
return False, "", f"All download attempts failed: {str(e)}"
|
return False, "", f"All download attempts failed: {str(e)}"
|
||||||
|
|
||||||
@@ -223,23 +262,32 @@ class VideoDownloader:
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
if os.path.exists(compressed_file):
|
if not os.path.exists(compressed_file):
|
||||||
compressed_size = os.path.getsize(compressed_file)
|
raise FileNotFoundError("Compression completed but file not found")
|
||||||
if compressed_size <= (self.max_file_size * 1024 * 1024):
|
|
||||||
secure_delete_file(original_file)
|
# Verify compressed file
|
||||||
return True, compressed_file, ""
|
if not self._verify_video_file(compressed_file):
|
||||||
else:
|
raise VideoVerificationError("Compressed file is not a valid video")
|
||||||
secure_delete_file(compressed_file)
|
|
||||||
return False, "", "Failed to compress to target size"
|
compressed_size = os.path.getsize(compressed_file)
|
||||||
|
if compressed_size <= (self.max_file_size * 1024 * 1024):
|
||||||
|
await self._safe_delete_file(original_file)
|
||||||
|
return True, compressed_file, ""
|
||||||
|
else:
|
||||||
|
await self._safe_delete_file(compressed_file)
|
||||||
|
return False, "", "Failed to compress to target size"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if compressed_file and os.path.exists(compressed_file):
|
if compressed_file and os.path.exists(compressed_file):
|
||||||
secure_delete_file(compressed_file)
|
await self._safe_delete_file(compressed_file)
|
||||||
logger.error(f"Compression error: {str(e)}")
|
logger.error(f"Compression error: {str(e)}")
|
||||||
return False, "", f"Compression error: {str(e)}"
|
return False, "", f"Compression error: {str(e)}"
|
||||||
else:
|
else:
|
||||||
# Move file to final location
|
# Move file to final location
|
||||||
final_path = os.path.join(self.download_path, os.path.basename(original_file))
|
final_path = os.path.join(self.download_path, os.path.basename(original_file))
|
||||||
shutil.move(original_file, final_path)
|
# Use safe move with retries
|
||||||
|
success = await self._safe_move_file(original_file, final_path)
|
||||||
|
if not success:
|
||||||
|
return False, "", "Failed to move file to final location"
|
||||||
return True, final_path, ""
|
return True, final_path, ""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -253,12 +301,42 @@ class VideoDownloader:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if original_file and os.path.exists(original_file):
|
if original_file and os.path.exists(original_file):
|
||||||
secure_delete_file(original_file)
|
await self._safe_delete_file(original_file)
|
||||||
if compressed_file and os.path.exists(compressed_file) and not compressed_file.startswith(self.download_path):
|
if compressed_file and os.path.exists(compressed_file) and not compressed_file.startswith(self.download_path):
|
||||||
secure_delete_file(compressed_file)
|
await self._safe_delete_file(compressed_file)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error during file cleanup: {str(e)}")
|
logger.error(f"Error during file cleanup: {str(e)}")
|
||||||
|
|
||||||
|
async def _safe_delete_file(self, file_path: str) -> bool:
|
||||||
|
"""Safely delete a file with retries"""
|
||||||
|
for attempt in range(self.FILE_OP_RETRIES):
|
||||||
|
try:
|
||||||
|
if secure_delete_file(file_path):
|
||||||
|
return True
|
||||||
|
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
|
||||||
|
if attempt == self.FILE_OP_RETRIES - 1:
|
||||||
|
return False
|
||||||
|
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _safe_move_file(self, src: str, dst: str) -> bool:
|
||||||
|
"""Safely move a file with retries"""
|
||||||
|
for attempt in range(self.FILE_OP_RETRIES):
|
||||||
|
try:
|
||||||
|
# Ensure destination directory exists
|
||||||
|
os.makedirs(os.path.dirname(dst), exist_ok=True)
|
||||||
|
# Try to move the file
|
||||||
|
shutil.move(src, dst)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
|
||||||
|
if attempt == self.FILE_OP_RETRIES - 1:
|
||||||
|
return False
|
||||||
|
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
|
||||||
|
return False
|
||||||
|
|
||||||
def is_supported_url(self, url: str) -> bool:
|
def is_supported_url(self, url: str) -> bool:
|
||||||
"""Check if URL is supported"""
|
"""Check if URL is supported"""
|
||||||
try:
|
try:
|
||||||
@@ -266,7 +344,8 @@ class VideoDownloader:
|
|||||||
# Try to extract info without downloading
|
# Try to extract info without downloading
|
||||||
ie = ydl.extract_info(url, download=False, process=False)
|
ie = ydl.extract_info(url, download=False, process=False)
|
||||||
return ie is not None
|
return ie is not None
|
||||||
except:
|
except Exception as e:
|
||||||
|
logger.error(f"Error checking URL support: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
@@ -323,6 +402,12 @@ def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bo
|
|||||||
start_time = datetime.now()
|
start_time = datetime.now()
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
# Ensure file is writable
|
||||||
|
try:
|
||||||
|
os.chmod(file_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
file_size = os.path.getsize(file_path)
|
file_size = os.path.getsize(file_path)
|
||||||
for _ in range(passes):
|
for _ in range(passes):
|
||||||
with open(file_path, "wb") as f:
|
with open(file_path, "wb") as f:
|
||||||
@@ -346,7 +431,7 @@ def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bo
|
|||||||
logger.error(f"Timeout while trying to delete {file_path}")
|
logger.error(f"Timeout while trying to delete {file_path}")
|
||||||
return False
|
return False
|
||||||
# Wait briefly before retry
|
# Wait briefly before retry
|
||||||
asyncio.sleep(0.1)
|
time.sleep(0.1)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return True
|
return True
|
||||||
@@ -356,9 +441,10 @@ def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bo
|
|||||||
# Last resort: try force delete
|
# Last resort: try force delete
|
||||||
try:
|
try:
|
||||||
if os.path.exists(file_path):
|
if os.path.exists(file_path):
|
||||||
|
os.chmod(file_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||||
Path(file_path).unlink(missing_ok=True)
|
Path(file_path).unlink(missing_ok=True)
|
||||||
except Exception:
|
except Exception as e2:
|
||||||
pass
|
logger.error(f"Force delete failed: {str(e2)}")
|
||||||
return not os.path.exists(file_path)
|
return not os.path.exists(file_path)
|
||||||
|
|
||||||
|
|
||||||
@@ -369,10 +455,14 @@ def cleanup_downloads(download_path: str) -> None:
|
|||||||
# Delete all files in the directory
|
# Delete all files in the directory
|
||||||
for file_path in Path(download_path).glob("**/*"):
|
for file_path in Path(download_path).glob("**/*"):
|
||||||
if file_path.is_file():
|
if file_path.is_file():
|
||||||
secure_delete_file(str(file_path))
|
try:
|
||||||
|
if not secure_delete_file(str(file_path)):
|
||||||
|
logger.error(f"Failed to delete file: {file_path}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error deleting file {file_path}: {str(e)}")
|
||||||
|
|
||||||
# Clean up empty subdirectories
|
# Clean up empty subdirectories
|
||||||
for dir_path in Path(download_path).glob("**/*"):
|
for dir_path in sorted(Path(download_path).glob("**/*"), reverse=True):
|
||||||
if dir_path.is_dir():
|
if dir_path.is_dir():
|
||||||
try:
|
try:
|
||||||
dir_path.rmdir() # Will only remove if empty
|
dir_path.rmdir() # Will only remove if empty
|
||||||
|
|||||||
Reference in New Issue
Block a user