Improve file handling and resource management

- Add proper file permission handling and verification
- Add retries with exponential backoff for file operations
- Add better video file verification
- Add proper temp file cleanup
- Fix sync/async issues in file operations
- Add better error handling and logging
- Add proper thread pool management
- Fix potential memory leaks
- Add better file locking and cleanup
This commit is contained in:
pacnpal
2024-11-14 20:14:31 +00:00
parent 2b9a070896
commit dcb1587d4f

View File

@@ -12,6 +12,9 @@ import tempfile
import hashlib import hashlib
from functools import partial from functools import partial
import contextlib import contextlib
import stat
import time
from .ffmpeg_manager import FFmpegManager from .ffmpeg_manager import FFmpegManager
logging.basicConfig( logging.basicConfig(
@@ -26,14 +29,32 @@ class FileCleanupError(Exception):
"""Raised when file cleanup fails""" """Raised when file cleanup fails"""
pass pass
class VideoVerificationError(Exception):
"""Raised when video verification fails"""
pass
@contextlib.contextmanager @contextlib.contextmanager
def temp_path_context(): def temp_path_context():
"""Context manager for temporary path creation and cleanup""" """Context manager for temporary path creation and cleanup"""
temp_dir = tempfile.mkdtemp(prefix="videoarchiver_") temp_dir = tempfile.mkdtemp(prefix="videoarchiver_")
try: try:
# Ensure proper permissions
os.chmod(temp_dir, stat.S_IRWXU)
yield temp_dir yield temp_dir
finally: finally:
try: try:
# Ensure all files are deletable
for root, dirs, files in os.walk(temp_dir):
for d in dirs:
try:
os.chmod(os.path.join(root, d), stat.S_IRWXU)
except OSError:
pass
for f in files:
try:
os.chmod(os.path.join(root, f), stat.S_IRWXU)
except OSError:
pass
shutil.rmtree(temp_dir, ignore_errors=True) shutil.rmtree(temp_dir, ignore_errors=True)
except Exception as e: except Exception as e:
logger.error(f"Error cleaning up temp directory {temp_dir}: {e}") logger.error(f"Error cleaning up temp directory {temp_dir}: {e}")
@@ -41,6 +62,8 @@ def temp_path_context():
class VideoDownloader: class VideoDownloader:
MAX_RETRIES = 3 MAX_RETRIES = 3
RETRY_DELAY = 5 # seconds RETRY_DELAY = 5 # seconds
FILE_OP_RETRIES = 3
FILE_OP_RETRY_DELAY = 1 # seconds
def __init__( def __init__(
self, self,
@@ -77,10 +100,10 @@ class VideoDownloader:
"no_warnings": True, "no_warnings": True,
"extract_flat": False, "extract_flat": False,
"concurrent_fragment_downloads": concurrent_downloads, "concurrent_fragment_downloads": concurrent_downloads,
"retries": 3, "retries": self.MAX_RETRIES,
"fragment_retries": 3, "fragment_retries": self.MAX_RETRIES,
"file_access_retries": 3, "file_access_retries": self.FILE_OP_RETRIES,
"extractor_retries": 3, "extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size], "postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook], "progress_hooks": [self._progress_hook],
"ffmpeg_location": ffmpeg_mgr.get_ffmpeg_path(), "ffmpeg_location": ffmpeg_mgr.get_ffmpeg_path(),
@@ -91,7 +114,10 @@ class VideoDownloader:
try: try:
# Cancel all active downloads # Cancel all active downloads
for file_path in self.active_downloads.values(): for file_path in self.active_downloads.values():
secure_delete_file(file_path) try:
secure_delete_file(file_path)
except Exception as e:
logger.error(f"Error deleting file during cleanup: {str(e)}")
self.active_downloads.clear() self.active_downloads.clear()
# Shutdown thread pool # Shutdown thread pool
@@ -103,24 +129,30 @@ class VideoDownloader:
def _get_url_patterns(self) -> List[str]: def _get_url_patterns(self) -> List[str]:
"""Get URL patterns for supported sites""" """Get URL patterns for supported sites"""
patterns = [] patterns = []
with yt_dlp.YoutubeDL() as ydl: try:
for extractor in ydl._ies: with yt_dlp.YoutubeDL() as ydl:
if hasattr(extractor, "_VALID_URL") and extractor._VALID_URL: for extractor in ydl._ies:
if not self.enabled_sites or any( if hasattr(extractor, "_VALID_URL") and extractor._VALID_URL:
site.lower() in extractor.IE_NAME.lower() if not self.enabled_sites or any(
for site in self.enabled_sites site.lower() in extractor.IE_NAME.lower()
): for site in self.enabled_sites
patterns.append(extractor._VALID_URL) ):
patterns.append(extractor._VALID_URL)
except Exception as e:
logger.error(f"Error getting URL patterns: {str(e)}")
return patterns return patterns
def _check_file_size(self, info): def _check_file_size(self, info):
"""Check if file size is within limits""" """Check if file size is within limits"""
if info.get("filepath") and os.path.exists(info["filepath"]): if info.get("filepath") and os.path.exists(info["filepath"]):
size = os.path.getsize(info["filepath"]) try:
if size > (self.max_file_size * 1024 * 1024): size = os.path.getsize(info["filepath"])
logger.info( if size > (self.max_file_size * 1024 * 1024):
f"File exceeds size limit, will compress: {info['filepath']}" logger.info(
) f"File exceeds size limit, will compress: {info['filepath']}"
)
except OSError as e:
logger.error(f"Error checking file size: {str(e)}")
def _progress_hook(self, d): def _progress_hook(self, d):
"""Handle download progress""" """Handle download progress"""
@@ -134,10 +166,17 @@ class VideoDownloader:
# Check if file has video stream # Check if file has video stream
video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video'] video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
if not video_streams: if not video_streams:
return False raise VideoVerificationError("No video streams found")
# Check if duration is valid # Check if duration is valid
duration = float(probe['format'].get('duration', 0)) duration = float(probe['format'].get('duration', 0))
return duration > 0 if duration <= 0:
raise VideoVerificationError("Invalid video duration")
# Check if file is readable
with open(file_path, 'rb') as f:
f.seek(0, 2) # Seek to end
if f.tell() == 0:
raise VideoVerificationError("Empty file")
return True
except Exception as e: except Exception as e:
logger.error(f"Error verifying video file {file_path}: {e}") logger.error(f"Error verifying video file {file_path}: {e}")
return False return False
@@ -163,14 +202,14 @@ class VideoDownloader:
raise FileNotFoundError("Download completed but file not found") raise FileNotFoundError("Download completed but file not found")
if not self._verify_video_file(file_path): if not self._verify_video_file(file_path):
raise Exception("Downloaded file is not a valid video") raise VideoVerificationError("Downloaded file is not a valid video")
return True, file_path, "" return True, file_path, ""
except Exception as e: except Exception as e:
logger.error(f"Download attempt {attempt + 1} failed: {str(e)}") logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
if attempt < self.MAX_RETRIES - 1: if attempt < self.MAX_RETRIES - 1:
await asyncio.sleep(self.RETRY_DELAY) await asyncio.sleep(self.RETRY_DELAY * (attempt + 1)) # Exponential backoff
else: else:
return False, "", f"All download attempts failed: {str(e)}" return False, "", f"All download attempts failed: {str(e)}"
@@ -223,23 +262,32 @@ class VideoDownloader:
), ),
) )
if os.path.exists(compressed_file): if not os.path.exists(compressed_file):
compressed_size = os.path.getsize(compressed_file) raise FileNotFoundError("Compression completed but file not found")
if compressed_size <= (self.max_file_size * 1024 * 1024):
secure_delete_file(original_file) # Verify compressed file
return True, compressed_file, "" if not self._verify_video_file(compressed_file):
else: raise VideoVerificationError("Compressed file is not a valid video")
secure_delete_file(compressed_file)
return False, "", "Failed to compress to target size" compressed_size = os.path.getsize(compressed_file)
if compressed_size <= (self.max_file_size * 1024 * 1024):
await self._safe_delete_file(original_file)
return True, compressed_file, ""
else:
await self._safe_delete_file(compressed_file)
return False, "", "Failed to compress to target size"
except Exception as e: except Exception as e:
if compressed_file and os.path.exists(compressed_file): if compressed_file and os.path.exists(compressed_file):
secure_delete_file(compressed_file) await self._safe_delete_file(compressed_file)
logger.error(f"Compression error: {str(e)}") logger.error(f"Compression error: {str(e)}")
return False, "", f"Compression error: {str(e)}" return False, "", f"Compression error: {str(e)}"
else: else:
# Move file to final location # Move file to final location
final_path = os.path.join(self.download_path, os.path.basename(original_file)) final_path = os.path.join(self.download_path, os.path.basename(original_file))
shutil.move(original_file, final_path) # Use safe move with retries
success = await self._safe_move_file(original_file, final_path)
if not success:
return False, "", "Failed to move file to final location"
return True, final_path, "" return True, final_path, ""
except Exception as e: except Exception as e:
@@ -253,12 +301,42 @@ class VideoDownloader:
try: try:
if original_file and os.path.exists(original_file): if original_file and os.path.exists(original_file):
secure_delete_file(original_file) await self._safe_delete_file(original_file)
if compressed_file and os.path.exists(compressed_file) and not compressed_file.startswith(self.download_path): if compressed_file and os.path.exists(compressed_file) and not compressed_file.startswith(self.download_path):
secure_delete_file(compressed_file) await self._safe_delete_file(compressed_file)
except Exception as e: except Exception as e:
logger.error(f"Error during file cleanup: {str(e)}") logger.error(f"Error during file cleanup: {str(e)}")
async def _safe_delete_file(self, file_path: str) -> bool:
"""Safely delete a file with retries"""
for attempt in range(self.FILE_OP_RETRIES):
try:
if secure_delete_file(file_path):
return True
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
except Exception as e:
logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
if attempt == self.FILE_OP_RETRIES - 1:
return False
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
return False
async def _safe_move_file(self, src: str, dst: str) -> bool:
"""Safely move a file with retries"""
for attempt in range(self.FILE_OP_RETRIES):
try:
# Ensure destination directory exists
os.makedirs(os.path.dirname(dst), exist_ok=True)
# Try to move the file
shutil.move(src, dst)
return True
except Exception as e:
logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
if attempt == self.FILE_OP_RETRIES - 1:
return False
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
return False
def is_supported_url(self, url: str) -> bool: def is_supported_url(self, url: str) -> bool:
"""Check if URL is supported""" """Check if URL is supported"""
try: try:
@@ -266,7 +344,8 @@ class VideoDownloader:
# Try to extract info without downloading # Try to extract info without downloading
ie = ydl.extract_info(url, download=False, process=False) ie = ydl.extract_info(url, download=False, process=False)
return ie is not None return ie is not None
except: except Exception as e:
logger.error(f"Error checking URL support: {str(e)}")
return False return False
@@ -323,6 +402,12 @@ def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bo
start_time = datetime.now() start_time = datetime.now()
while True: while True:
try: try:
# Ensure file is writable
try:
os.chmod(file_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
file_size = os.path.getsize(file_path) file_size = os.path.getsize(file_path)
for _ in range(passes): for _ in range(passes):
with open(file_path, "wb") as f: with open(file_path, "wb") as f:
@@ -346,7 +431,7 @@ def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bo
logger.error(f"Timeout while trying to delete {file_path}") logger.error(f"Timeout while trying to delete {file_path}")
return False return False
# Wait briefly before retry # Wait briefly before retry
asyncio.sleep(0.1) time.sleep(0.1)
continue continue
return True return True
@@ -356,9 +441,10 @@ def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bo
# Last resort: try force delete # Last resort: try force delete
try: try:
if os.path.exists(file_path): if os.path.exists(file_path):
os.chmod(file_path, stat.S_IRUSR | stat.S_IWUSR)
Path(file_path).unlink(missing_ok=True) Path(file_path).unlink(missing_ok=True)
except Exception: except Exception as e2:
pass logger.error(f"Force delete failed: {str(e2)}")
return not os.path.exists(file_path) return not os.path.exists(file_path)
@@ -369,10 +455,14 @@ def cleanup_downloads(download_path: str) -> None:
# Delete all files in the directory # Delete all files in the directory
for file_path in Path(download_path).glob("**/*"): for file_path in Path(download_path).glob("**/*"):
if file_path.is_file(): if file_path.is_file():
secure_delete_file(str(file_path)) try:
if not secure_delete_file(str(file_path)):
logger.error(f"Failed to delete file: {file_path}")
except Exception as e:
logger.error(f"Error deleting file {file_path}: {str(e)}")
# Clean up empty subdirectories # Clean up empty subdirectories
for dir_path in Path(download_path).glob("**/*"): for dir_path in sorted(Path(download_path).glob("**/*"), reverse=True):
if dir_path.is_dir(): if dir_path.is_dir():
try: try:
dir_path.rmdir() # Will only remove if empty dir_path.rmdir() # Will only remove if empty