Enhanced FFmpeg Integration:

Added robust error handling and logging
Improved binary verification and initialization
Added proper GPU detection and hardware acceleration
Optimized encoding parameters for different content types
Improved File Operations:

Added retry mechanisms for file operations
Enhanced temporary directory management
Improved cleanup of failed downloads
Added proper permission handling
Enhanced Queue Management:

Fixed queue manager initialization
Added better error recovery
Improved status tracking and logging
Enhanced cleanup of failed items
Better Error Handling:

Added comprehensive exception hierarchy
Improved error logging and reporting
Added fallback mechanisms for failures
Enhanced error recovery strategies
This commit is contained in:
pacnpal
2024-11-15 03:21:25 +00:00
parent a04c576e0a
commit 8503fc6fdd
13 changed files with 1336 additions and 376 deletions

View File

@@ -4,83 +4,128 @@ import os
import stat
import time
import logging
import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional
logger = logging.getLogger("VideoArchiver")
def secure_delete_file(file_path: str, passes: int = 3, timeout: int = 30) -> bool:
    """Securely delete a file by overwriting it multiple times before removal.

    Args:
        file_path: Path to the file to delete
        passes: Number of overwrite passes (default: 3)
        timeout: Maximum time in seconds to attempt deletion (default: 30)

    Returns:
        bool: True if file was successfully deleted, False otherwise
    """
    # Nothing to do if the file is already gone.
    if not os.path.exists(file_path):
        return True

    start_time = datetime.now()
    try:
        # Ensure file is writable before attempting to overwrite it.
        try:
            current_mode = os.stat(file_path).st_mode
            os.chmod(file_path, current_mode | stat.S_IWRITE)
        except OSError as e:
            logger.warning(f"Could not modify permissions of {file_path}: {e}")

        # Get file size; fall back to 0 (skip overwrite) if it cannot be read.
        try:
            file_size = os.path.getsize(file_path)
        except OSError:
            file_size = 0
            logger.warning(f"Could not get size of {file_path}, assuming 0")

        # Overwrite file content with random data on each pass so the
        # original bytes are not recoverable from disk.
        if file_size > 0:
            for pass_num in range(passes):
                try:
                    with open(file_path, "wb") as f:
                        # Write random data
                        f.write(os.urandom(file_size))
                        # Ensure data is written to disk
                        f.flush()
                        os.fsync(f.fileno())
                except OSError as e:
                    logger.warning(
                        f"Error during pass {pass_num + 1} of overwriting {file_path}: {e}"
                    )
                    continue

        # Try multiple deletion methods in order until one succeeds.
        deletion_methods = [
            lambda p: os.remove(p),
            lambda p: os.unlink(p),
            lambda p: Path(p).unlink(missing_ok=True),
            lambda p: shutil.rmtree(p, ignore_errors=True)
            if os.path.isdir(p)
            else os.remove(p),
        ]
        for method in deletion_methods:
            try:
                method(file_path)
                if not os.path.exists(file_path):
                    logger.debug(f"Successfully deleted {file_path}")
                    return True
            except OSError as e:
                logger.debug(f"Deletion method failed for {file_path}: {e}")
                continue

        # If the file still exists (e.g. held open by another process),
        # wait for it to disappear until the timeout elapses.
        while os.path.exists(file_path):
            if (datetime.now() - start_time).total_seconds() > timeout:
                logger.error(f"Timeout while trying to delete {file_path}")
                return False
            time.sleep(0.1)
        return True

    except Exception as e:
        logger.error(f"Error during secure deletion of {file_path}: {e}")
        # Last resort: try force delete
        try:
            if os.path.exists(file_path):
                os.chmod(file_path, stat.S_IWRITE | stat.S_IREAD)
                Path(file_path).unlink(missing_ok=True)
        except Exception as e2:
            logger.error(f"Force delete failed for {file_path}: {e2}")
        return not os.path.exists(file_path)
def cleanup_downloads(download_path: str) -> None:
    """Clean up the downloads directory without removing the directory itself.

    Args:
        download_path: Path to the downloads directory to clean
    """
    try:
        # Nothing to clean if the directory does not exist.
        if not os.path.exists(download_path):
            return

        # Delete every top-level entry: files are securely wiped, whole
        # subdirectory trees are removed in one shot.
        for entry in os.scandir(download_path):
            try:
                path = entry.path
                if entry.is_file():
                    if not secure_delete_file(path):
                        logger.error(f"Failed to delete file: {path}")
                elif entry.is_dir():
                    shutil.rmtree(path, ignore_errors=True)
            except Exception as e:
                # Best-effort cleanup: log and keep processing other entries.
                logger.error(f"Error processing {entry.path}: {e}")
                continue

        # Clean up any remaining empty subdirectories, deepest first
        # (topdown=False) so parents empty out as children are removed.
        for root, dirs, files in os.walk(download_path, topdown=False):
            for name in dirs:
                try:
                    dir_path = os.path.join(root, name)
                    if not os.listdir(dir_path):  # Check if directory is empty
                        os.rmdir(dir_path)
                except Exception as e:
                    logger.error(f"Error removing directory {name}: {e}")
    except Exception as e:
        logger.error(f"Error during cleanup of {download_path}: {e}")

View File

@@ -6,31 +6,73 @@ import shutil
import stat
import logging
import contextlib
import time
logger = logging.getLogger("VideoArchiver")
@contextlib.contextmanager
def temp_path_context():
    """Context manager for temporary path creation and cleanup.

    Yields:
        str: Path to a freshly created, user-only-writable temp directory.

    Raises:
        OSError: If the directory cannot be created or is not writable.
    """
    temp_dir = None
    try:
        # Create temp directory with proper permissions
        temp_dir = tempfile.mkdtemp(prefix="videoarchiver_")
        logger.debug(f"Created temporary directory: {temp_dir}")

        # Ensure directory has rwx permissions for user only
        os.chmod(temp_dir, stat.S_IRWXU)

        # Verify directory exists and is writable before handing it out.
        if not os.path.exists(temp_dir):
            raise OSError(f"Failed to create temporary directory: {temp_dir}")
        if not os.access(temp_dir, os.W_OK):
            raise OSError(f"Temporary directory is not writable: {temp_dir}")

        yield temp_dir
    except Exception as e:
        logger.error(f"Error in temp_path_context: {str(e)}")
        raise
    finally:
        # Cleanup runs regardless of how the with-block exited.
        if temp_dir and os.path.exists(temp_dir):
            try:
                # Ensure all files are deletable, with retries in case
                # something still holds a handle to the directory.
                max_retries = 3
                for attempt in range(max_retries):
                    try:
                        # Set permissions recursively so rmtree can delete
                        # everything even if a file was made read-only.
                        for root, dirs, files in os.walk(temp_dir):
                            for d in dirs:
                                try:
                                    dir_path = os.path.join(root, d)
                                    os.chmod(dir_path, stat.S_IRWXU)
                                except OSError as e:
                                    logger.warning(
                                        f"Failed to set permissions on directory {dir_path}: {e}"
                                    )
                            for f in files:
                                try:
                                    file_path = os.path.join(root, f)
                                    os.chmod(file_path, stat.S_IRWXU)
                                except OSError as e:
                                    logger.warning(
                                        f"Failed to set permissions on file {file_path}: {e}"
                                    )

                        # Try to remove the directory
                        shutil.rmtree(temp_dir, ignore_errors=True)

                        # Verify directory is gone
                        if not os.path.exists(temp_dir):
                            logger.debug(
                                f"Successfully cleaned up temporary directory: {temp_dir}"
                            )
                            break
                        if attempt < max_retries - 1:
                            time.sleep(1)  # Wait before retry
                    except Exception as e:
                        if attempt == max_retries - 1:
                            logger.error(
                                f"Failed to clean up temporary directory {temp_dir} "
                                f"after {max_retries} attempts: {e}"
                            )
                        elif attempt < max_retries - 1:
                            time.sleep(1)  # Wait before retry
                        continue
            except Exception as e:
                logger.error(f"Error during temp directory cleanup: {str(e)}")

View File

@@ -33,16 +33,26 @@ class VideoDownloader:
enabled_sites: Optional[List[str]] = None,
concurrent_downloads: int = 3,
):
self.download_path = download_path
# Ensure download path exists with proper permissions
self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True)
# Ensure directory has rwx permissions for user and rx for group/others
os.chmod(str(self.download_path), 0o755)
logger.info(f"Initialized download directory: {self.download_path}")
self.video_format = video_format
self.max_quality = max_quality
self.max_file_size = max_file_size
self.enabled_sites = enabled_sites
self.url_patterns = self._get_url_patterns()
# Initialize FFmpeg manager
self.ffmpeg_mgr = FFmpegManager()
ffmpeg_path = self.ffmpeg_mgr.get_ffmpeg_path()
if not os.path.exists(ffmpeg_path):
raise FileNotFoundError(f"FFmpeg not found at {ffmpeg_path}")
logger.info(f"Using FFmpeg from: {ffmpeg_path}")
# Create thread pool for this instance
self.download_pool = ThreadPoolExecutor(
max_workers=max(1, min(5, concurrent_downloads)),
@@ -53,13 +63,14 @@ class VideoDownloader:
self.active_downloads: Dict[str, str] = {}
self._downloads_lock = asyncio.Lock()
# Configure yt-dlp options
# Configure yt-dlp options with absolute FFmpeg path
self.ydl_opts = {
"format": f"bestvideo[height<={max_quality}]+bestaudio/best[height<={max_quality}]",
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best", # More flexible format
"outtmpl": "%(title)s.%(ext)s", # Base filename only, path added later
"merge_output_format": video_format,
"quiet": True,
"no_warnings": True,
"quiet": False, # Enable output for debugging
"no_warnings": False, # Show warnings
"verbose": True, # Enable verbose output
"extract_flat": False,
"concurrent_fragment_downloads": concurrent_downloads,
"retries": self.MAX_RETRIES,
@@ -68,8 +79,22 @@ class VideoDownloader:
"extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook],
"ffmpeg_location": self.ffmpeg_mgr.get_ffmpeg_path(),
"ffmpeg_location": str(ffmpeg_path), # Convert Path to string
"prefer_ffmpeg": True, # Force use of FFmpeg
"hls_prefer_ffmpeg": True, # Use FFmpeg for HLS
"logger": logger, # Use our logger
"ignoreerrors": True, # Don't stop on download errors
"no_color": True, # Disable ANSI colors in output
"geo_bypass": True, # Try to bypass geo-restrictions
"socket_timeout": 30, # Increase timeout
"external_downloader": {
"m3u8": "ffmpeg", # Use FFmpeg for m3u8 downloads
},
"external_downloader_args": {
"ffmpeg": ["-v", "warning"], # Reduce FFmpeg verbosity
}
}
logger.info("VideoDownloader initialized successfully")
def __del__(self):
"""Ensure thread pool is shutdown and files are cleaned up"""
@@ -83,7 +108,7 @@ class VideoDownloader:
self.active_downloads.clear()
# Shutdown thread pool
if hasattr(self, 'download_pool'):
if hasattr(self, "download_pool"):
self.download_pool.shutdown(wait=True)
except Exception as e:
logger.error(f"Error during VideoDownloader cleanup: {str(e)}")
@@ -94,7 +119,7 @@ class VideoDownloader:
try:
with yt_dlp.YoutubeDL() as ydl:
for ie in ydl._ies:
if hasattr(ie, '_VALID_URL') and ie._VALID_URL:
if hasattr(ie, "_VALID_URL") and ie._VALID_URL:
if not self.enabled_sites or any(
site.lower() in ie.IE_NAME.lower()
for site in self.enabled_sites
@@ -120,21 +145,29 @@ class VideoDownloader:
"""Handle download progress"""
if d["status"] == "finished":
logger.info(f"Download completed: {d['filename']}")
elif d["status"] == "downloading":
try:
percent = d.get("_percent_str", "N/A")
speed = d.get("_speed_str", "N/A")
eta = d.get("_eta_str", "N/A")
logger.debug(f"Download progress: {percent} at {speed}, ETA: {eta}")
except Exception as e:
logger.debug(f"Error logging progress: {str(e)}")
def _verify_video_file(self, file_path: str) -> bool:
"""Verify video file integrity"""
try:
probe = ffmpeg.probe(file_path)
# Check if file has video stream
video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
if not video_streams:
raise VideoVerificationError("No video streams found")
# Check if duration is valid
duration = float(probe['format'].get('duration', 0))
duration = float(probe["format"].get("duration", 0))
if duration <= 0:
raise VideoVerificationError("Invalid video duration")
# Check if file is readable
with open(file_path, 'rb') as f:
with open(file_path, "rb") as f:
f.seek(0, 2) # Seek to end
if f.tell() == 0:
raise VideoVerificationError("Empty file")
@@ -148,12 +181,11 @@ class VideoDownloader:
for attempt in range(self.MAX_RETRIES):
try:
ydl_opts = self.ydl_opts.copy()
ydl_opts['outtmpl'] = os.path.join(temp_dir, ydl_opts['outtmpl'])
ydl_opts["outtmpl"] = os.path.join(temp_dir, ydl_opts["outtmpl"])
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = await asyncio.get_event_loop().run_in_executor(
self.download_pool,
lambda: ydl.extract_info(url, download=True)
self.download_pool, lambda: ydl.extract_info(url, download=True)
)
if info is None:
@@ -171,7 +203,9 @@ class VideoDownloader:
except Exception as e:
logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
if attempt < self.MAX_RETRIES - 1:
await asyncio.sleep(self.RETRY_DELAY * (attempt + 1)) # Exponential backoff
await asyncio.sleep(
self.RETRY_DELAY * (attempt + 1)
) # Exponential backoff
else:
return False, "", f"All download attempts failed: {str(e)}"
@@ -206,7 +240,7 @@ class VideoDownloader:
)
compressed_file = os.path.join(
self.download_path,
f"compressed_{os.path.basename(original_file)}"
f"compressed_{os.path.basename(original_file)}",
)
# Configure ffmpeg with optimal parameters
@@ -225,11 +259,15 @@ class VideoDownloader:
)
if not os.path.exists(compressed_file):
raise FileNotFoundError("Compression completed but file not found")
raise FileNotFoundError(
"Compression completed but file not found"
)
# Verify compressed file
if not self._verify_video_file(compressed_file):
raise VideoVerificationError("Compressed file is not a valid video")
raise VideoVerificationError(
"Compressed file is not a valid video"
)
compressed_size = os.path.getsize(compressed_file)
if compressed_size <= (self.max_file_size * 1024 * 1024):
@@ -245,7 +283,9 @@ class VideoDownloader:
return False, "", f"Compression error: {str(e)}"
else:
# Move file to final location
final_path = os.path.join(self.download_path, os.path.basename(original_file))
final_path = os.path.join(
self.download_path, os.path.basename(original_file)
)
# Use safe move with retries
success = await self._safe_move_file(original_file, final_path)
if not success:
@@ -264,7 +304,11 @@ class VideoDownloader:
try:
if original_file and os.path.exists(original_file):
await self._safe_delete_file(original_file)
if compressed_file and os.path.exists(compressed_file) and not compressed_file.startswith(self.download_path):
if (
compressed_file
and os.path.exists(compressed_file)
and not compressed_file.startswith(self.download_path)
):
await self._safe_delete_file(compressed_file)
except Exception as e:
logger.error(f"Error during file cleanup: {str(e)}")