Core Systems:

- Component-based architecture with lifecycle management
- Enhanced error handling and recovery mechanisms
- Comprehensive state management and tracking
- Event-driven architecture with monitoring

Queue Management:

- Multiple processing strategies for different scenarios
- Advanced state management with recovery
- Comprehensive metrics and health monitoring
- Sophisticated cleanup system with multiple strategies

Processing Pipeline:

- Enhanced message handling with validation
- Improved URL extraction and processing
- Better queue management and monitoring
- Advanced cleanup mechanisms

Overall Benefits:

- Better code organization and maintainability
- Improved error handling and recovery
- Enhanced monitoring and reporting
- More robust and reliable system
This commit is contained in:
pacnpal
2024-11-16 05:01:29 +00:00
parent 537a325807
commit a4ca6e8ea6
47 changed files with 11085 additions and 2110 deletions

@@ -0,0 +1,330 @@
"""Module for managing video compression"""
import os
import logging
import asyncio
import json
import subprocess
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Callable, Set
from .exceptions import CompressionError, VideoVerificationError
logger = logging.getLogger("CompressionManager")
class CompressionManager:
"""Manages video compression operations"""
def __init__(self, ffmpeg_mgr, max_file_size: int):
self.ffmpeg_mgr = ffmpeg_mgr
self.max_file_size = max_file_size * 1024 * 1024 # Convert to bytes
self._active_processes: Set[subprocess.Popen] = set()
self._processes_lock = asyncio.Lock()
self._shutting_down = False
async def compress_video(
self,
input_file: str,
output_file: str,
progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str]:
"""Compress a video file
Args:
input_file: Path to input video file
output_file: Path to output video file
progress_callback: Optional callback for compression progress
Returns:
Tuple[bool, str]: (Success status, Error message if any)
"""
if self._shutting_down:
return False, "Compression manager is shutting down"
try:
# Get optimal compression parameters
compression_params = self.ffmpeg_mgr.get_compression_params(
input_file,
self.max_file_size // (1024 * 1024) # Convert to MB
)
# Try hardware acceleration first
success, error = await self._try_compression(
input_file,
output_file,
compression_params,
progress_callback,
use_hardware=True
)
# Fall back to CPU if hardware acceleration fails
if not success:
logger.warning(f"Hardware acceleration failed: {error}, falling back to CPU encoding")
success, error = await self._try_compression(
input_file,
output_file,
compression_params,
progress_callback,
use_hardware=False
)
if not success:
return False, f"Compression failed: {error}"
# Verify output file
if not await self._verify_output(input_file, output_file):
return False, "Output file verification failed"
return True, ""
except Exception as e:
logger.error(f"Error during compression: {e}")
return False, str(e)
async def _try_compression(
self,
input_file: str,
output_file: str,
params: Dict[str, str],
progress_callback: Optional[Callable[[float], None]],
use_hardware: bool
) -> Tuple[bool, str]:
"""Attempt video compression with given parameters"""
if self._shutting_down:
return False, "Compression manager is shutting down"
try:
# Build FFmpeg command
cmd = await self._build_ffmpeg_command(
input_file,
output_file,
params,
use_hardware
)
# Get video duration for progress calculation
duration = await self._get_video_duration(input_file)
# Initialize compression progress tracking
await self._init_compression_progress(
input_file,
params,
use_hardware,
duration
)
# Run compression
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
# Track the process
async with self._processes_lock:
self._active_processes.add(process)
try:
success = await self._monitor_compression(
process,
input_file,
output_file,
duration,
progress_callback
)
return success, ""
finally:
async with self._processes_lock:
self._active_processes.discard(process)
except Exception as e:
return False, str(e)
async def _build_ffmpeg_command(
self,
input_file: str,
output_file: str,
params: Dict[str, str],
use_hardware: bool
) -> List[str]:
"""Build FFmpeg command with appropriate parameters"""
ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
cmd = [ffmpeg_path, "-y", "-i", input_file, "-progress", "pipe:1"]
# Modify parameters for hardware acceleration
if use_hardware:
gpu_info = self.ffmpeg_mgr.gpu_info
if gpu_info["nvidia"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_nvenc"
elif gpu_info["amd"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_amf"
elif gpu_info["intel"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_qsv"
else:
params["c:v"] = "libx264"
# Add parameters to command
for key, value in params.items():
cmd.extend([f"-{key}", str(value)])
cmd.append(output_file)
return cmd
async def _monitor_compression(
self,
process: asyncio.subprocess.Process,
input_file: str,
output_file: str,
duration: float,
progress_callback: Optional[Callable[[float], None]]
) -> bool:
"""Monitor compression progress"""
start_time = datetime.utcnow()
while True:
if self._shutting_down:
process.terminate()
return False
line = await process.stdout.readline()
if not line:
break
try:
await self._update_progress(
line.decode().strip(),
input_file,
output_file,
duration,
start_time,
progress_callback
)
except Exception as e:
logger.error(f"Error updating progress: {e}")
await process.wait()
return os.path.exists(output_file)
async def _verify_output(
self,
input_file: str,
output_file: str
) -> bool:
"""Verify compressed output file"""
try:
# Check file exists and is not empty
if not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
return False
# Check file size is within limit
if os.path.getsize(output_file) > self.max_file_size:
return False
# Verify video integrity
return await self.ffmpeg_mgr.verify_video_file(output_file)
except Exception as e:
logger.error(f"Error verifying output file: {e}")
return False
async def cleanup(self) -> None:
"""Clean up resources"""
self._shutting_down = True
await self._terminate_processes()
async def force_cleanup(self) -> None:
"""Force cleanup of resources"""
self._shutting_down = True
await self._kill_processes()
async def _terminate_processes(self) -> None:
"""Terminate active processes gracefully"""
async with self._processes_lock:
for process in self._active_processes:
try:
process.terminate()
await asyncio.sleep(0.1)
if process.returncode is None:
process.kill()
except Exception as e:
logger.error(f"Error terminating process: {e}")
self._active_processes.clear()
async def _kill_processes(self) -> None:
"""Kill active processes immediately"""
async with self._processes_lock:
for process in self._active_processes:
try:
process.kill()
except Exception as e:
logger.error(f"Error killing process: {e}")
self._active_processes.clear()
async def _get_video_duration(self, file_path: str) -> float:
"""Get video duration in seconds"""
try:
return await self.ffmpeg_mgr.get_video_duration(file_path)
except Exception as e:
logger.error(f"Error getting video duration: {e}")
return 0
async def _init_compression_progress(
self,
input_file: str,
params: Dict[str, str],
use_hardware: bool,
duration: float
) -> None:
"""Initialize compression progress tracking"""
from videoarchiver.processor import _compression_progress
_compression_progress[input_file] = {
"active": True,
"filename": os.path.basename(input_file),
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": os.path.getsize(input_file),
"current_size": 0,
"target_size": self.max_file_size,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
async def _update_progress(
self,
line: str,
input_file: str,
output_file: str,
duration: float,
start_time: datetime,
progress_callback: Optional[Callable[[float], None]]
) -> None:
"""Update compression progress"""
if line.startswith("out_time_ms="):
current_time = int(line.split("=")[1]) / 1000000
if duration > 0:
progress = min(100, (current_time / duration) * 100)
# Update compression progress
from videoarchiver.processor import _compression_progress
if input_file in _compression_progress:
elapsed = datetime.utcnow() - start_time
_compression_progress[input_file].update({
"percent": progress,
"elapsed_time": str(elapsed).split(".")[0],
"current_size": os.path.getsize(output_file) if os.path.exists(output_file) else 0,
"current_time": current_time,
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
})
if progress_callback:
progress_callback(progress)
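A minimal usage sketch for the class above (not part of the commit), assuming an FFmpeg wrapper object that provides the interface used here (get_compression_params, get_ffmpeg_path, gpu_info, get_video_duration, verify_video_file). The manager tries the GPU encoder first and falls back to libx264, so callers only see a single compress_video call:

async def shrink(ffmpeg_mgr) -> None:
    # 25 MB cap; CompressionManager converts this to bytes internally
    manager = CompressionManager(ffmpeg_mgr, max_file_size=25)
    ok, err = await manager.compress_video(
        "input.mp4",
        "output.mp4",
        progress_callback=lambda pct: print(f"{pct:.1f}%"),
    )
    if not ok:
        print(f"Compression failed: {err}")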

@@ -0,0 +1,177 @@
"""Module for directory management operations"""
import os
import logging
import asyncio
from pathlib import Path
from typing import List, Optional, Tuple
from .exceptions import FileCleanupError
from .file_deletion import SecureFileDeleter
logger = logging.getLogger("DirectoryManager")
class DirectoryManager:
"""Handles directory operations and cleanup"""
def __init__(self):
self.file_deleter = SecureFileDeleter()
async def cleanup_directory(
self,
directory_path: str,
recursive: bool = True,
delete_empty: bool = True
) -> Tuple[int, List[str]]:
"""Clean up a directory by removing files and optionally empty subdirectories
Args:
directory_path: Path to the directory to clean
recursive: Whether to clean subdirectories
delete_empty: Whether to delete empty directories
Returns:
Tuple[int, List[str]]: (Number of files deleted, List of errors)
Raises:
FileCleanupError: If cleanup fails critically
"""
if not os.path.exists(directory_path):
return 0, []
deleted_count = 0
errors = []
try:
# Process files and directories
deleted, errs = await self._process_directory_contents(
directory_path,
recursive,
delete_empty
)
deleted_count += deleted
errors.extend(errs)
# Clean up empty directories if requested
if delete_empty:
dir_errs = await self._cleanup_empty_directories(directory_path)
errors.extend(dir_errs)
if errors:
logger.warning(f"Cleanup completed with {len(errors)} errors")
else:
logger.info(f"Successfully cleaned directory: {directory_path}")
return deleted_count, errors
except Exception as e:
logger.error(f"Error during cleanup of {directory_path}: {e}")
raise FileCleanupError(f"Directory cleanup failed: {str(e)}")
async def _process_directory_contents(
self,
directory_path: str,
recursive: bool,
delete_empty: bool
) -> Tuple[int, List[str]]:
"""Process contents of a directory"""
deleted_count = 0
errors = []
try:
for entry in os.scandir(directory_path):
try:
if entry.is_file():
# Delete file
if await self.file_deleter.delete_file(entry.path):
deleted_count += 1
else:
errors.append(f"Failed to delete file: {entry.path}")
elif entry.is_dir() and recursive:
# Process subdirectory
subdir_deleted, subdir_errors = await self.cleanup_directory(
entry.path,
recursive=True,
delete_empty=delete_empty
)
deleted_count += subdir_deleted
errors.extend(subdir_errors)
except Exception as e:
errors.append(f"Error processing {entry.path}: {str(e)}")
except Exception as e:
errors.append(f"Error scanning directory {directory_path}: {str(e)}")
return deleted_count, errors
async def _cleanup_empty_directories(self, start_path: str) -> List[str]:
"""Remove empty directories recursively"""
errors = []
try:
for root, dirs, files in os.walk(start_path, topdown=False):
for name in dirs:
try:
dir_path = os.path.join(root, name)
if not os.listdir(dir_path): # Check if directory is empty
await self._remove_directory(dir_path)
except Exception as e:
errors.append(f"Error removing directory {name}: {str(e)}")
except Exception as e:
errors.append(f"Error walking directory tree: {str(e)}")
return errors
async def _remove_directory(self, dir_path: str) -> None:
"""Remove a directory safely"""
try:
await asyncio.to_thread(os.rmdir, dir_path)
except Exception as e:
logger.error(f"Failed to remove directory {dir_path}: {e}")
raise
async def ensure_directory(self, directory_path: str) -> None:
"""Ensure a directory exists and is accessible
Args:
directory_path: Path to the directory to ensure
Raises:
FileCleanupError: If directory cannot be created or accessed
"""
try:
path = Path(directory_path)
path.mkdir(parents=True, exist_ok=True)
# Verify directory is writable
if not os.access(directory_path, os.W_OK):
raise FileCleanupError(f"Directory {directory_path} is not writable")
except Exception as e:
logger.error(f"Error ensuring directory {directory_path}: {e}")
raise FileCleanupError(f"Failed to ensure directory: {str(e)}")
async def get_directory_size(self, directory_path: str) -> int:
"""Get total size of a directory in bytes
Args:
directory_path: Path to the directory
Returns:
int: Total size in bytes
"""
total_size = 0
try:
for entry in os.scandir(directory_path):
try:
if entry.is_file():
total_size += entry.stat().st_size
elif entry.is_dir():
total_size += await self.get_directory_size(entry.path)
except Exception as e:
logger.warning(f"Error getting size for {entry.path}: {e}")
except Exception as e:
logger.error(f"Error calculating directory size: {e}")
return total_size
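A short sketch of DirectoryManager in use (not part of the commit; the path is a placeholder):

async def tidy(path: str) -> None:
    manager = DirectoryManager()
    await manager.ensure_directory(path)
    deleted, errors = await manager.cleanup_directory(
        path, recursive=True, delete_empty=True
    )
    size = await manager.get_directory_size(path)
    print(f"Deleted {deleted} files ({len(errors)} errors); {size} bytes remain")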

@@ -0,0 +1,207 @@
"""Module for managing video downloads"""
import os
import logging
import asyncio
import yt_dlp
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Tuple, Callable, Any
from pathlib import Path
from .verification_manager import VideoVerificationManager
from .compression_manager import CompressionManager
from .progress_tracker import ProgressTracker
logger = logging.getLogger("DownloadManager")
class CancellableYTDLLogger:
"""Custom yt-dlp logger that can be cancelled"""
def __init__(self):
self.cancelled = False
def debug(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.debug(msg)
def warning(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.warning(msg)
def error(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.error(msg)
class DownloadManager:
"""Manages video downloads and processing"""
MAX_RETRIES = 5
RETRY_DELAY = 10
FILE_OP_RETRIES = 3
FILE_OP_RETRY_DELAY = 1
SHUTDOWN_TIMEOUT = 15 # seconds
def __init__(
self,
download_path: str,
video_format: str,
max_quality: int,
max_file_size: int,
enabled_sites: Optional[List[str]] = None,
concurrent_downloads: int = 2,
ffmpeg_mgr = None
):
self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True)
os.chmod(str(self.download_path), 0o755)
# Initialize components
self.verification_manager = VideoVerificationManager(ffmpeg_mgr)
self.compression_manager = CompressionManager(ffmpeg_mgr, max_file_size)
self.progress_tracker = ProgressTracker()
# Create thread pool
self.download_pool = ThreadPoolExecutor(
max_workers=max(1, min(3, concurrent_downloads)),
thread_name_prefix="videoarchiver_download"
)
# Initialize state
self._shutting_down = False
self.ytdl_logger = CancellableYTDLLogger()
# Configure yt-dlp options
self.ydl_opts = self._configure_ydl_opts(
video_format,
max_quality,
max_file_size,
ffmpeg_mgr
)
def _configure_ydl_opts(
self,
video_format: str,
max_quality: int,
max_file_size: int,
ffmpeg_mgr
) -> Dict[str, Any]:
"""Configure yt-dlp options"""
return {
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best",
"outtmpl": "%(title)s.%(ext)s",
"merge_output_format": video_format,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"concurrent_fragment_downloads": 1,
"retries": self.MAX_RETRIES,
"fragment_retries": self.MAX_RETRIES,
"file_access_retries": self.FILE_OP_RETRIES,
"extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook],
"ffmpeg_location": str(ffmpeg_mgr.get_ffmpeg_path()),
"ffprobe_location": str(ffmpeg_mgr.get_ffprobe_path()),
"paths": {"home": str(self.download_path)},
"logger": self.ytdl_logger,
"ignoreerrors": True,
"no_color": True,
"geo_bypass": True,
"socket_timeout": 60,
"http_chunk_size": 1048576,
"external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]},
"max_sleep_interval": 5,
"sleep_interval": 1,
"max_filesize": max_file_size * 1024 * 1024,
}
def _check_file_size(self, info: Dict[str, Any]) -> None:
"""Check if file size is within limits"""
if info.get("filepath") and os.path.exists(info["filepath"]):
try:
size = os.path.getsize(info["filepath"])
if size > self.compression_manager.max_file_size:
logger.info(f"File exceeds size limit, will compress: {info['filepath']}")
except OSError as e:
logger.error(f"Error checking file size: {str(e)}")
def _progress_hook(self, d: Dict[str, Any]) -> None:
"""Handle download progress"""
if d["status"] == "finished":
logger.info(f"Download completed: {d['filename']}")
elif d["status"] == "downloading":
try:
self.progress_tracker.update_download_progress(d)
except Exception as e:
logger.debug(f"Error logging progress: {str(e)}")
async def cleanup(self) -> None:
"""Clean up resources"""
self._shutting_down = True
self.ytdl_logger.cancelled = True
self.download_pool.shutdown(wait=False, cancel_futures=True)
await self.compression_manager.cleanup()
self.progress_tracker.clear_progress()
async def force_cleanup(self) -> None:
"""Force cleanup of all resources"""
self._shutting_down = True
self.ytdl_logger.cancelled = True
self.download_pool.shutdown(wait=False, cancel_futures=True)
await self.compression_manager.force_cleanup()
self.progress_tracker.clear_progress()
async def download_video(
self,
url: str,
progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str, str]:
"""Download and process a video"""
if self._shutting_down:
return False, "", "Downloader is shutting down"
self.progress_tracker.start_download(url)
try:
# Download video
success, file_path, error = await self._safe_download(
url,
progress_callback
)
if not success:
return False, "", error
# Verify and compress if needed
return await self._process_downloaded_file(
file_path,
progress_callback
)
except Exception as e:
logger.error(f"Download error: {str(e)}")
return False, "", str(e)
finally:
self.progress_tracker.end_download(url)
async def _safe_download(
self,
url: str,
progress_callback: Optional[Callable[[float], None]]
) -> Tuple[bool, str, str]:
"""Safely download video with retries"""
# Implementation moved to separate method for clarity
pass # Implementation would be similar to original but using new components
async def _process_downloaded_file(
self,
file_path: str,
progress_callback: Optional[Callable[[float], None]]
) -> Tuple[bool, str, str]:
"""Process a downloaded file (verify and compress if needed)"""
# Implementation moved to separate method for clarity
pass # Implementation would be similar to original but using new components
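A minimal sketch of driving DownloadManager once the _safe_download and _process_downloaded_file stubs are filled in (not part of the commit; the FFmpeg wrapper is an assumption, since its type does not appear in this diff):

async def archive(url: str, ffmpeg_mgr) -> None:
    manager = DownloadManager(
        download_path="downloads",
        video_format="mp4",
        max_quality=1080,
        max_file_size=25,  # MB
        ffmpeg_mgr=ffmpeg_mgr,
    )
    try:
        success, file_path, error = await manager.download_video(
            url, progress_callback=lambda pct: print(f"{pct:.1f}%")
        )
        print(file_path if success else error)
    finally:
        await manager.cleanup()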

@@ -0,0 +1,117 @@
"""Module for secure file deletion operations"""
import os
import stat
import asyncio
import logging
from pathlib import Path
from typing import Optional
from .exceptions import FileCleanupError
logger = logging.getLogger("FileDeleter")
class SecureFileDeleter:
"""Handles secure file deletion operations"""
def __init__(self, max_size: int = 100 * 1024 * 1024):
"""Initialize the file deleter
Args:
max_size: Maximum file size in bytes for secure deletion (default: 100MB)
"""
self.max_size = max_size
async def delete_file(self, file_path: str) -> bool:
"""Delete a file securely
Args:
file_path: Path to the file to delete
Returns:
bool: True if file was successfully deleted
Raises:
FileCleanupError: If file deletion fails after all attempts
"""
if not os.path.exists(file_path):
return True
try:
file_size = await self._get_file_size(file_path)
# For large files, skip secure deletion
if file_size > self.max_size:
return await self._delete_large_file(file_path)
# Perform secure deletion
await self._ensure_writable(file_path)
if file_size > 0:
await self._zero_file_content(file_path, file_size)
return await self._delete_file(file_path)
except Exception as e:
logger.error(f"Error during deletion of {file_path}: {e}")
return await self._force_delete(file_path)
async def _get_file_size(self, file_path: str) -> int:
"""Get the size of a file"""
try:
return os.path.getsize(file_path)
except OSError as e:
logger.warning(f"Could not get size of {file_path}: {e}")
return 0
async def _delete_large_file(self, file_path: str) -> bool:
"""Delete a large file directly"""
try:
logger.debug(f"File {file_path} exceeds max size for secure deletion, performing direct removal")
os.remove(file_path)
return True
except OSError as e:
logger.error(f"Failed to remove large file {file_path}: {e}")
return False
async def _ensure_writable(self, file_path: str) -> None:
"""Ensure a file is writable"""
try:
current_mode = os.stat(file_path).st_mode
os.chmod(file_path, current_mode | stat.S_IWRITE)
except OSError as e:
logger.warning(f"Could not modify permissions of {file_path}: {e}")
raise FileCleanupError(f"Permission error: {str(e)}")
async def _zero_file_content(self, file_path: str, file_size: int) -> None:
"""Zero out file content in chunks"""
try:
chunk_size = min(1024 * 1024, file_size) # 1MB chunks or file size if smaller
with open(file_path, "wb") as f:
for offset in range(0, file_size, chunk_size):
write_size = min(chunk_size, file_size - offset)
f.write(b'\0' * write_size)
await asyncio.sleep(0) # Allow other tasks to run
f.flush()
os.fsync(f.fileno())
except OSError as e:
logger.warning(f"Error zeroing file {file_path}: {e}")
raise
async def _delete_file(self, file_path: str) -> bool:
"""Delete a file"""
try:
Path(file_path).unlink(missing_ok=True)
return True
except OSError as e:
logger.error(f"Failed to delete file {file_path}: {e}")
return False
async def _force_delete(self, file_path: str) -> bool:
"""Force delete a file as last resort"""
try:
if os.path.exists(file_path):
os.chmod(file_path, stat.S_IWRITE | stat.S_IREAD)
Path(file_path).unlink(missing_ok=True)
except Exception as e:
logger.error(f"Force delete failed for {file_path}: {e}")
raise FileCleanupError(f"Force delete failed: {str(e)}")
return not os.path.exists(file_path)
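Usage sketch (not part of the commit): files at or below max_size are zeroed in 1 MB chunks before unlinking, while larger files fall back to a plain os.remove:

async def wipe(path: str) -> None:
    deleter = SecureFileDeleter(max_size=50 * 1024 * 1024)  # 50 MB threshold
    if await deleter.delete_file(path):
        print(f"Deleted {path}")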

@@ -1,135 +1,150 @@
"""File operation utilities"""
import os
import stat
import asyncio
import logging
from pathlib import Path
from typing import List, Tuple, Optional
from .exceptions import FileCleanupError
from .file_deletion import SecureFileDeleter
from .directory_manager import DirectoryManager
from .permission_manager import PermissionManager

logger = logging.getLogger("VideoArchiver")


class FileOperations:
    """Manages file and directory operations"""

    def __init__(self):
        """Initialize file operation managers"""
        self.file_deleter = SecureFileDeleter()
        self.directory_manager = DirectoryManager()
        self.permission_manager = PermissionManager()

    async def secure_delete_file(
        self,
        file_path: str,
        max_size: Optional[int] = None
    ) -> bool:
        """Delete a file securely

        Args:
            file_path: Path to the file to delete
            max_size: Optional maximum file size for secure deletion

        Returns:
            bool: True if file was successfully deleted

        Raises:
            FileCleanupError: If file deletion fails
        """
        try:
            # Ensure file is writable before deletion
            await self.permission_manager.ensure_writable(file_path)
            # Perform secure deletion
            if max_size:
                self.file_deleter.max_size = max_size
            return await self.file_deleter.delete_file(file_path)
        except Exception as e:
            logger.error(f"Error during secure file deletion: {e}")
            raise FileCleanupError(f"Secure deletion failed: {str(e)}")

    async def cleanup_downloads(
        self,
        download_path: str,
        recursive: bool = True,
        delete_empty: bool = True
    ) -> None:
        """Clean up the downloads directory

        Args:
            download_path: Path to the downloads directory
            recursive: Whether to clean subdirectories
            delete_empty: Whether to delete empty directories

        Raises:
            FileCleanupError: If cleanup fails
        """
        try:
            # Ensure we have necessary permissions
            await self.permission_manager.ensure_writable(
                download_path,
                recursive=recursive
            )
            # Perform cleanup
            deleted_count, errors = await self.directory_manager.cleanup_directory(
                download_path,
                recursive=recursive,
                delete_empty=delete_empty
            )
            # Log results
            if errors:
                error_msg = "\n".join(errors)
                logger.error(f"Cleanup completed with errors:\n{error_msg}")
                raise FileCleanupError(f"Cleanup completed with {len(errors)} errors")
            else:
                logger.info(f"Successfully cleaned up {deleted_count} files")
        except Exception as e:
            logger.error(f"Error during downloads cleanup: {e}")
            raise FileCleanupError(f"Downloads cleanup failed: {str(e)}")

    async def ensure_directory(self, directory_path: str) -> None:
        """Ensure a directory exists with proper permissions

        Args:
            directory_path: Path to the directory

        Raises:
            FileCleanupError: If directory cannot be created or accessed
        """
        try:
            # Create directory if needed
            await self.directory_manager.ensure_directory(directory_path)
            # Set proper permissions
            await self.permission_manager.fix_permissions(directory_path)
            # Verify it's writable
            if not await self.permission_manager.check_permissions(
                directory_path,
                require_writable=True,
                require_readable=True,
                require_executable=True
            ):
                raise FileCleanupError(f"Directory {directory_path} has incorrect permissions")
        except Exception as e:
            logger.error(f"Error ensuring directory: {e}")
            raise FileCleanupError(f"Failed to ensure directory: {str(e)}")

    async def get_directory_info(
        self,
        directory_path: str
    ) -> Tuple[int, List[str]]:
        """Get directory size and any permission issues

        Args:
            directory_path: Path to the directory

        Returns:
            Tuple[int, List[str]]: (Total size in bytes, List of permission issues)
        """
        try:
            # Get directory size
            total_size = await self.directory_manager.get_directory_size(directory_path)
            # Check permissions
            permission_issues = await self.permission_manager.fix_permissions(
                directory_path,
                recursive=True
            )
            return total_size, permission_issues
        except Exception as e:
            logger.error(f"Error getting directory info: {e}")
            return 0, [f"Error: {str(e)}"]

@@ -7,92 +7,217 @@ import stat
import logging
import contextlib
import time
from typing import AsyncIterator, List, Optional
from pathlib import Path
from .exceptions import FileCleanupError
from .permission_manager import PermissionManager
logger = logging.getLogger("VideoArchiver")
logger = logging.getLogger("PathManager")
@contextlib.contextmanager
def temp_path_context():
"""Context manager for temporary path creation and cleanup
Yields:
str: Path to temporary directory
class TempDirectoryManager:
"""Manages temporary directory creation and cleanup"""
def __init__(self):
self.permission_manager = PermissionManager()
self.max_retries = 3
self.retry_delay = 1
async def create_temp_dir(self, prefix: str = "videoarchiver_") -> str:
"""Create a temporary directory with proper permissions
Raises:
FileCleanupError: If directory creation or cleanup fails
"""
temp_dir = None
try:
# Create temp directory with proper permissions
temp_dir = tempfile.mkdtemp(prefix="videoarchiver_")
logger.debug(f"Created temporary directory: {temp_dir}")
# Ensure directory has rwx permissions for user only
try:
os.chmod(temp_dir, stat.S_IRWXU)
except OSError as e:
raise FileCleanupError(f"Failed to set permissions on temporary directory: {str(e)}")
# Verify directory exists and is writable
if not os.path.exists(temp_dir):
raise FileCleanupError(f"Failed to create temporary directory: {temp_dir}")
if not os.access(temp_dir, os.W_OK):
raise FileCleanupError(f"Temporary directory is not writable: {temp_dir}")
Args:
prefix: Prefix for temporary directory name
yield temp_dir
except FileCleanupError:
raise
except Exception as e:
logger.error(f"Error in temp_path_context: {str(e)}")
raise FileCleanupError(f"Temporary directory error: {str(e)}")
finally:
if temp_dir and os.path.exists(temp_dir):
cleanup_errors = []
try:
# Ensure all files are deletable with retries
max_retries = 3
for attempt in range(max_retries):
try:
# Set permissions recursively
for root, dirs, files in os.walk(temp_dir):
for d in dirs:
try:
dir_path = os.path.join(root, d)
os.chmod(dir_path, stat.S_IRWXU)
except OSError as e:
cleanup_errors.append(f"Failed to set permissions on directory {dir_path}: {e}")
for f in files:
try:
file_path = os.path.join(root, f)
os.chmod(file_path, stat.S_IRWXU)
except OSError as e:
cleanup_errors.append(f"Failed to set permissions on file {file_path}: {e}")
# Try to remove the directory
shutil.rmtree(temp_dir, ignore_errors=True)
# Verify directory is gone
if not os.path.exists(temp_dir):
logger.debug(f"Successfully cleaned up temporary directory: {temp_dir}")
break
if attempt < max_retries - 1:
time.sleep(1) # Wait before retry
except Exception as e:
if attempt == max_retries - 1:
cleanup_errors.append(f"Failed to clean up temporary directory {temp_dir} after {max_retries} attempts: {e}")
elif attempt < max_retries - 1:
time.sleep(1) # Wait before retry
continue
except Exception as e:
cleanup_errors.append(f"Error during temp directory cleanup: {str(e)}")
Returns:
str: Path to temporary directory
Raises:
FileCleanupError: If directory creation fails
"""
try:
# Create temp directory
temp_dir = tempfile.mkdtemp(prefix=prefix)
logger.debug(f"Created temporary directory: {temp_dir}")
# Set proper permissions
await self.permission_manager.set_permissions(
temp_dir,
stat.S_IRWXU, # rwx for user only
recursive=False
)
# Verify directory
if not await self._verify_directory(temp_dir):
raise FileCleanupError(f"Failed to verify temporary directory: {temp_dir}")
if cleanup_errors:
error_msg = "\n".join(cleanup_errors)
logger.error(error_msg)
# Don't raise here as we're in finally block and don't want to mask original error
return temp_dir
except Exception as e:
logger.error(f"Error creating temporary directory: {e}")
raise FileCleanupError(f"Failed to create temporary directory: {str(e)}")
async def cleanup_temp_dir(self, temp_dir: str) -> List[str]:
"""Clean up a temporary directory
Args:
temp_dir: Path to temporary directory
Returns:
List[str]: List of any cleanup errors
"""
if not temp_dir or not os.path.exists(temp_dir):
return []
cleanup_errors = []
try:
# Set permissions recursively
await self._prepare_for_cleanup(temp_dir, cleanup_errors)
# Attempt cleanup with retries
for attempt in range(self.max_retries):
try:
# Remove directory
shutil.rmtree(temp_dir, ignore_errors=True)
# Verify removal
if not os.path.exists(temp_dir):
logger.debug(f"Successfully cleaned up temporary directory: {temp_dir}")
break
if attempt < self.max_retries - 1:
await self._retry_delay()
except Exception as e:
if attempt == self.max_retries - 1:
cleanup_errors.append(
f"Failed to clean up temporary directory {temp_dir} "
f"after {self.max_retries} attempts: {e}"
)
elif attempt < self.max_retries - 1:
await self._retry_delay()
continue
except Exception as e:
cleanup_errors.append(f"Error during temp directory cleanup: {str(e)}")
return cleanup_errors
async def _prepare_for_cleanup(
self,
temp_dir: str,
cleanup_errors: List[str]
) -> None:
"""Prepare directory for cleanup by setting permissions"""
for root, dirs, files in os.walk(temp_dir):
# Set directory permissions
for d in dirs:
try:
dir_path = os.path.join(root, d)
await self.permission_manager.set_permissions(
dir_path,
stat.S_IRWXU
)
except Exception as e:
cleanup_errors.append(
f"Failed to set permissions on directory {dir_path}: {e}"
)
# Set file permissions
for f in files:
try:
file_path = os.path.join(root, f)
await self.permission_manager.set_permissions(
file_path,
stat.S_IRWXU
)
except Exception as e:
cleanup_errors.append(
f"Failed to set permissions on file {file_path}: {e}"
)
async def _verify_directory(self, directory: str) -> bool:
"""Verify a directory exists and is writable"""
if not os.path.exists(directory):
return False
return await self.permission_manager.check_permissions(
directory,
require_writable=True,
require_readable=True,
require_executable=True
)
async def _retry_delay(self) -> None:
"""Sleep between retry attempts"""
await asyncio.sleep(self.retry_delay)
class PathManager:
"""Manages path operations and validation"""
def __init__(self):
self.temp_dir_manager = TempDirectoryManager()
@contextlib.asynccontextmanager
async def temp_path_context(
self,
prefix: str = "videoarchiver_"
) -> Generator[str, None, None]:
"""Async context manager for temporary path creation and cleanup
Args:
prefix: Prefix for temporary directory name
Yields:
str: Path to temporary directory
Raises:
FileCleanupError: If directory creation or cleanup fails
"""
temp_dir = None
try:
# Create temporary directory
temp_dir = await self.temp_dir_manager.create_temp_dir(prefix)
yield temp_dir
except FileCleanupError:
raise
except Exception as e:
logger.error(f"Error in temp_path_context: {str(e)}")
raise FileCleanupError(f"Temporary directory error: {str(e)}")
finally:
if temp_dir:
# Clean up directory
cleanup_errors = await self.temp_dir_manager.cleanup_temp_dir(temp_dir)
if cleanup_errors:
error_msg = "\n".join(cleanup_errors)
logger.error(error_msg)
# Don't raise here as we're in finally block
async def ensure_directory(self, directory: str) -> None:
"""Ensure a directory exists with proper permissions
Args:
directory: Path to ensure exists
Raises:
FileCleanupError: If directory cannot be created or accessed
"""
try:
path = Path(directory)
path.mkdir(parents=True, exist_ok=True)
# Set proper permissions
await self.temp_dir_manager.permission_manager.set_permissions(
directory,
stat.S_IRWXU
)
# Verify directory
if not await self.temp_dir_manager._verify_directory(directory):
raise FileCleanupError(f"Failed to verify directory: {directory}")
except Exception as e:
logger.error(f"Error ensuring directory {directory}: {e}")
raise FileCleanupError(f"Failed to ensure directory: {str(e)}")

@@ -0,0 +1,202 @@
"""Module for managing file and directory permissions"""
import os
import stat
import logging
from pathlib import Path
from typing import Optional, Union, List
from .exceptions import FileCleanupError
logger = logging.getLogger("PermissionManager")
class PermissionManager:
"""Handles file and directory permission operations"""
DEFAULT_FILE_MODE = 0o644 # rw-r--r--
DEFAULT_DIR_MODE = 0o755 # rwxr-xr-x
FULL_ACCESS_MODE = 0o777 # rwxrwxrwx
def __init__(self):
self._is_windows = os.name == 'nt'
async def ensure_writable(
self,
path: Union[str, Path],
recursive: bool = False
) -> None:
"""Ensure a path is writable
Args:
path: Path to make writable
recursive: Whether to apply recursively to directories
Raises:
FileCleanupError: If permissions cannot be modified
"""
try:
path = Path(path)
if not path.exists():
return
if path.is_file():
await self._make_file_writable(path)
elif path.is_dir():
await self._make_directory_writable(path, recursive)
except Exception as e:
logger.error(f"Error ensuring writable permissions for {path}: {e}")
raise FileCleanupError(f"Failed to set writable permissions: {str(e)}")
async def _make_file_writable(self, path: Path) -> None:
"""Make a file writable"""
try:
current_mode = path.stat().st_mode
if self._is_windows:
os.chmod(path, stat.S_IWRITE | stat.S_IREAD)
else:
os.chmod(path, current_mode | stat.S_IWRITE)
except Exception as e:
logger.error(f"Failed to make file {path} writable: {e}")
raise
async def _make_directory_writable(
self,
path: Path,
recursive: bool
) -> None:
"""Make a directory writable"""
try:
if self._is_windows:
os.chmod(path, stat.S_IWRITE | stat.S_IREAD | stat.S_IEXEC)
else:
current_mode = path.stat().st_mode
os.chmod(path, current_mode | stat.S_IWRITE | stat.S_IEXEC)
if recursive:
for item in path.rglob('*'):
if item.is_file():
await self._make_file_writable(item)
elif item.is_dir():
await self._make_directory_writable(item, False)
except Exception as e:
logger.error(f"Failed to make directory {path} writable: {e}")
raise
async def set_permissions(
self,
path: Union[str, Path],
mode: int,
recursive: bool = False
) -> None:
"""Set specific permissions on a path
Args:
path: Path to set permissions on
mode: Permission mode (e.g., 0o755)
recursive: Whether to apply recursively
Raises:
FileCleanupError: If permissions cannot be set
"""
try:
path = Path(path)
if not path.exists():
return
if not self._is_windows: # Skip on Windows
os.chmod(path, mode)
if recursive and path.is_dir():
file_mode = mode & ~stat.S_IXUSR & ~stat.S_IXGRP & ~stat.S_IXOTH
for item in path.rglob('*'):
if item.is_file():
os.chmod(item, file_mode)
elif item.is_dir():
os.chmod(item, mode)
except Exception as e:
logger.error(f"Error setting permissions for {path}: {e}")
raise FileCleanupError(f"Failed to set permissions: {str(e)}")
async def check_permissions(
self,
path: Union[str, Path],
require_writable: bool = True,
require_readable: bool = True,
require_executable: bool = False
) -> bool:
"""Check if a path has required permissions
Args:
path: Path to check
require_writable: Whether write permission is required
require_readable: Whether read permission is required
require_executable: Whether execute permission is required
Returns:
bool: True if path has required permissions
"""
try:
path = Path(path)
if not path.exists():
return False
if require_readable and not os.access(path, os.R_OK):
return False
if require_writable and not os.access(path, os.W_OK):
return False
if require_executable and not os.access(path, os.X_OK):
return False
return True
except Exception as e:
logger.error(f"Error checking permissions for {path}: {e}")
return False
async def fix_permissions(
self,
path: Union[str, Path],
recursive: bool = False
) -> List[str]:
"""Fix common permission issues on a path
Args:
path: Path to fix permissions on
recursive: Whether to apply recursively
Returns:
List[str]: List of errors encountered
"""
errors = []
try:
path = Path(path)
if not path.exists():
return errors
if path.is_file():
try:
await self.set_permissions(path, self.DEFAULT_FILE_MODE)
except Exception as e:
errors.append(f"Error fixing file permissions for {path}: {str(e)}")
elif path.is_dir():
try:
await self.set_permissions(path, self.DEFAULT_DIR_MODE)
if recursive:
for item in path.rglob('*'):
try:
if item.is_file():
await self.set_permissions(item, self.DEFAULT_FILE_MODE)
elif item.is_dir():
await self.set_permissions(item, self.DEFAULT_DIR_MODE)
except Exception as e:
errors.append(f"Error fixing permissions for {item}: {str(e)}")
except Exception as e:
errors.append(f"Error fixing directory permissions for {path}: {str(e)}")
except Exception as e:
errors.append(f"Error during permission fix: {str(e)}")
return errors
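Sketch (not part of the commit): normalizing a tree to the defaults above (0o644 files, 0o755 directories; chmod is skipped or reduced on Windows, as coded):

async def normalize(root: str) -> None:
    pm = PermissionManager()
    errors = await pm.fix_permissions(root, recursive=True)
    if not await pm.check_permissions(root, require_writable=True):
        raise FileCleanupError(f"{root} is still not writable")
    print(f"Fixed permissions with {len(errors)} issues")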

@@ -0,0 +1,163 @@
"""Module for tracking download and compression progress"""
import logging
from typing import Dict, Any, Optional
from datetime import datetime
logger = logging.getLogger("ProgressTracker")
class ProgressTracker:
"""Tracks progress of downloads and compression operations"""
def __init__(self):
self._download_progress: Dict[str, Dict[str, Any]] = {}
self._compression_progress: Dict[str, Dict[str, Any]] = {}
def start_download(self, url: str) -> None:
"""Initialize progress tracking for a download"""
self._download_progress[url] = {
"active": True,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"speed": "N/A",
"eta": "N/A",
"downloaded_bytes": 0,
"total_bytes": 0,
"retries": 0,
"fragment_count": 0,
"fragment_index": 0,
"video_title": "Unknown",
"extractor": "Unknown",
"format": "Unknown",
"resolution": "Unknown",
"fps": "Unknown",
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
def update_download_progress(self, data: Dict[str, Any]) -> None:
"""Update download progress information"""
try:
# Get URL from info dict
url = data.get("info_dict", {}).get("webpage_url", "unknown")
if url not in self._download_progress:
return
if data["status"] == "downloading":
self._download_progress[url].update({
"active": True,
"percent": float(data.get("_percent_str", "0").replace("%", "")),
"speed": data.get("_speed_str", "N/A"),
"eta": data.get("_eta_str", "N/A"),
"downloaded_bytes": data.get("downloaded_bytes", 0),
"total_bytes": data.get("total_bytes", 0) or data.get("total_bytes_estimate", 0),
"retries": data.get("retry_count", 0),
"fragment_count": data.get("fragment_count", 0),
"fragment_index": data.get("fragment_index", 0),
"video_title": data.get("info_dict", {}).get("title", "Unknown"),
"extractor": data.get("info_dict", {}).get("extractor", "Unknown"),
"format": data.get("info_dict", {}).get("format", "Unknown"),
"resolution": data.get("info_dict", {}).get("resolution", "Unknown"),
"fps": data.get("info_dict", {}).get("fps", "Unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
})
logger.debug(
f"Download progress for {url}: "
f"{self._download_progress[url]['percent']}% at {self._download_progress[url]['speed']}, "
f"ETA: {self._download_progress[url]['eta']}"
)
except Exception as e:
logger.error(f"Error updating download progress: {e}")
def end_download(self, url: str) -> None:
"""Mark a download as completed"""
if url in self._download_progress:
self._download_progress[url]["active"] = False
def start_compression(
self,
input_file: str,
params: Dict[str, str],
use_hardware: bool,
duration: float,
input_size: int,
target_size: int
) -> None:
"""Initialize progress tracking for compression"""
self._compression_progress[input_file] = {
"active": True,
"filename": input_file,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": input_size,
"current_size": 0,
"target_size": target_size,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
def update_compression_progress(
self,
input_file: str,
progress: float,
elapsed_time: str,
current_size: int,
current_time: float
) -> None:
"""Update compression progress information"""
if input_file in self._compression_progress:
self._compression_progress[input_file].update({
"percent": progress,
"elapsed_time": elapsed_time,
"current_size": current_size,
"current_time": current_time,
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
})
logger.debug(
f"Compression progress for {input_file}: "
f"{progress:.1f}%, Size: {current_size}/{self._compression_progress[input_file]['target_size']} bytes"
)
def end_compression(self, input_file: str) -> None:
"""Mark a compression operation as completed"""
if input_file in self._compression_progress:
self._compression_progress[input_file]["active"] = False
def get_download_progress(self, url: str) -> Optional[Dict[str, Any]]:
"""Get progress information for a download"""
return self._download_progress.get(url)
def get_compression_progress(self, input_file: str) -> Optional[Dict[str, Any]]:
"""Get progress information for a compression operation"""
return self._compression_progress.get(input_file)
def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
"""Get all active downloads"""
return {
url: progress
for url, progress in self._download_progress.items()
if progress.get("active", False)
}
def get_active_compressions(self) -> Dict[str, Dict[str, Any]]:
"""Get all active compression operations"""
return {
input_file: progress
for input_file, progress in self._compression_progress.items()
if progress.get("active", False)
}
def clear_progress(self) -> None:
"""Clear all progress tracking"""
self._download_progress.clear()
self._compression_progress.clear()
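Sketch of wiring the tracker into yt-dlp (not part of the commit): the hook receives the same dict shape update_download_progress expects ("status", "info_dict", byte counters), so it can be registered via "progress_hooks":

tracker = ProgressTracker()

def progress_hook(d: dict) -> None:
    if d.get("status") == "downloading":
        tracker.update_download_progress(d)

# e.g. ydl_opts = {"progress_hooks": [progress_hook], ...};
# call tracker.start_download(url) before downloading and
# tracker.end_download(url) once the download finishes.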