pacnpal
2024-11-16 22:32:08 +00:00
parent b7d99490cf
commit dac21f2fcd
30 changed files with 5854 additions and 2279 deletions

View File

@@ -1,5 +1,7 @@
"""Utility functions and classes for VideoArchiver"""
from typing import Dict, Optional, Any, Union, List
from .file_ops import (
cleanup_downloads,
ensure_directory,
@@ -12,16 +14,65 @@ from .directory_manager import DirectoryManager
from .permission_manager import PermissionManager
from .download_manager import DownloadManager
from .compression_manager import CompressionManager
from .progress_tracker import ProgressTracker
from .progress_tracker import (
ProgressTracker,
ProgressStatus,
DownloadProgress,
CompressionProgress,
CompressionParams
)
from .path_manager import PathManager
from .exceptions import (
# Base exception
VideoArchiverError,
ErrorSeverity,
ErrorContext,
# File operations
FileOperationError,
DirectoryError,
PermissionError,
DownloadError,
CompressionError,
TrackingError,
PathError,
FileCleanupError,
# Video operations
VideoDownloadError,
VideoProcessingError,
VideoVerificationError,
VideoUploadError,
VideoCleanupError,
# Resource management
ResourceError,
ResourceExhaustedError,
# Network and API
NetworkError,
DiscordAPIError,
# Component operations
ComponentError,
ConfigurationError,
DatabaseError,
FFmpegError,
# Queue operations
QueueError,
QueueHandlerError,
QueueProcessorError,
# Processing operations
ProcessingError,
ProcessorError,
ValidationError,
DisplayError,
URLExtractionError,
MessageHandlerError,
# Cleanup operations
CleanupError,
# Health monitoring
HealthCheckError
)
__all__ = [
@@ -41,16 +92,75 @@ __all__ = [
'ProgressTracker',
'PathManager',
# Exceptions
# Progress Tracking Types
'ProgressStatus',
'DownloadProgress',
'CompressionProgress',
'CompressionParams',
# Base Exceptions
'VideoArchiverError',
'ErrorSeverity',
'ErrorContext',
# File Operation Exceptions
'FileOperationError',
'DirectoryError',
'PermissionError',
'DownloadError',
'CompressionError',
'TrackingError',
'PathError',
'FileCleanupError',
# Video Operation Exceptions
'VideoDownloadError',
'VideoProcessingError',
'VideoVerificationError',
'VideoUploadError',
'VideoCleanupError',
# Resource Exceptions
'ResourceError',
'ResourceExhaustedError',
# Network and API Exceptions
'NetworkError',
'DiscordAPIError',
# Component Exceptions
'ComponentError',
'ConfigurationError',
'DatabaseError',
'FFmpegError',
# Queue Exceptions
'QueueError',
'QueueHandlerError',
'QueueProcessorError',
# Processing Exceptions
'ProcessingError',
'ProcessorError',
'ValidationError',
'DisplayError',
'URLExtractionError',
'MessageHandlerError',
# Cleanup Exceptions
'CleanupError',
# Health Monitoring Exceptions
'HealthCheckError',
# Helper Functions
'get_download_progress',
'get_compression_progress',
'get_active_downloads',
'get_active_compressions'
]
# Version information
__version__ = "1.0.0"
__author__ = "VideoArchiver Team"
__description__ = "Utility functions and classes for VideoArchiver"
# Initialize shared instances for module-level access
directory_manager = DirectoryManager()
permission_manager = PermissionManager()
@@ -58,3 +168,93 @@ download_manager = DownloadManager()
compression_manager = CompressionManager()
progress_tracker = ProgressTracker()
path_manager = PathManager()
# Progress tracking helper functions
def get_download_progress(url: Optional[str] = None) -> Union[Dict[str, DownloadProgress], Optional[DownloadProgress]]:
"""
Get progress information for a download.
Args:
url: Optional URL to get progress for. If None, returns all progress.
Returns:
If url is provided, returns progress for that URL or None if not found.
If url is None, returns dictionary of all download progress.
Raises:
TrackingError: If there's an error getting progress information
"""
try:
return progress_tracker.get_download_progress(url)
except Exception as e:
raise TrackingError(f"Failed to get download progress: {str(e)}")
def get_compression_progress(input_file: Optional[str] = None) -> Union[Dict[str, CompressionProgress], Optional[CompressionProgress]]:
"""
Get progress information for a compression operation.
Args:
input_file: Optional file to get progress for. If None, returns all progress.
Returns:
If input_file is provided, returns progress for that file or None if not found.
If input_file is None, returns dictionary of all compression progress.
Raises:
TrackingError: If there's an error getting progress information
"""
try:
return progress_tracker.get_compression_progress(input_file)
except Exception as e:
raise TrackingError(f"Failed to get compression progress: {str(e)}")
def get_active_downloads() -> Dict[str, DownloadProgress]:
"""
Get all active downloads.
Returns:
Dictionary mapping URLs to their download progress information
Raises:
TrackingError: If there's an error getting active downloads
"""
try:
return progress_tracker.get_active_downloads()
except Exception as e:
raise TrackingError(f"Failed to get active downloads: {str(e)}")
def get_active_compressions() -> Dict[str, CompressionProgress]:
"""
Get all active compression operations.
Returns:
Dictionary mapping file paths to their compression progress information
Raises:
TrackingError: If there's an error getting active compressions
"""
try:
return progress_tracker.get_active_compressions()
except Exception as e:
raise TrackingError(f"Failed to get active compressions: {str(e)}")
# Error handling helper functions
def create_error_context(
component: str,
operation: str,
details: Optional[Dict[str, Any]] = None,
severity: ErrorSeverity = ErrorSeverity.MEDIUM
) -> ErrorContext:
"""
Create an error context object.
Args:
component: Component where error occurred
operation: Operation that failed
details: Optional error details
severity: Error severity level
Returns:
ErrorContext object
"""
return ErrorContext(component, operation, details, severity)
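# A minimal, hypothetical usage sketch of the helpers exported above;
# the import path follows the package layout shown in this file.
from videoarchiver.utils import (
    get_active_downloads,
    create_error_context,
    TrackingError,
    VideoArchiverError,
    ErrorSeverity,
)

def report_downloads() -> None:
    try:
        for url, progress in get_active_downloads().items():
            print(f"{url}: {progress}")
    except TrackingError as e:
        # Attach structured context before re-raising
        ctx = create_error_context(
            component="reporting",
            operation="report_downloads",
            details={"source": "cli"},  # illustrative detail
            severity=ErrorSeverity.LOW,
        )
        raise VideoArchiverError(str(e), context=ctx)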

View File

@@ -0,0 +1,210 @@
"""Video compression handling utilities"""
import os
import asyncio
import logging
import subprocess
from datetime import datetime
from typing import Dict, Optional, Callable, Set, Tuple
from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager
from videoarchiver.ffmpeg.exceptions import CompressionError
from videoarchiver.utils.exceptions import VideoVerificationError
from videoarchiver.utils.file_operations import FileOperations
from videoarchiver.utils.progress_handler import ProgressHandler
logger = logging.getLogger("VideoArchiver")
class CompressionHandler:
"""Handles video compression operations"""
def __init__(self, ffmpeg_mgr: FFmpegManager, progress_handler: ProgressHandler,
file_ops: FileOperations):
self.ffmpeg_mgr = ffmpeg_mgr
self.progress_handler = progress_handler
self.file_ops = file_ops
self._active_processes: Set[asyncio.subprocess.Process] = set()
self._processes_lock = asyncio.Lock()
self._shutting_down = False
self.max_file_size = 0 # Will be set during compression
async def cleanup(self) -> None:
"""Clean up compression resources"""
self._shutting_down = True
try:
async with self._processes_lock:
for process in self._active_processes:
try:
process.terminate()
await asyncio.sleep(0.1)
if process.returncode is None:
process.kill()
except Exception as e:
logger.error(f"Error killing compression process: {e}")
self._active_processes.clear()
finally:
self._shutting_down = False
async def compress_video(
self,
input_file: str,
output_file: str,
max_size_mb: int,
progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str]:
"""Compress video to target size"""
if self._shutting_down:
return False, "Compression handler is shutting down"
self.max_file_size = max_size_mb
try:
# Get optimal compression parameters
compression_params = self.ffmpeg_mgr.get_compression_params(
input_file, max_size_mb
)
# Try hardware acceleration first
success = await self._try_compression(
input_file,
output_file,
compression_params,
progress_callback,
use_hardware=True
)
# Fall back to CPU if hardware acceleration fails
if not success:
logger.warning("Hardware acceleration failed, falling back to CPU encoding")
success = await self._try_compression(
input_file,
output_file,
compression_params,
progress_callback,
use_hardware=False
)
if not success:
return False, "Failed to compress with both hardware and CPU encoding"
# Verify compressed file
if not self.file_ops.verify_video_file(output_file, str(self.ffmpeg_mgr.get_ffprobe_path())):
return False, "Compressed file verification failed"
# Check final size
within_limit, final_size = self.file_ops.check_file_size(output_file, max_size_mb)
if not within_limit:
return False, f"Failed to compress to target size: {final_size} bytes"
return True, ""
except Exception as e:
return False, str(e)
async def _try_compression(
self,
input_file: str,
output_file: str,
params: Dict[str, str],
progress_callback: Optional[Callable[[float], None]] = None,
use_hardware: bool = True,
) -> bool:
"""Attempt video compression with given parameters"""
if self._shutting_down:
return False
try:
# Build FFmpeg command
ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
cmd = [ffmpeg_path, "-y", "-i", input_file]
# Add progress monitoring
cmd.extend(["-progress", "pipe:1"])
# Modify parameters based on hardware acceleration preference
if use_hardware:
gpu_info = self.ffmpeg_mgr.gpu_info
if gpu_info["nvidia"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_nvenc"
elif gpu_info["amd"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_amf"
elif gpu_info["intel"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_qsv"
else:
params["c:v"] = "libx264"
# Add all parameters to command
for key, value in params.items():
cmd.extend([f"-{key}", str(value)])
# Add output file
cmd.append(output_file)
# Get video duration for progress calculation
duration = self.file_ops.get_video_duration(input_file, str(self.ffmpeg_mgr.get_ffprobe_path()))
# Initialize compression progress
self.progress_handler.update(input_file, {
"active": True,
"filename": os.path.basename(input_file),
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": os.path.getsize(input_file),
"current_size": 0,
"target_size": self.max_file_size * 1024 * 1024,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
})
# Run compression with progress monitoring
try:
process = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
# Track the process
async with self._processes_lock:
self._active_processes.add(process)
start_time = datetime.utcnow()
while True:
if self._shutting_down:
process.terminate()
return False
line = await process.stdout.readline()
if not line:
break
try:
line = line.decode().strip()
if line.startswith("out_time_ms="):
current_time = int(line.split("=")[1]) / 1000000
self.progress_handler.handle_compression_progress(
input_file, current_time, duration,
output_file, start_time, progress_callback
)
except Exception as e:
logger.error(f"Error parsing FFmpeg progress: {e}")
await process.wait()
return process.returncode == 0 and os.path.exists(output_file)
except Exception as e:
logger.error(f"Error during compression process: {e}")
return False
finally:
# Remove process from tracking
async with self._processes_lock:
self._active_processes.discard(process)
except Exception as e:
logger.error(f"Compression attempt failed: {str

View File

@@ -0,0 +1,271 @@
"""Core download functionality for video archiver"""
import os
import asyncio
import logging
import yt_dlp
from typing import Dict, Optional, Callable, Tuple
from pathlib import Path
from videoarchiver.utils.url_validator import check_url_support
from videoarchiver.utils.progress_handler import ProgressHandler, CancellableYTDLLogger
from videoarchiver.utils.file_operations import FileOperations
from videoarchiver.utils.compression_handler import CompressionHandler
from videoarchiver.utils.process_manager import ProcessManager
from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager
logger = logging.getLogger("VideoArchiver")
class DownloadCore:
"""Core download functionality for video archiver"""
def __init__(
self,
download_path: str,
video_format: str,
max_quality: int,
max_file_size: int,
enabled_sites: Optional[list[str]] = None,
concurrent_downloads: int = 2,
ffmpeg_mgr: Optional[FFmpegManager] = None,
):
self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True)
os.chmod(str(self.download_path), 0o755)
self.video_format = video_format
self.max_quality = max_quality
self.max_file_size = max_file_size
self.enabled_sites = enabled_sites
self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager()
# Initialize components
self.process_manager = ProcessManager(concurrent_downloads)
self.progress_handler = ProgressHandler()
self.file_ops = FileOperations()
self.compression_handler = CompressionHandler(
self.ffmpeg_mgr, self.progress_handler, self.file_ops
)
# Create cancellable logger
self.ytdl_logger = CancellableYTDLLogger()
# Configure yt-dlp options
self.ydl_opts = self._configure_ydl_options()
def _configure_ydl_options(self) -> Dict:
"""Configure yt-dlp options"""
return {
"format": f"bv*[height<={self.max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={self.max_quality}]/best",
"outtmpl": "%(title)s.%(ext)s",
"merge_output_format": self.video_format,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"concurrent_fragment_downloads": 1,
"retries": 5,
"fragment_retries": 5,
"file_access_retries": 3,
"extractor_retries": 5,
"postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._handle_progress],
"ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()),
"ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()),
"paths": {"home": str(self.download_path)},
"logger": self.ytdl_logger,
"ignoreerrors": True,
"no_color": True,
"geo_bypass": True,
"socket_timeout": 60,
"http_chunk_size": 1048576,
"external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]},
"max_sleep_interval": 5,
"sleep_interval": 1,
"max_filesize": self.max_file_size * 1024 * 1024,
}
def _check_file_size(self, info: Dict) -> None:
"""Check if file size is within limits"""
if info.get("filepath") and os.path.exists(info["filepath"]):
try:
size = os.path.getsize(info["filepath"])
if size > (self.max_file_size * 1024 * 1024):
logger.info(
f"File exceeds size limit, will compress: {info['filepath']}"
)
except OSError as e:
logger.error(f"Error checking file size: {str(e)}")
def _handle_progress(self, d: Dict) -> None:
"""Handle download progress updates"""
url = d.get("info_dict", {}).get("webpage_url", "unknown")
self.progress_handler.handle_download_progress(d, url)
def is_supported_url(self, url: str) -> bool:
"""Check if URL is supported"""
return check_url_support(url, self.ydl_opts, self.enabled_sites)
async def download_video(
self, url: str, progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str, str]:
"""Download and process a video"""
if self.process_manager.is_shutting_down:
return False, "", "Download manager is shutting down"
# Initialize progress tracking
self.progress_handler.initialize_progress(url)
original_file = None
compressed_file = None
try:
# Download the video
success, file_path, error = await self._safe_download(
url, str(self.download_path), progress_callback
)
if not success:
return False, "", error
original_file = file_path
await self.process_manager.track_download(url, original_file)
# Check file size and compress if needed
within_limit, file_size = self.file_ops.check_file_size(original_file, self.max_file_size)
if not within_limit:
logger.info(f"Compressing video: {original_file}")
try:
compressed_file = os.path.join(
self.download_path,
f"compressed_{os.path.basename(original_file)}",
)
# Attempt compression
success, error = await self.compression_handler.compress_video(
original_file,
compressed_file,
self.max_file_size,
progress_callback
)
if not success:
await self._cleanup_files(original_file, compressed_file)
return False, "", error
# Verify compressed file
if not self.file_ops.verify_video_file(
compressed_file,
str(self.ffmpeg_mgr.get_ffprobe_path())
):
await self._cleanup_files(original_file, compressed_file)
return False, "", "Compressed file verification failed"
# Delete original and return compressed
await self.file_ops.safe_delete_file(original_file)
return True, compressed_file, ""
except Exception as e:
error_msg = f"Compression failed: {str(e)}"
await self._cleanup_files(original_file, compressed_file)
return False, "", error_msg
else:
# Move file to final location if no compression needed
final_path = os.path.join(
self.download_path,
os.path.basename(original_file)
)
success = await self.file_ops.safe_move_file(original_file, final_path)
if not success:
await self._cleanup_files(original_file)
return False, "", "Failed to move file to final location"
return True, final_path, ""
except Exception as e:
logger.error(f"Download error: {str(e)}")
await self._cleanup_files(original_file, compressed_file)
return False, "", str(e)
finally:
# Clean up tracking
await self.process_manager.untrack_download(url)
self.progress_handler.complete(url)
async def _safe_download(
self,
url: str,
output_dir: str,
progress_callback: Optional[Callable[[float], None]] = None,
) -> Tuple[bool, str, str]:
"""Safely download video with retries"""
if self.process_manager.is_shutting_down:
return False, "", "Download manager is shutting down"
last_error = None
for attempt in range(5): # Max retries
try:
ydl_opts = self.ydl_opts.copy()
ydl_opts["outtmpl"] = os.path.join(output_dir, ydl_opts["outtmpl"])
# Add progress callback
if progress_callback:
original_progress_hook = ydl_opts["progress_hooks"][0]
def combined_progress_hook(d):
original_progress_hook(d)
if d["status"] == "downloading":
try:
percent = float(
d.get("_percent_str", "0").replace("%", "")
)
progress_callback(percent)
except Exception as e:
logger.error(f"Error in progress callback: {e}")
ydl_opts["progress_hooks"] = [combined_progress_hook]
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = await asyncio.get_event_loop().run_in_executor(
self.process_manager.download_pool,
lambda: ydl.extract_info(url, download=True)
)
if info is None:
raise Exception("Failed to extract video information")
file_path = os.path.join(output_dir, ydl.prepare_filename(info))
if not os.path.exists(file_path):
raise FileNotFoundError("Download completed but file not found")
if not self.file_ops.verify_video_file(
file_path,
str(self.ffmpeg_mgr.get_ffprobe_path())
):
raise Exception("Downloaded file is not a valid video")
return True, file_path, ""
except Exception as e:
last_error = str(e)
logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
if attempt < 4: # Less than max retries
delay = 10 * (2**attempt) + (attempt * 2) # Exponential backoff
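# Resulting waits: 10s, 22s, 44s, 86s before the fifth and final attempt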
await asyncio.sleep(delay)
else:
return False, "", f"All download attempts failed: {last_error}"
async def _cleanup_files(self, *files: str) -> None:
"""Clean up multiple files"""
for file in files:
if file and os.path.exists(file):
await self.file_ops.safe_delete_file(file)
async def cleanup(self) -> None:
"""Clean up resources"""
await self.process_manager.cleanup()
await self.compression_handler.cleanup()
async def force_cleanup(self) -> None:
"""Force cleanup of all resources"""
self.ytdl_logger.cancelled = True
await self.process_manager.force_cleanup()
await self.compression_handler.cleanup()
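# Hypothetical end-to-end driver for DownloadCore; the constructor
# values below are illustrative, not defaults from this module.
async def archive(url: str) -> None:
    core = DownloadCore(
        download_path="/tmp/videoarchiver",
        video_format="mp4",
        max_quality=1080,
        max_file_size=25,  # MB
    )
    try:
        if not core.is_supported_url(url):
            print("URL not supported")
            return
        ok, path, error = await core.download_video(
            url, progress_callback=lambda p: print(f"{p:.1f}%")
        )
        print(path if ok else f"Failed: {error}")
    finally:
        await core.cleanup()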

View File

@@ -1,8 +1,44 @@
"""Custom exceptions for VideoArchiver"""
from typing import Optional, Dict, Any
from enum import Enum, auto
class ErrorSeverity(Enum):
"""Severity levels for errors"""
LOW = auto()
MEDIUM = auto()
HIGH = auto()
CRITICAL = auto()
class ErrorContext:
"""Context information for errors"""
def __init__(
self,
component: str,
operation: str,
details: Optional[Dict[str, Any]] = None,
severity: ErrorSeverity = ErrorSeverity.MEDIUM
) -> None:
self.component = component
self.operation = operation
self.details = details or {}
self.severity = severity
def __str__(self) -> str:
return (
f"[{self.severity.name}] {self.component}.{self.operation}: "
f"{', '.join(f'{k}={v}' for k, v in self.details.items())}"
)
class VideoArchiverError(Exception):
"""Base exception for VideoArchiver errors"""
pass
def __init__(
self,
message: str,
context: Optional[ErrorContext] = None
) -> None:
self.context = context
super().__init__(f"{context}: {message}" if context else message)
class VideoDownloadError(VideoArchiverError):
"""Error downloading video"""
@@ -38,7 +74,17 @@ class PermissionError(VideoArchiverError):
class NetworkError(VideoArchiverError):
"""Error with network operations"""
pass
def __init__(
self,
message: str,
url: Optional[str] = None,
status_code: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> None:
self.url = url
self.status_code = status_code
details = f" (URL: {url}" + (f", Status: {status_code})" if status_code else ")")
super().__init__(message + details, context)
class ResourceError(VideoArchiverError):
"""Error with system resources"""
@@ -54,15 +100,27 @@ class ComponentError(VideoArchiverError):
class DiscordAPIError(VideoArchiverError):
"""Error with Discord API operations"""
def __init__(self, message: str, status_code: int = None):
def __init__(
self,
message: str,
status_code: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> None:
self.status_code = status_code
super().__init__(f"Discord API Error: {message} (Status: {status_code if status_code else 'Unknown'})")
details = f" (Status: {status_code})" if status_code else ""
super().__init__(f"Discord API Error: {message}{details}", context)
class ResourceExhaustedError(VideoArchiverError):
"""Error when system resources are exhausted"""
def __init__(self, message: str, resource_type: str = None):
def __init__(
self,
message: str,
resource_type: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> None:
self.resource_type = resource_type
super().__init__(f"Resource exhausted: {message} (Type: {resource_type if resource_type else 'Unknown'})")
details = f" (Type: {resource_type})" if resource_type else ""
super().__init__(f"Resource exhausted: {message}{details}", context)
class ProcessingError(VideoArchiverError):
"""Error during video processing"""
@@ -74,4 +132,126 @@ class CleanupError(VideoArchiverError):
class FileOperationError(VideoArchiverError):
"""Error during file operations"""
def __init__(
self,
message: str,
path: Optional[str] = None,
operation: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> None:
self.path = path
self.operation = operation
details = []
if path:
details.append(f"Path: {path}")
if operation:
details.append(f"Operation: {operation}")
details_str = f" ({', '.join(details)})" if details else ""
super().__init__(f"File operation error: {message}{details_str}", context)
# New exceptions for processor components
class ProcessorError(VideoArchiverError):
"""Error in video processor operations"""
pass
class ValidationError(VideoArchiverError):
"""Error in message or content validation"""
pass
class DisplayError(VideoArchiverError):
"""Error in status display operations"""
pass
class URLExtractionError(VideoArchiverError):
"""Error extracting URLs from content"""
def __init__(
self,
message: str,
url: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> None:
self.url = url
details = f" (URL: {url})" if url else ""
super().__init__(f"URL extraction error: {message}{details}", context)
class MessageHandlerError(VideoArchiverError):
"""Error in message handling operations"""
def __init__(
self,
message: str,
message_id: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> None:
self.message_id = message_id
details = f" (Message ID: {message_id})" if message_id else ""
super().__init__(f"Message handler error: {message}{details}", context)
class QueueHandlerError(VideoArchiverError):
"""Error in queue handling operations"""
pass
class QueueProcessorError(VideoArchiverError):
"""Error in queue processing operations"""
pass
class FFmpegError(VideoArchiverError):
"""Error in FFmpeg operations"""
def __init__(
self,
message: str,
command: Optional[str] = None,
exit_code: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> None:
self.command = command
self.exit_code = exit_code
details = []
if command:
details.append(f"Command: {command}")
if exit_code is not None:
details.append(f"Exit Code: {exit_code}")
details_str = f" ({', '.join(details)})" if details else ""
super().__init__(f"FFmpeg error: {message}{details_str}", context)
class DatabaseError(VideoArchiverError):
"""Error in database operations"""
def __init__(
self,
message: str,
query: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> None:
self.query = query
details = f" (Query: {query})" if query else ""
super().__init__(f"Database error: {message}{details}", context)
class HealthCheckError(VideoArchiverError):
"""Error in health check operations"""
def __init__(
self,
message: str,
component: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> None:
self.component = component
details = f" (Component: {component})" if component else ""
super().__init__(f"Health check error: {message}{details}", context)
class TrackingError(VideoArchiverError):
"""Error in progress tracking operations"""
def __init__(
self,
message: str,
operation: Optional[str] = None,
item_id: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> None:
self.operation = operation
self.item_id = item_id
details = []
if operation:
details.append(f"Operation: {operation}")
if item_id:
details.append(f"Item ID: {item_id}")
details_str = f" ({', '.join(details)})" if details else ""
super().__init__(f"Progress tracking error: {message}{details_str}", context)

View File

@@ -0,0 +1,138 @@
"""Safe file operations with retry logic"""
import os
import shutil
import asyncio
import logging
import json
import subprocess
from typing import Tuple
from pathlib import Path
from videoarchiver.utils.exceptions import VideoVerificationError
from videoarchiver.utils.file_deletion import secure_delete_file
logger = logging.getLogger("VideoArchiver")
class FileOperations:
"""Handles safe file operations with retries"""
def __init__(self, max_retries: int = 3, retry_delay: int = 1):
self.max_retries = max_retries
self.retry_delay = retry_delay
async def safe_delete_file(self, file_path: str) -> bool:
"""Safely delete a file with retries"""
for attempt in range(self.max_retries):
try:
if os.path.exists(file_path):
await secure_delete_file(file_path)
return True
except Exception as e:
logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
if attempt == self.max_retries - 1:
return False
await asyncio.sleep(self.retry_delay * (attempt + 1))
return False
async def safe_move_file(self, src: str, dst: str) -> bool:
"""Safely move a file with retries"""
for attempt in range(self.max_retries):
try:
os.makedirs(os.path.dirname(dst), exist_ok=True)
shutil.move(src, dst)
return True
except Exception as e:
logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
if attempt == self.max_retries - 1:
return False
await asyncio.sleep(self.retry_delay * (attempt + 1))
return False
def verify_video_file(self, file_path: str, ffprobe_path: str) -> bool:
"""Verify video file integrity"""
try:
cmd = [
ffprobe_path,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
file_path,
]
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
timeout=30,
)
if result.returncode != 0:
raise VideoVerificationError(f"FFprobe failed: {result.stderr}")
probe = json.loads(result.stdout)
# Verify video stream
video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
if not video_streams:
raise VideoVerificationError("No video streams found")
# Verify duration
duration = float(probe["format"].get("duration", 0))
if duration <= 0:
raise VideoVerificationError("Invalid video duration")
# Verify file is readable
try:
with open(file_path, "rb") as f:
f.seek(0, 2)  # Seek to end of file; tell() then reports its size
if f.tell() == 0:
raise VideoVerificationError("Empty file")
except Exception as e:
raise VideoVerificationError(f"File read error: {str(e)}")
return True
except subprocess.TimeoutExpired:
logger.error(f"FFprobe timed out for {file_path}")
return False
except json.JSONDecodeError:
logger.error(f"Invalid FFprobe output for {file_path}")
return False
except Exception as e:
logger.error(f"Error verifying video file {file_path}: {e}")
return False
def get_video_duration(self, file_path: str, ffprobe_path: str) -> float:
"""Get video duration in seconds"""
try:
cmd = [
ffprobe_path,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
file_path,
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise Exception(f"FFprobe failed: {result.stderr}")
data = json.loads(result.stdout)
return float(data["format"]["duration"])
except Exception as e:
logger.error(f"Error getting video duration: {e}")
return 0
def check_file_size(self, file_path: str, max_size_mb: int) -> Tuple[bool, int]:
"""Check if file size is within limits"""
try:
if os.path.exists(file_path):
size = os.path.getsize(file_path)
max_size = max_size_mb * 1024 * 1024
return size <= max_size, size
return False, 0
except OSError as e:
logger.error(f"Error checking file size: {e}")
return False, 0
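# Minimal usage sketch for the helpers above; the file and ffprobe
# paths are illustrative assumptions.
ops = FileOperations(max_retries=3, retry_delay=1)
if ops.verify_video_file("/tmp/clip.mp4", "/usr/bin/ffprobe"):
    fits, size = ops.check_file_size("/tmp/clip.mp4", max_size_mb=25)
    print(f"size={size} bytes, within limit: {fits}")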

View File

@@ -0,0 +1,111 @@
"""Process management and cleanup utilities"""
import asyncio
import logging
import subprocess
from typing import Set, Dict, Any
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
logger = logging.getLogger("VideoArchiver")
class ProcessManager:
"""Manages processes and resources for video operations"""
def __init__(self, concurrent_downloads: int = 2):
self._active_processes: Set[subprocess.Popen] = set()
self._processes_lock = asyncio.Lock()
self._shutting_down = False
# Create thread pool with proper naming
self.download_pool = ThreadPoolExecutor(
max_workers=max(1, min(3, concurrent_downloads)),
thread_name_prefix="videoarchiver_download"
)
# Track active downloads
self.active_downloads: Dict[str, Dict[str, Any]] = {}
self._downloads_lock = asyncio.Lock()
async def cleanup(self) -> None:
"""Clean up resources with proper shutdown"""
self._shutting_down = True
try:
# Kill any active processes
async with self._processes_lock:
for process in self._active_processes:
try:
process.terminate()
await asyncio.sleep(0.1) # Give process time to terminate
if process.poll() is None:
process.kill() # Force kill if still running
except Exception as e:
logger.error(f"Error killing process: {e}")
self._active_processes.clear()
# Clean up thread pool
self.download_pool.shutdown(wait=False, cancel_futures=True)
# Clean up active downloads
async with self._downloads_lock:
self.active_downloads.clear()
except Exception as e:
logger.error(f"Error during process manager cleanup: {e}")
finally:
self._shutting_down = False
async def force_cleanup(self) -> None:
"""Force cleanup of all resources"""
try:
# Kill all processes immediately
async with self._processes_lock:
for process in self._active_processes:
try:
process.kill()
except Exception as e:
logger.error(f"Error force killing process: {e}")
self._active_processes.clear()
# Force shutdown thread pool
self.download_pool.shutdown(wait=False, cancel_futures=True)
# Clear all tracking
async with self._downloads_lock:
self.active_downloads.clear()
except Exception as e:
logger.error(f"Error during force cleanup: {e}")
async def track_download(self, url: str, file_path: str) -> None:
"""Track a new download"""
async with self._downloads_lock:
self.active_downloads[url] = {
"file_path": file_path,
"start_time": datetime.utcnow(),
}
async def untrack_download(self, url: str) -> None:
"""Remove download from tracking"""
async with self._downloads_lock:
self.active_downloads.pop(url, None)
async def track_process(self, process: subprocess.Popen) -> None:
"""Track a new process"""
async with self._processes_lock:
self._active_processes.add(process)
async def untrack_process(self, process: subprocess.Popen) -> None:
"""Remove process from tracking"""
async with self._processes_lock:
self._active_processes.discard(process)
@property
def is_shutting_down(self) -> bool:
"""Check if manager is shutting down"""
return self._shutting_down
def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
"""Get current active downloads"""
return self.active_downloads
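# Sketch of the intended tracking pattern around a spawned process;
# the command itself is an illustrative placeholder.
async def run_tracked(manager: ProcessManager, cmd: list) -> int:
    process = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    await manager.track_process(process)
    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, process.wait)
    finally:
        await manager.untrack_process(process)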

View File

@@ -0,0 +1,126 @@
"""Progress tracking and logging utilities for video downloads"""
import logging
import os
import yt_dlp
from datetime import datetime
from typing import Dict, Any, Optional, Callable
logger = logging.getLogger("VideoArchiver")
class CancellableYTDLLogger:
"""Custom yt-dlp logger that can handle cancellation"""
def __init__(self):
self.cancelled = False
def debug(self, msg):
if self.cancelled:
raise yt_dlp.utils.DownloadError("Download cancelled")
logger.debug(msg)
def warning(self, msg):
if self.cancelled:
raise yt_dlp.utils.DownloadError("Download cancelled")
logger.warning(msg)
def error(self, msg):
if self.cancelled:
raise yt_dlp.utils.DownloadError("Download cancelled")
logger.error(msg)
class ProgressHandler:
"""Handles progress tracking and callbacks for video operations"""
def __init__(self):
self.progress_data: Dict[str, Dict[str, Any]] = {}
def initialize_progress(self, url: str) -> None:
"""Initialize progress tracking for a URL"""
self.progress_data[url] = {
"active": True,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"speed": "N/A",
"eta": "N/A",
"downloaded_bytes": 0,
"total_bytes": 0,
"retries": 0,
"fragment_count": 0,
"fragment_index": 0,
"video_title": "Unknown",
"extractor": "Unknown",
"format": "Unknown",
"resolution": "Unknown",
"fps": "Unknown",
}
def update(self, key: str, data: Dict[str, Any]) -> None:
"""Update progress data for a key, creating the entry if needed"""
self.progress_data.setdefault(key, {}).update(data)
def complete(self, key: str) -> None:
"""Mark progress as complete for a key"""
if key in self.progress_data:
self.progress_data[key]["active"] = False
self.progress_data[key]["percent"] = 100
def get_progress(self, key: str) -> Optional[Dict[str, Any]]:
"""Get progress data for a key"""
return self.progress_data.get(key)
def handle_download_progress(self, d: Dict[str, Any], url: str,
progress_callback: Optional[Callable[[float], None]] = None) -> None:
"""Handle download progress updates"""
try:
if d["status"] == "downloading":
progress_data = {
"active": True,
"percent": float(d.get("_percent_str", "0").replace("%", "")),
"speed": d.get("_speed_str", "N/A"),
"eta": d.get("_eta_str", "N/A"),
"downloaded_bytes": d.get("downloaded_bytes", 0),
"total_bytes": d.get("total_bytes", 0) or d.get("total_bytes_estimate", 0),
"retries": d.get("retry_count", 0),
"fragment_count": d.get("fragment_count", 0),
"fragment_index": d.get("fragment_index", 0),
"video_title": d.get("info_dict", {}).get("title", "Unknown"),
"extractor": d.get("info_dict", {}).get("extractor", "Unknown"),
"format": d.get("info_dict", {}).get("format", "Unknown"),
"resolution": d.get("info_dict", {}).get("resolution", "Unknown"),
"fps": d.get("info_dict", {}).get("fps", "Unknown"),
}
self.update(url, progress_data)
if progress_callback:
progress_callback(progress_data["percent"])
logger.debug(
f"Download progress: {progress_data['percent']}% at {progress_data['speed']}, "
f"ETA: {progress_data['eta']}, Downloaded: {progress_data['downloaded_bytes']}/"
f"{progress_data['total_bytes']} bytes"
)
elif d["status"] == "finished":
logger.info(f"Download completed: {d.get('filename', 'unknown')}")
except Exception as e:
logger.error(f"Error in progress handler: {str(e)}")
def handle_compression_progress(self, input_file: str, current_time: float, duration: float,
output_file: str, start_time: datetime,
progress_callback: Optional[Callable[[float], None]] = None) -> None:
"""Handle compression progress updates"""
try:
if duration > 0:
progress = min(100, (current_time / duration) * 100)
elapsed = datetime.utcnow() - start_time
self.update(input_file, {
"percent": progress,
"elapsed_time": str(elapsed).split(".")[0],
"current_size": os.path.getsize(output_file) if os.path.exists(output_file) else 0,
"current_time": current_time,
})
if progress_callback:
progress_callback(progress)
except Exception as e:
logger.error(f"Error upda

View File

@@ -1,109 +1,205 @@
"""Module for tracking download and compression progress"""
"""Progress tracking module."""
import logging
from typing import Dict, Any, Optional
from datetime import datetime
logger = logging.getLogger("ProgressTracker")
logger = logging.getLogger(__name__)
class ProgressTracker:
"""Tracks progress of downloads and compression operations"""
"""Progress tracker singleton."""
_instance = None
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self):
self._download_progress: Dict[str, Dict[str, Any]] = {}
self._compression_progress: Dict[str, Dict[str, Any]] = {}
if not self._initialized:
self._data: Dict[str, Dict[str, Any]] = {}
self._initialized = True
def start_download(self, url: str) -> None:
"""Initialize progress tracking for a download"""
self._download_progress[url] = {
"active": True,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"speed": "N/A",
"eta": "N/A",
"downloaded_bytes": 0,
"total_bytes": 0,
"retries": 0,
"fragment_count": 0,
"fragment_index": 0,
"video_title": "Unknown",
"extractor": "Unknown",
"format": "Unknown",
"resolution": "Unknown",
"fps": "Unknown",
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
def update(self, key: str, data: Dict[str, Any]) -> None:
"""Update progress for a key."""
if key not in self._data:
self._data[key] = {
'active': True,
'start_time': datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
'percent': 0
}
self._data[key].update(data)
self._data[key]['last_update'] = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
logger.debug(f"Progress for {key}: {self._data[key].get('percent', 0)}%")
def update_download_progress(self, data: Dict[str, Any]) -> None:
"""Update download progress information"""
def get(self, key: Optional[str] = None) -> Dict[str, Any]:
"""Get progress for a key."""
if key is None:
return self._data
return self._data.get(key, {})
def complete(self, key: str) -> None:
"""Mark progress as complete."""
if key in self._data:
self._data[key]['active'] = False
logger.info(f"Operation completed for {key}")
def clear(self) -> None:
"""Clear all progress data."""
self._data.clear()
logger.info("Progress data cleared")
_tracker = ProgressTracker()
def get_compression(self, file_path: Optional[str] = None) -> Dict[str, Any]:
"""Get compression progress."""
if file_path is None:
return self._compressions
return self._compressions.get(file_path, {})
def complete_download(self, url: str) -> None:
"""Mark download as complete."""
if url in self._downloads:
self._downloads[url]['active'] = False
logger.info(f"Download completed for {url}")
def complete_compression(self, file_path: str) -> None:
"""Mark compression as complete."""
if file_path in self._compressions:
self._compressions[file_path]['active'] = False
logger.info(f"Compression completed for {file_path}")
def clear(self) -> None:
"""Clear all progress data."""
self._downloads.clear()
self._compressions.clear()
logger.info("Progress data cleared")
def get_tracker() -> ProgressTracker:
"""Get the shared progress tracker instance"""
return _tracker
def mark_compression_complete(self, file_path: str) -> None:
"""Mark a compression operation as complete"""
if file_path in self._compression_progress:
self._compression_progress[file_path]['active'] = False
logger.info(f"Compression completed for {file_path}")
def clear_progress(self) -> None:
"""Clear all progress tracking"""
self._download_progress.clear()
self._compression_progress.clear()
logger.info("Cleared all progress tracking data")
# Create singleton instance
progress_tracker = ProgressTracker()
# Export the singleton instance
def get_progress_tracker() -> ProgressTracker:
"""Get the progress tracker singleton instance"""
return progress_tracker
def update_download_progress(self, data: Dict[str, Any]) -> None:
"""
Update download progress information.
Args:
data: Dictionary containing download progress data
"""
try:
# Get URL from info dict
url = data.get("info_dict", {}).get("webpage_url", "unknown")
if url not in self._download_progress:
info_dict = data.get("info_dict", {})
url = info_dict.get("webpage_url")
if not url or url not in self._download_progress:
return
if data["status"] == "downloading":
if data.get("status") == "downloading":
percent_str = data.get("_percent_str", "0").replace("%", "")
try:
percent = float(percent_str)
except ValueError:
percent = 0.0
total_bytes = (
data.get("total_bytes", 0) or
data.get("total_bytes_estimate", 0)
)
self._download_progress[url].update({
"active": True,
"percent": float(data.get("_percent_str", "0").replace("%", "")),
"percent": percent,
"speed": data.get("_speed_str", "N/A"),
"eta": data.get("_eta_str", "N/A"),
"downloaded_bytes": data.get("downloaded_bytes", 0),
"total_bytes": data.get("total_bytes", 0) or data.get("total_bytes_estimate", 0),
"total_bytes": total_bytes,
"retries": data.get("retry_count", 0),
"fragment_count": data.get("fragment_count", 0),
"fragment_index": data.get("fragment_index", 0),
"video_title": data.get("info_dict", {}).get("title", "Unknown"),
"extractor": data.get("info_dict", {}).get("extractor", "Unknown"),
"format": data.get("info_dict", {}).get("format", "Unknown"),
"resolution": data.get("info_dict", {}).get("resolution", "Unknown"),
"fps": data.get("info_dict", {}).get("fps", "Unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"video_title": info_dict.get("title", "Unknown"),
"extractor": info_dict.get("extractor", "Unknown"),
"format": info_dict.get("format", "Unknown"),
"resolution": info_dict.get("resolution", "Unknown"),
"fps": info_dict.get("fps", "Unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
})
logger.debug(
f"Download progress for {url}: "
f"{self._download_progress[url]['percent']}% at {self._download_progress[url]['speed']}, "
f"{percent:.1f}% at {self._download_progress[url]['speed']}, "
f"ETA: {self._download_progress[url]['eta']}"
)
except Exception as e:
logger.error(f"Error updating download progress: {e}")
logger.error(f"Error updating download progress: {e}", exc_info=True)
def end_download(self, url: str) -> None:
"""Mark a download as completed"""
def end_download(self, url: str, status: ProgressStatus = ProgressStatus.COMPLETED) -> None:
"""
Mark a download as completed.
Args:
url: The URL being downloaded
status: The final status of the download
"""
if url in self._download_progress:
self._download_progress[url]["active"] = False
logger.info(f"Download {status.value} for {url}")
def start_compression(
self,
input_file: str,
params: Dict[str, str],
use_hardware: bool,
duration: float,
input_size: int,
target_size: int
) -> None:
"""Initialize progress tracking for compression"""
self._compression_progress[input_file] = {
"active": True,
"filename": input_file,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": input_size,
"current_size": 0,
"target_size": target_size,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
def start_compression(self, params: CompressionParams) -> None:
"""
Initialize progress tracking for compression.
Args:
params: Compression parameters
"""
current_time = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
self._compression_progress[params.input_file] = CompressionProgress(
active=True,
filename=params.input_file,
start_time=current_time,
percent=0.0,
elapsed_time="0:00",
input_size=params.input_size,
current_size=0,
target_size=params.target_size,
codec=params.codec_params.get("c:v", "unknown"),
hardware_accel=params.use_hardware,
preset=params.codec_params.get("preset", "unknown"),
crf=params.codec_params.get("crf", "unknown"),
duration=params.duration,
bitrate=params.codec_params.get("b:v", "unknown"),
audio_codec=params.codec_params.get("c:a", "unknown"),
audio_bitrate=params.codec_params.get("b:a", "unknown"),
last_update=current_time,
current_time=None
)
def update_compression_progress(
self,
@@ -113,14 +209,23 @@ class ProgressTracker:
current_size: int,
current_time: float
) -> None:
"""Update compression progress information"""
"""
Update compression progress information.
Args:
input_file: The input file being compressed
progress: Current progress percentage (0-100)
elapsed_time: Time elapsed as string
current_size: Current file size in bytes
current_time: Current timestamp in seconds
"""
if input_file in self._compression_progress:
self._compression_progress[input_file].update({
"percent": progress,
"percent": max(0.0, min(100.0, progress)),
"elapsed_time": elapsed_time,
"current_size": current_size,
"current_time": current_time,
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
})
logger.debug(
@@ -128,29 +233,73 @@ class ProgressTracker:
f"{progress:.1f}%, Size: {current_size}/{self._compression_progress[input_file]['target_size']} bytes"
)
def end_compression(self, input_file: str) -> None:
"""Mark a compression operation as completed"""
def end_compression(
self,
input_file: str,
status: ProgressStatus = ProgressStatus.COMPLETED
) -> None:
"""
Mark a compression operation as completed.
Args:
input_file: The input file being compressed
status: The final status of the compression
"""
if input_file in self._compression_progress:
self._compression_progress[input_file]["active"] = False
logger.info(f"Compression {status.value} for {input_file}")
def get_download_progress(self, url: str) -> Optional[Dict[str, Any]]:
"""Get progress information for a download"""
def get_download_progress(self, url: Optional[str] = None) -> Optional[DownloadProgress]:
"""
Get progress information for a download.
Args:
url: Optional URL to get progress for. If None, returns all progress.
Returns:
Progress information for the specified download or None if not found
"""
if url is None:
return self._download_progress
return self._download_progress.get(url)
def get_compression_progress(self, input_file: str) -> Optional[Dict[str, Any]]:
"""Get progress information for a compression operation"""
def get_compression_progress(
self,
input_file: Optional[str] = None
) -> Optional[CompressionProgress]:
"""
Get progress information for a compression operation.
Args:
input_file: Optional file to get progress for. If None, returns all progress.
Returns:
Progress information for the specified compression or None if not found
"""
if input_file is None:
return self._compression_progress
return self._compression_progress.get(input_file)
def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
"""Get all active downloads"""
def get_active_downloads(self) -> Dict[str, DownloadProgress]:
"""
Get all active downloads.
Returns:
Dictionary of active downloads and their progress
"""
return {
url: progress
for url, progress in self._download_progress.items()
if progress.get("active", False)
}
def get_active_compressions(self) -> Dict[str, Dict[str, Any]]:
"""Get all active compression operations"""
def get_active_compressions(self) -> Dict[str, CompressionProgress]:
"""
Get all active compression operations.
Returns:
Dictionary of active compressions and their progress
"""
return {
input_file: progress
for input_file, progress in self._compression_progress.items()
@@ -161,3 +310,4 @@ class ProgressTracker:
"""Clear all progress tracking"""
self._download_progress.clear()
self._compression_progress.clear()
logger.info("Cleared

View File

@@ -0,0 +1,76 @@
"""URL validation utilities for video downloads"""
import re
import logging
import yt_dlp
from typing import List, Optional
logger = logging.getLogger("VideoArchiver")
def is_video_url_pattern(url: str) -> bool:
"""Check if URL matches common video platform patterns"""
video_patterns = [
r"youtube\.com/watch\?v=",
r"youtu\.be/",
r"vimeo\.com/",
r"tiktok\.com/",
r"twitter\.com/.*/video/",
r"x\.com/.*/video/",
r"bsky\.app/",
r"facebook\.com/.*/videos/",
r"instagram\.com/.*/(tv|reel|p)/",
r"twitch\.tv/.*/clip/",
r"streamable\.com/",
r"v\.redd\.it/",
r"clips\.twitch\.tv/",
r"dailymotion\.com/video/",
r"\.mp4$",
r"\.webm$",
r"\.mov$",
]
return any(re.search(pattern, url, re.IGNORECASE) for pattern in video_patterns)
def check_url_support(url: str, ydl_opts: dict, enabled_sites: Optional[List[str]] = None) -> bool:
"""Check if URL is supported by attempting a simulated download"""
if not is_video_url_pattern(url):
return False
try:
simulate_opts = {
**ydl_opts,
"simulate": True,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"skip_download": True,
"format": "best",
}
with yt_dlp.YoutubeDL(simulate_opts) as ydl:
try:
info = ydl.extract_info(url, download=False)
if info is None:
return False
if enabled_sites:
extractor = info.get("extractor", "").lower()
if not any(
site.lower() in extractor for site in enabled_sites
):
logger.info(f"Site {extractor} not in enabled sites list")
return False
logger.info(
f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})"
)
return True
except yt_dlp.utils.UnsupportedError:
return False
except Exception as e:
if "Unsupported URL" not in str(e):
logger.error(f"Error checking URL {url}: {str(e)}")
return False
except Exception as e:
logger.error(f"Error during URL check: {str(e)}")
return False
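# The regex pre-filter in is_video_url_pattern is cheap and synchronous;
# the URLs below are placeholders:
for candidate in (
    "https://youtu.be/dQw4w9WgXcQ",
    "https://example.com/article",
    "https://cdn.example.com/clip.mp4",
):
    print(candidate, is_video_url_pattern(candidate))
# Only the first and third match, so only they reach the yt-dlp simulation.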

View File

@@ -1,809 +0,0 @@
"""Video download and processing utilities"""
import os
import re
import logging
import asyncio
import ffmpeg
import yt_dlp
import shutil
import subprocess
import json
import signal
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Tuple, Callable, Set
from pathlib import Path
from datetime import datetime
from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager
from videoarchiver.ffmpeg.exceptions import (
FFmpegError,
CompressionError,
VerificationError,
FFprobeError,
TimeoutError,
handle_ffmpeg_error,
)
from videoarchiver.utils.exceptions import VideoVerificationError
from videoarchiver.utils.file_ops import secure_delete_file
from videoarchiver.utils.path_manager import temp_path_context
logger = logging.getLogger("VideoArchiver")
# Add a custom yt-dlp logger to handle cancellation
class CancellableYTDLLogger:
def __init__(self):
self.cancelled = False
def debug(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.debug(msg)
def warning(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.warning(msg)
def error(self, msg):
if self.cancelled:
raise Exception("Download cancelled")
logger.error(msg)
def is_video_url_pattern(url: str) -> bool:
"""Check if URL matches common video platform patterns"""
video_patterns = [
r"youtube\.com/watch\?v=",
r"youtu\.be/",
r"vimeo\.com/",
r"tiktok\.com/",
r"twitter\.com/.*/video/",
r"x\.com/.*/video/",
r"bsky\.app/",
r"facebook\.com/.*/videos/",
r"instagram\.com/.*/(tv|reel|p)/",
r"twitch\.tv/.*/clip/",
r"streamable\.com/",
r"v\.redd\.it/",
r"clips\.twitch\.tv/",
r"dailymotion\.com/video/",
r"\.mp4$",
r"\.webm$",
r"\.mov$",
]
return any(re.search(pattern, url, re.IGNORECASE) for pattern in video_patterns)
class VideoDownloader:
MAX_RETRIES = 5
RETRY_DELAY = 10
FILE_OP_RETRIES = 3
FILE_OP_RETRY_DELAY = 1
SHUTDOWN_TIMEOUT = 15 # seconds
def __init__(
self,
download_path: str,
video_format: str,
max_quality: int,
max_file_size: int,
enabled_sites: Optional[List[str]] = None,
concurrent_downloads: int = 2,
ffmpeg_mgr: Optional[FFmpegManager] = None,
):
self.download_path = Path(download_path)
self.download_path.mkdir(parents=True, exist_ok=True)
os.chmod(str(self.download_path), 0o755)
self.video_format = video_format
self.max_quality = max_quality
self.max_file_size = max_file_size
self.enabled_sites = enabled_sites
self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager()
# Create thread pool with proper naming
self.download_pool = ThreadPoolExecutor(
max_workers=max(1, min(3, concurrent_downloads)),
thread_name_prefix="videoarchiver_download",
)
# Track active downloads and processes
self.active_downloads: Dict[str, Dict[str, Any]] = {}
self._downloads_lock = asyncio.Lock()
self._active_processes: Set[subprocess.Popen] = set()
self._processes_lock = asyncio.Lock()
self._shutting_down = False
# Create cancellable logger
self.ytdl_logger = CancellableYTDLLogger()
# Configure yt-dlp options
self.ydl_opts = {
"format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best",
"outtmpl": "%(title)s.%(ext)s",
"merge_output_format": video_format,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"concurrent_fragment_downloads": 1,
"retries": self.MAX_RETRIES,
"fragment_retries": self.MAX_RETRIES,
"file_access_retries": self.FILE_OP_RETRIES,
"extractor_retries": self.MAX_RETRIES,
"postprocessor_hooks": [self._check_file_size],
"progress_hooks": [self._progress_hook, self._detailed_progress_hook],
"ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()),
"ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()),
"paths": {"home": str(self.download_path)},
"logger": self.ytdl_logger,
"ignoreerrors": True,
"no_color": True,
"geo_bypass": True,
"socket_timeout": 60,
"http_chunk_size": 1048576,
"external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]},
"max_sleep_interval": 5,
"sleep_interval": 1,
"max_filesize": max_file_size * 1024 * 1024,
}
async def cleanup(self) -> None:
"""Clean up resources with proper shutdown"""
self._shutting_down = True
try:
# Cancel active downloads
self.ytdl_logger.cancelled = True
# Kill any active FFmpeg processes
async with self._processes_lock:
for process in self._active_processes:
try:
process.terminate()
await asyncio.sleep(0.1) # Give process time to terminate
if process.poll() is None:
process.kill() # Force kill if still running
except Exception as e:
logger.error(f"Error killing process: {e}")
self._active_processes.clear()
# Clean up thread pool
self.download_pool.shutdown(wait=False, cancel_futures=True)
# Clean up active downloads
async with self._downloads_lock:
self.active_downloads.clear()
except Exception as e:
logger.error(f"Error during downloader cleanup: {e}")
finally:
self._shutting_down = False
async def force_cleanup(self) -> None:
"""Force cleanup of all resources"""
try:
# Force cancel all downloads
self.ytdl_logger.cancelled = True
# Kill all processes immediately
async with self._processes_lock:
for process in self._active_processes:
try:
process.kill()
except Exception as e:
logger.error(f"Error force killing process: {e}")
self._active_processes.clear()
# Force shutdown thread pool
self.download_pool.shutdown(wait=False, cancel_futures=True)
# Clear all tracking
async with self._downloads_lock:
self.active_downloads.clear()
except Exception as e:
logger.error(f"Error during force cleanup: {e}")
def _detailed_progress_hook(self, d):
"""Handle detailed download progress tracking"""
try:
if d["status"] == "downloading":
# Get URL from info dict
url = d.get("info_dict", {}).get("webpage_url", "unknown")
# Update global progress tracking
from videoarchiver.processor import _download_progress
if url in _download_progress:
_download_progress[url].update(
{
"active": True,
"percent": float(
d.get("_percent_str", "0").replace("%", "")
),
"speed": d.get("_speed_str", "N/A"),
"eta": d.get("_eta_str", "N/A"),
"downloaded_bytes": d.get("downloaded_bytes", 0),
"total_bytes": d.get("total_bytes", 0)
or d.get("total_bytes_estimate", 0),
"retries": d.get("retry_count", 0),
"fragment_count": d.get("fragment_count", 0),
"fragment_index": d.get("fragment_index", 0),
"video_title": d.get("info_dict", {}).get(
"title", "Unknown"
),
"extractor": d.get("info_dict", {}).get(
"extractor", "Unknown"
),
"format": d.get("info_dict", {}).get("format", "Unknown"),
"resolution": d.get("info_dict", {}).get(
"resolution", "Unknown"
),
"fps": d.get("info_dict", {}).get("fps", "Unknown"),
"last_update": datetime.utcnow().strftime(
"%Y-%m-%d %H:%M:%S"
),
}
)
logger.debug(
f"Detailed progress for {url}: "
f"{_download_progress[url]['percent']}% at {_download_progress[url]['speed']}, "
f"ETA: {_download_progress[url]['eta']}"
)
except Exception as e:
logger.error(f"Error in detailed progress hook: {str(e)}")

    def _progress_hook(self, d):
"""Handle download progress"""
if d["status"] == "finished":
logger.info(f"Download completed: {d['filename']}")
elif d["status"] == "downloading":
try:
percent = float(d.get("_percent_str", "0").replace("%", ""))
speed = d.get("_speed_str", "N/A")
eta = d.get("_eta_str", "N/A")
downloaded = d.get("downloaded_bytes", 0)
total = d.get("total_bytes", 0) or d.get("total_bytes_estimate", 0)
logger.debug(
f"Download progress: {percent}% at {speed}, "
f"ETA: {eta}, Downloaded: {downloaded}/{total} bytes"
)
except Exception as e:
logger.debug(f"Error logging progress: {str(e)}")
def is_supported_url(self, url: str) -> bool:
"""Check if URL is supported by attempting a simulated download"""
if not is_video_url_pattern(url):
return False
try:
simulate_opts = {
**self.ydl_opts,
"simulate": True,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"skip_download": True,
"format": "best",
}
with yt_dlp.YoutubeDL(simulate_opts) as ydl:
try:
info = ydl.extract_info(url, download=False)
if info is None:
return False
if self.enabled_sites:
extractor = info.get("extractor", "").lower()
if not any(
site.lower() in extractor for site in self.enabled_sites
):
logger.info(f"Site {extractor} not in enabled sites list")
return False
logger.info(
f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})"
)
return True
except yt_dlp.utils.UnsupportedError:
return False
except Exception as e:
if "Unsupported URL" not in str(e):
logger.error(f"Error checking URL {url}: {str(e)}")
return False
except Exception as e:
logger.error(f"Error during URL check: {str(e)}")
return False
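
    # Pipeline: download into a temp dir -> verify -> compress when over the
    # size limit (hardware first, CPU fallback) -> move/keep the result under
    # download_path. The finally block deactivates progress tracking and
    # removes stray temp files.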
async def download_video(
self, url: str, progress_callback: Optional[Callable[[float], None]] = None
) -> Tuple[bool, str, str]:
"""Download and process a video with improved error handling"""
if self._shutting_down:
return False, "", "Downloader is shutting down"
# Initialize progress tracking for this URL
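        # (imported lazily here, presumably to avoid a circular import with
        # the processor module)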
from videoarchiver.processor import _download_progress
_download_progress[url] = {
"active": True,
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"speed": "N/A",
"eta": "N/A",
"downloaded_bytes": 0,
"total_bytes": 0,
"retries": 0,
"fragment_count": 0,
"fragment_index": 0,
"video_title": "Unknown",
"extractor": "Unknown",
"format": "Unknown",
"resolution": "Unknown",
"fps": "Unknown",
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
original_file = None
compressed_file = None
temp_dir = None
hardware_accel_failed = False
compression_params = None
try:
with temp_path_context() as temp_dir:
# Download the video
success, file_path, error = await self._safe_download(
url, temp_dir, progress_callback
)
if not success:
return False, "", error
original_file = file_path
async with self._downloads_lock:
self.active_downloads[url] = {
"file_path": original_file,
"start_time": datetime.utcnow(),
}
# Check file size and compress if needed
file_size = os.path.getsize(original_file)
if file_size > (self.max_file_size * 1024 * 1024):
logger.info(f"Compressing video: {original_file}")
try:
# Get optimal compression parameters
compression_params = self.ffmpeg_mgr.get_compression_params(
original_file, self.max_file_size
)
compressed_file = os.path.join(
self.download_path,
f"compressed_{os.path.basename(original_file)}",
)
# Try hardware acceleration first
success = await self._try_compression(
original_file,
compressed_file,
compression_params,
progress_callback,
use_hardware=True,
)
# If hardware acceleration fails, fall back to CPU
if not success:
hardware_accel_failed = True
logger.warning(
"Hardware acceleration failed, falling back to CPU encoding"
)
success = await self._try_compression(
original_file,
compressed_file,
compression_params,
progress_callback,
use_hardware=False,
)
if not success:
raise CompressionError(
"Failed to compress with both hardware and CPU encoding",
file_size,
self.max_file_size * 1024 * 1024,
)
# Verify compressed file
if not self._verify_video_file(compressed_file):
raise VideoVerificationError(
"Compressed file verification failed"
)
compressed_size = os.path.getsize(compressed_file)
if compressed_size <= (self.max_file_size * 1024 * 1024):
await self._safe_delete_file(original_file)
return True, compressed_file, ""
else:
await self._safe_delete_file(compressed_file)
raise CompressionError(
"Failed to compress to target size",
file_size,
self.max_file_size * 1024 * 1024,
)
except Exception as e:
error_msg = str(e)
if hardware_accel_failed:
error_msg = f"Hardware acceleration failed, CPU fallback error: {error_msg}"
if compressed_file and os.path.exists(compressed_file):
await self._safe_delete_file(compressed_file)
return False, "", error_msg
else:
# Move file to final location
final_path = os.path.join(
self.download_path, os.path.basename(original_file)
)
success = await self._safe_move_file(original_file, final_path)
if not success:
return False, "", "Failed to move file to final location"
return True, final_path, ""
except Exception as e:
logger.error(f"Download error: {str(e)}")
return False, "", str(e)
finally:
# Clean up
async with self._downloads_lock:
self.active_downloads.pop(url, None)
if url in _download_progress:
_download_progress[url]["active"] = False
try:
if original_file and os.path.exists(original_file):
await self._safe_delete_file(original_file)
                if (
                    compressed_file
                    and os.path.exists(compressed_file)
                    # str() in case download_path is a pathlib.Path
                    and not compressed_file.startswith(str(self.download_path))
                ):
                    await self._safe_delete_file(compressed_file)
except Exception as e:
logger.error(f"Error during file cleanup: {str(e)}")
async def _try_compression(
self,
input_file: str,
output_file: str,
params: Dict[str, str],
progress_callback: Optional[Callable[[float], None]] = None,
use_hardware: bool = True,
) -> bool:
"""Attempt video compression with given parameters"""
if self._shutting_down:
return False
        # Imported up front so the finally block can always reference
        # _compression_progress, even if an exception fires before the
        # progress entry is created
        from videoarchiver.processor import _compression_progress

        try:
            # Build FFmpeg command
            ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
cmd = [ffmpeg_path, "-y", "-i", input_file]
# Add progress monitoring
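            # ("-progress pipe:1" makes FFmpeg emit machine-readable key=value
            # lines such as out_time_ms=... on stdout)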
cmd.extend(["-progress", "pipe:1"])
# Modify parameters based on hardware acceleration preference
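            # Codec swap table (assumes ffmpeg_mgr.gpu_info exposes boolean
            # "nvidia"/"amd"/"intel" flags, as the lookups below imply):
            #   libx264 -> h264_nvenc (NVIDIA) / h264_amf (AMD) / h264_qsv (Intel)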
if use_hardware:
gpu_info = self.ffmpeg_mgr.gpu_info
if gpu_info["nvidia"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_nvenc"
elif gpu_info["amd"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_amf"
elif gpu_info["intel"] and params.get("c:v") == "libx264":
params["c:v"] = "h264_qsv"
else:
params["c:v"] = "libx264"
# Add all parameters to command
for key, value in params.items():
cmd.extend([f"-{key}", str(value)])
# Add output file
cmd.append(output_file)
# Get video duration for progress calculation
duration = self._get_video_duration(input_file)
# Get input file size
input_size = os.path.getsize(input_file)
# Initialize compression progress
_compression_progress[input_file] = {
"active": True,
"filename": os.path.basename(input_file),
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
"percent": 0,
"elapsed_time": "0:00",
"input_size": input_size,
"current_size": 0,
"target_size": self.max_file_size * 1024 * 1024,
"codec": params.get("c:v", "unknown"),
"hardware_accel": use_hardware,
"preset": params.get("preset", "unknown"),
"crf": params.get("crf", "unknown"),
"duration": duration,
"bitrate": params.get("b:v", "unknown"),
"audio_codec": params.get("c:a", "unknown"),
"audio_bitrate": params.get("b:a", "unknown"),
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
}
# Run compression with progress monitoring
process = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
# Track the process
async with self._processes_lock:
self._active_processes.add(process)
start_time = datetime.utcnow()
try:
while True:
if self._shutting_down:
process.terminate()
return False
line = await process.stdout.readline()
if not line:
break
try:
line = line.decode().strip()
if line.startswith("out_time_ms="):
current_time = (
int(line.split("=")[1]) / 1000000
) # Convert microseconds to seconds
if duration > 0:
progress = min(100, (current_time / duration) * 100)
# Update compression progress
elapsed = datetime.utcnow() - start_time
_compression_progress[input_file].update(
{
"percent": progress,
"elapsed_time": str(elapsed).split(".")[
0
], # Remove microseconds
"current_size": (
os.path.getsize(output_file)
if os.path.exists(output_file)
else 0
),
"current_time": current_time,
"last_update": datetime.utcnow().strftime(
"%Y-%m-%d %H:%M:%S"
),
}
)
if progress_callback:
# Call the callback directly since it now handles task creation
progress_callback(progress)
except Exception as e:
logger.error(f"Error parsing FFmpeg progress: {e}")
                returncode = await process.wait()
                # Require a zero exit status as well as an output file: FFmpeg
                # can fail partway through and leave a truncated file behind
                success = returncode == 0 and os.path.exists(output_file)
# Update final status
if success and input_file in _compression_progress:
_compression_progress[input_file].update(
{
"active": False,
"percent": 100,
"current_size": os.path.getsize(output_file),
"last_update": datetime.utcnow().strftime(
"%Y-%m-%d %H:%M:%S"
),
}
)
return success
finally:
# Remove process from tracking
async with self._processes_lock:
self._active_processes.discard(process)
except Exception as e:
logger.error(f"Compression attempt failed: {str(e)}")
return False
finally:
# Ensure compression progress is marked as inactive
if input_file in _compression_progress:
_compression_progress[input_file]["active"] = False
def _get_video_duration(self, file_path: str) -> float:
"""Get video duration in seconds"""
try:
ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
cmd = [
ffprobe_path,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
file_path,
]
            # Synchronous ffprobe call; quick, but note it briefly blocks the
            # event loop when invoked from async code
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            data = json.loads(result.stdout)
            return float(data["format"]["duration"])
        except Exception as e:
            logger.error(f"Error getting video duration: {e}")
            return 0.0

    def _check_file_size(self, info):
        """Post-processing hook: log when a file exceeds the size limit (compression happens later)"""
if info.get("filepath") and os.path.exists(info["filepath"]):
try:
size = os.path.getsize(info["filepath"])
if size > (self.max_file_size * 1024 * 1024):
logger.info(
f"File exceeds size limit, will compress: {info['filepath']}"
)
except OSError as e:
logger.error(f"Error checking file size: {str(e)}")

    def _verify_video_file(self, file_path: str) -> bool:
"""Verify video file integrity"""
try:
ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
cmd = [
ffprobe_path,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
file_path,
]
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
timeout=30,
)
if result.returncode != 0:
raise VideoVerificationError(f"FFprobe failed: {result.stderr}")
probe = json.loads(result.stdout)
# Verify video stream
video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
if not video_streams:
raise VideoVerificationError("No video streams found")
# Verify duration
duration = float(probe["format"].get("duration", 0))
if duration <= 0:
raise VideoVerificationError("Invalid video duration")
# Verify file is readable
with open(file_path, "rb") as f:
f.seek(0, 2)
if f.tell() == 0:
raise VideoVerificationError("Empty file")
return True
except Exception as e:
logger.error(f"Error verifying video file {file_path}: {e}")
return False
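
    # Retry policy: up to MAX_RETRIES attempts with exponential backoff; the
    # yt-dlp options are re-copied on every attempt so hook wiring stays clean.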
async def _safe_download(
self,
url: str,
temp_dir: str,
progress_callback: Optional[Callable[[float], None]] = None,
) -> Tuple[bool, str, str]:
"""Safely download video with retries"""
if self._shutting_down:
return False, "", "Downloader is shutting down"
last_error = None
for attempt in range(self.MAX_RETRIES):
try:
ydl_opts = self.ydl_opts.copy()
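                # Prefix the output template with the temp dir (temp_path_context
                # is assumed to yield an absolute path, so prepare_filename()
                # below returns the full path)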
ydl_opts["outtmpl"] = os.path.join(temp_dir, ydl_opts["outtmpl"])
                # Add progress callback, chaining every existing hook
                # (wrapping only the first would silently drop
                # _detailed_progress_hook)
                if progress_callback:
                    original_hooks = list(ydl_opts["progress_hooks"])

                    def combined_progress_hook(d):
                        for hook in original_hooks:
                            hook(d)
                        if d["status"] == "downloading":
                            try:
                                percent = float(
                                    d.get("_percent_str", "0").replace("%", "")
                                )
                                # Call the callback directly since it now handles task creation
                                progress_callback(percent)
                            except Exception as e:
                                logger.error(f"Error in progress callback: {e}")

                    ydl_opts["progress_hooks"] = [combined_progress_hook]
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info = await asyncio.get_running_loop().run_in_executor(
self.download_pool, lambda: ydl.extract_info(url, download=True)
)
if info is None:
raise Exception("Failed to extract video information")
file_path = os.path.join(temp_dir, ydl.prepare_filename(info))
if not os.path.exists(file_path):
raise FileNotFoundError("Download completed but file not found")
if not self._verify_video_file(file_path):
raise VideoVerificationError("Downloaded file is not a valid video")
return True, file_path, ""
except Exception as e:
last_error = str(e)
logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
if attempt < self.MAX_RETRIES - 1:
                    # Exponential backoff with a small deterministic offset
                    # (not true jitter; add randomness if herd effects matter)
                    delay = self.RETRY_DELAY * (2**attempt) + (attempt * 2)
await asyncio.sleep(delay)
else:
return False, "", f"All download attempts failed: {last_error}"
async def _safe_delete_file(self, file_path: str) -> bool:
"""Safely delete a file with retries"""
for attempt in range(self.FILE_OP_RETRIES):
try:
if await secure_delete_file(file_path):
return True
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
except Exception as e:
logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
if attempt == self.FILE_OP_RETRIES - 1:
return False
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
return False

    async def _safe_move_file(self, src: str, dst: str) -> bool:
"""Safely move a file with retries"""
for attempt in range(self.FILE_OP_RETRIES):
try:
os.makedirs(os.path.dirname(dst), exist_ok=True)
shutil.move(src, dst)
return True
except Exception as e:
logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
if attempt == self.FILE_OP_RETRIES - 1:
return False
await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
return False