mirror of https://github.com/pacnpal/Pac-cogs.git
synced 2025-12-20 02:41:06 -05:00

Commit: fixed
videoarchiver/utils/__init__.py

@@ -1,5 +1,7 @@
"""Utility functions and classes for VideoArchiver"""

from typing import Dict, Optional, Any, Union, List

from .file_ops import (
    cleanup_downloads,
    ensure_directory,
@@ -12,16 +14,65 @@ from .directory_manager import DirectoryManager
from .permission_manager import PermissionManager
from .download_manager import DownloadManager
from .compression_manager import CompressionManager
from .progress_tracker import (
    ProgressTracker,
    ProgressStatus,
    DownloadProgress,
    CompressionProgress,
    CompressionParams,
)
from .path_manager import PathManager
from .exceptions import (
    # Base exception
    VideoArchiverError,
    ErrorSeverity,
    ErrorContext,

    # File operations
    FileOperationError,
    DirectoryError,
    PermissionError,
    DownloadError,
    CompressionError,
    TrackingError,
    PathError,
    FileCleanupError,

    # Video operations
    VideoDownloadError,
    VideoProcessingError,
    VideoVerificationError,
    VideoUploadError,
    VideoCleanupError,

    # Resource management
    ResourceError,
    ResourceExhaustedError,

    # Network and API
    NetworkError,
    DiscordAPIError,

    # Component operations
    ComponentError,
    ConfigurationError,
    DatabaseError,
    FFmpegError,

    # Queue operations
    QueueError,
    QueueHandlerError,
    QueueProcessorError,

    # Processing operations
    ProcessingError,
    ProcessorError,
    ValidationError,
    DisplayError,
    URLExtractionError,
    MessageHandlerError,

    # Cleanup operations
    CleanupError,

    # Health monitoring
    HealthCheckError,
)

__all__ = [
@@ -41,16 +92,75 @@ __all__ = [
    'ProgressTracker',
    'PathManager',

    # Progress Tracking Types
    'ProgressStatus',
    'DownloadProgress',
    'CompressionProgress',
    'CompressionParams',

    # Base Exceptions
    'VideoArchiverError',
    'ErrorSeverity',
    'ErrorContext',

    # File Operation Exceptions
    'FileOperationError',
    'DirectoryError',
    'PermissionError',
    'DownloadError',
    'CompressionError',
    'TrackingError',
    'PathError',
    'FileCleanupError',

    # Video Operation Exceptions
    'VideoDownloadError',
    'VideoProcessingError',
    'VideoVerificationError',
    'VideoUploadError',
    'VideoCleanupError',

    # Resource Exceptions
    'ResourceError',
    'ResourceExhaustedError',

    # Network and API Exceptions
    'NetworkError',
    'DiscordAPIError',

    # Component Exceptions
    'ComponentError',
    'ConfigurationError',
    'DatabaseError',
    'FFmpegError',

    # Queue Exceptions
    'QueueError',
    'QueueHandlerError',
    'QueueProcessorError',

    # Processing Exceptions
    'ProcessingError',
    'ProcessorError',
    'ValidationError',
    'DisplayError',
    'URLExtractionError',
    'MessageHandlerError',

    # Cleanup Exceptions
    'CleanupError',

    # Health Monitoring Exceptions
    'HealthCheckError',

    # Helper Functions
    'get_download_progress',
    'get_compression_progress',
    'get_active_downloads',
    'get_active_compressions',
]

# Version information
__version__ = "1.0.0"
__author__ = "VideoArchiver Team"
__description__ = "Utility functions and classes for VideoArchiver"

# Initialize shared instances for module-level access
directory_manager = DirectoryManager()
permission_manager = PermissionManager()
@@ -58,3 +168,93 @@ download_manager = DownloadManager()
compression_manager = CompressionManager()
progress_tracker = ProgressTracker()
path_manager = PathManager()

# Progress tracking helper functions
def get_download_progress(url: Optional[str] = None) -> Union[Dict[str, DownloadProgress], Optional[DownloadProgress]]:
    """
    Get progress information for a download.

    Args:
        url: Optional URL to get progress for. If None, returns all progress.

    Returns:
        If url is provided, returns progress for that URL or None if not found.
        If url is None, returns a dictionary of all download progress.

    Raises:
        TrackingError: If there's an error getting progress information
    """
    try:
        return progress_tracker.get_download_progress(url)
    except Exception as e:
        raise TrackingError(f"Failed to get download progress: {str(e)}")

def get_compression_progress(input_file: Optional[str] = None) -> Union[Dict[str, CompressionProgress], Optional[CompressionProgress]]:
    """
    Get progress information for a compression operation.

    Args:
        input_file: Optional file to get progress for. If None, returns all progress.

    Returns:
        If input_file is provided, returns progress for that file or None if not found.
        If input_file is None, returns a dictionary of all compression progress.

    Raises:
        TrackingError: If there's an error getting progress information
    """
    try:
        return progress_tracker.get_compression_progress(input_file)
    except Exception as e:
        raise TrackingError(f"Failed to get compression progress: {str(e)}")

def get_active_downloads() -> Dict[str, DownloadProgress]:
    """
    Get all active downloads.

    Returns:
        Dictionary mapping URLs to their download progress information

    Raises:
        TrackingError: If there's an error getting active downloads
    """
    try:
        return progress_tracker.get_active_downloads()
    except Exception as e:
        raise TrackingError(f"Failed to get active downloads: {str(e)}")

def get_active_compressions() -> Dict[str, CompressionProgress]:
    """
    Get all active compression operations.

    Returns:
        Dictionary mapping file paths to their compression progress information

    Raises:
        TrackingError: If there's an error getting active compressions
    """
    try:
        return progress_tracker.get_active_compressions()
    except Exception as e:
        raise TrackingError(f"Failed to get active compressions: {str(e)}")

# Error handling helper functions
def create_error_context(
    component: str,
    operation: str,
    details: Optional[Dict[str, Any]] = None,
    severity: ErrorSeverity = ErrorSeverity.MEDIUM
) -> ErrorContext:
    """
    Create an error context object.

    Args:
        component: Component where error occurred
        operation: Operation that failed
        details: Optional error details
        severity: Error severity level

    Returns:
        ErrorContext object
    """
    return ErrorContext(component, operation, details, severity)
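Taken together, these helpers form a thin façade over the shared progress_tracker singleton. A minimal usage sketch (the URL is a placeholder, and the progress objects are treated as the dicts the tracker stores internally):

from videoarchiver.utils import (
    get_active_downloads,
    get_download_progress,
    create_error_context,
    ErrorSeverity,
)

# Poll progress for a hypothetical in-flight download.
progress = get_download_progress("https://example.com/some-video")
if progress is not None:
    print(f"download at {progress.get('percent', 0)}%")

# Enumerate everything still marked active.
for url, info in get_active_downloads().items():
    print(url, info.get("speed", "N/A"))

# Build a context object for structured error reporting.
ctx = create_error_context(
    component="downloader",
    operation="fetch",
    details={"url": "https://example.com/some-video"},
    severity=ErrorSeverity.HIGH,
)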
videoarchiver/utils/compression_handler.py (new file, 210 lines)

@@ -0,0 +1,210 @@
"""Video compression handling utilities"""

import os
import asyncio
import logging
import subprocess
from datetime import datetime
from typing import Dict, Optional, Callable, Set, Tuple

from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager
from videoarchiver.ffmpeg.exceptions import CompressionError
from videoarchiver.utils.exceptions import VideoVerificationError
from videoarchiver.utils.file_operations import FileOperations
from videoarchiver.utils.progress_handler import ProgressHandler

logger = logging.getLogger("VideoArchiver")

class CompressionHandler:
    """Handles video compression operations"""

    def __init__(self, ffmpeg_mgr: FFmpegManager, progress_handler: ProgressHandler,
                 file_ops: FileOperations):
        self.ffmpeg_mgr = ffmpeg_mgr
        self.progress_handler = progress_handler
        self.file_ops = file_ops
        self._active_processes: Set[subprocess.Popen] = set()
        self._processes_lock = asyncio.Lock()
        self._shutting_down = False
        self.max_file_size = 0  # Will be set during compression

    async def cleanup(self) -> None:
        """Clean up compression resources"""
        self._shutting_down = True
        try:
            async with self._processes_lock:
                for process in self._active_processes:
                    try:
                        process.terminate()
                        await asyncio.sleep(0.1)
                        if process.poll() is None:
                            process.kill()
                    except Exception as e:
                        logger.error(f"Error killing compression process: {e}")
                self._active_processes.clear()
        finally:
            self._shutting_down = False

    async def compress_video(
        self,
        input_file: str,
        output_file: str,
        max_size_mb: int,
        progress_callback: Optional[Callable[[float], None]] = None
    ) -> Tuple[bool, str]:
        """Compress video to target size"""
        if self._shutting_down:
            return False, "Compression handler is shutting down"

        self.max_file_size = max_size_mb

        try:
            # Get optimal compression parameters
            compression_params = self.ffmpeg_mgr.get_compression_params(
                input_file, max_size_mb
            )

            # Try hardware acceleration first
            success = await self._try_compression(
                input_file,
                output_file,
                compression_params,
                progress_callback,
                use_hardware=True
            )

            # Fall back to CPU if hardware acceleration fails
            if not success:
                logger.warning("Hardware acceleration failed, falling back to CPU encoding")
                success = await self._try_compression(
                    input_file,
                    output_file,
                    compression_params,
                    progress_callback,
                    use_hardware=False
                )

            if not success:
                return False, "Failed to compress with both hardware and CPU encoding"

            # Verify compressed file
            if not self.file_ops.verify_video_file(output_file, str(self.ffmpeg_mgr.get_ffprobe_path())):
                return False, "Compressed file verification failed"

            # Check final size
            within_limit, final_size = self.file_ops.check_file_size(output_file, max_size_mb)
            if not within_limit:
                return False, f"Failed to compress to target size: {final_size} bytes"

            return True, ""

        except Exception as e:
            return False, str(e)

    async def _try_compression(
        self,
        input_file: str,
        output_file: str,
        params: Dict[str, str],
        progress_callback: Optional[Callable[[float], None]] = None,
        use_hardware: bool = True,
    ) -> bool:
        """Attempt video compression with given parameters"""
        if self._shutting_down:
            return False

        try:
            # Build FFmpeg command
            ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
            cmd = [ffmpeg_path, "-y", "-i", input_file]

            # Add progress monitoring
            cmd.extend(["-progress", "pipe:1"])

            # Modify parameters based on hardware acceleration preference
            if use_hardware:
                gpu_info = self.ffmpeg_mgr.gpu_info
                if gpu_info["nvidia"] and params.get("c:v") == "libx264":
                    params["c:v"] = "h264_nvenc"
                elif gpu_info["amd"] and params.get("c:v") == "libx264":
                    params["c:v"] = "h264_amf"
                elif gpu_info["intel"] and params.get("c:v") == "libx264":
                    params["c:v"] = "h264_qsv"
            else:
                params["c:v"] = "libx264"
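            # Note on the branch above: when hardware encoding is requested,
            # libx264 is swapped for a vendor encoder based on detected GPUs
            # (NVIDIA -> h264_nvenc, AMD -> h264_amf, Intel -> h264_qsv);
            # rate-control keys in `params` (preset, crf, b:v) pass through
            # unchanged, and the CPU path forces libx264.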

            # Add all parameters to command
            for key, value in params.items():
                cmd.extend([f"-{key}", str(value)])

            # Add output file
            cmd.append(output_file)

            # Get video duration for progress calculation
            duration = self.file_ops.get_video_duration(input_file, str(self.ffmpeg_mgr.get_ffprobe_path()))

            # Initialize compression progress
            self.progress_handler.update(input_file, {
                "active": True,
                "filename": os.path.basename(input_file),
                "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
                "percent": 0,
                "elapsed_time": "0:00",
                "input_size": os.path.getsize(input_file),
                "current_size": 0,
                "target_size": self.max_file_size * 1024 * 1024,
                "codec": params.get("c:v", "unknown"),
                "hardware_accel": use_hardware,
                "preset": params.get("preset", "unknown"),
                "crf": params.get("crf", "unknown"),
                "duration": duration,
                "bitrate": params.get("b:v", "unknown"),
                "audio_codec": params.get("c:a", "unknown"),
                "audio_bitrate": params.get("b:a", "unknown"),
            })

            # Run compression with progress monitoring
            try:
                process = await asyncio.create_subprocess_exec(
                    *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
                )

                # Track the process
                async with self._processes_lock:
                    self._active_processes.add(process)

                start_time = datetime.utcnow()

                while True:
                    if self._shutting_down:
                        process.terminate()
                        return False

                    line = await process.stdout.readline()
                    if not line:
                        break

                    try:
                        line = line.decode().strip()
                        if line.startswith("out_time_ms="):
                            current_time = int(line.split("=")[1]) / 1000000
                            self.progress_handler.handle_compression_progress(
                                input_file, current_time, duration,
                                output_file, start_time, progress_callback
                            )
                    except Exception as e:
                        logger.error(f"Error parsing FFmpeg progress: {e}")

                await process.wait()
                return os.path.exists(output_file)

            except Exception as e:
                logger.error(f"Error during compression process: {e}")
                return False
            finally:
                # Remove process from tracking
                async with self._processes_lock:
                    self._active_processes.discard(process)

        except Exception as e:
            logger.error(f"Compression attempt failed: {str(e)}")
            return False
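The read loop above keys on FFmpeg's -progress pipe:1 stream, which emits key=value lines; note that out_time_ms is in microseconds despite the name, hence the division by one million. A standalone sketch of the same parsing against a synthetic stream (values are illustrative):

# Minimal sketch: parse FFmpeg "-progress pipe:1" style output.
sample = b"frame=240\nout_time_ms=8000000\nspeed=2.1x\nprogress=continue\n"

duration = 20.0  # seconds, as returned by get_video_duration()
for raw in sample.splitlines():
    line = raw.decode().strip()
    if line.startswith("out_time_ms="):
        # out_time_ms is in microseconds despite the name.
        current_time = int(line.split("=")[1]) / 1_000_000
        percent = min(100.0, (current_time / duration) * 100)
        print(f"{percent:.1f}% encoded")  # -> 40.0% encoded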
videoarchiver/utils/download_core.py (new file, 271 lines)

@@ -0,0 +1,271 @@
"""Core download functionality for video archiver"""

import os
import asyncio
import logging
import yt_dlp
from typing import Dict, Optional, Callable, Tuple
from pathlib import Path

from videoarchiver.utils.url_validator import check_url_support
from videoarchiver.utils.progress_handler import ProgressHandler, CancellableYTDLLogger
from videoarchiver.utils.file_operations import FileOperations
from videoarchiver.utils.compression_handler import CompressionHandler
from videoarchiver.utils.process_manager import ProcessManager
from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager

logger = logging.getLogger("VideoArchiver")

class DownloadCore:
    """Core download functionality for video archiver"""

    def __init__(
        self,
        download_path: str,
        video_format: str,
        max_quality: int,
        max_file_size: int,
        enabled_sites: Optional[list[str]] = None,
        concurrent_downloads: int = 2,
        ffmpeg_mgr: Optional[FFmpegManager] = None,
    ):
        self.download_path = Path(download_path)
        self.download_path.mkdir(parents=True, exist_ok=True)
        os.chmod(str(self.download_path), 0o755)

        self.video_format = video_format
        self.max_quality = max_quality
        self.max_file_size = max_file_size
        self.enabled_sites = enabled_sites
        self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager()

        # Initialize components
        self.process_manager = ProcessManager(concurrent_downloads)
        self.progress_handler = ProgressHandler()
        self.file_ops = FileOperations()
        self.compression_handler = CompressionHandler(
            self.ffmpeg_mgr, self.progress_handler, self.file_ops
        )

        # Create cancellable logger
        self.ytdl_logger = CancellableYTDLLogger()

        # Configure yt-dlp options
        self.ydl_opts = self._configure_ydl_options()

    def _configure_ydl_options(self) -> Dict:
        """Configure yt-dlp options"""
        return {
            "format": f"bv*[height<={self.max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={self.max_quality}]/best",
            "outtmpl": "%(title)s.%(ext)s",
            "merge_output_format": self.video_format,
            "quiet": True,
            "no_warnings": True,
            "extract_flat": True,
            "concurrent_fragment_downloads": 1,
            "retries": 5,
            "fragment_retries": 5,
            "file_access_retries": 3,
            "extractor_retries": 5,
            "postprocessor_hooks": [self._check_file_size],
            "progress_hooks": [self._handle_progress],
            "ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()),
            "ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()),
            "paths": {"home": str(self.download_path)},
            "logger": self.ytdl_logger,
            "ignoreerrors": True,
            "no_color": True,
            "geo_bypass": True,
            "socket_timeout": 60,
            "http_chunk_size": 1048576,
            "external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]},
            "max_sleep_interval": 5,
            "sleep_interval": 1,
            "max_filesize": self.max_file_size * 1024 * 1024,
        }
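For a sense of what _configure_ydl_options requests: with max_quality = 1080 the selector reads bv*[height<=1080][ext=mp4]+ba[ext=m4a]/b[height<=1080]/best, i.e. best mp4 video capped at 1080p merged with best m4a audio, falling back to a capped pre-muxed file, then to anything. A minimal probe of the same selector without downloading (the URL is a placeholder):

import yt_dlp

# Sketch: resolve the same selector DownloadCore builds, without downloading.
opts = {
    "format": "bv*[height<=1080][ext=mp4]+ba[ext=m4a]/b[height<=1080]/best",
    "quiet": True,
}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info("https://example.com/watch?v=placeholder", download=False)
    if info:
        print(info.get("format"), info.get("resolution"))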

    def _check_file_size(self, info: Dict) -> None:
        """Check if file size is within limits"""
        if info.get("filepath") and os.path.exists(info["filepath"]):
            try:
                size = os.path.getsize(info["filepath"])
                if size > (self.max_file_size * 1024 * 1024):
                    logger.info(
                        f"File exceeds size limit, will compress: {info['filepath']}"
                    )
            except OSError as e:
                logger.error(f"Error checking file size: {str(e)}")

    def _handle_progress(self, d: Dict) -> None:
        """Handle download progress updates"""
        url = d.get("info_dict", {}).get("webpage_url", "unknown")
        self.progress_handler.handle_download_progress(d, url)

    def is_supported_url(self, url: str) -> bool:
        """Check if URL is supported"""
        return check_url_support(url, self.ydl_opts, self.enabled_sites)

    async def download_video(
        self, url: str, progress_callback: Optional[Callable[[float], None]] = None
    ) -> Tuple[bool, str, str]:
        """Download and process a video"""
        if self.process_manager.is_shutting_down:
            return False, "", "Download manager is shutting down"

        # Initialize progress tracking
        self.progress_handler.initialize_progress(url)
        original_file = None
        compressed_file = None

        try:
            # Download the video
            success, file_path, error = await self._safe_download(
                url, str(self.download_path), progress_callback
            )
            if not success:
                return False, "", error

            original_file = file_path
            await self.process_manager.track_download(url, original_file)

            # Check file size and compress if needed
            within_limit, file_size = self.file_ops.check_file_size(original_file, self.max_file_size)
            if not within_limit:
                logger.info(f"Compressing video: {original_file}")
                try:
                    compressed_file = os.path.join(
                        self.download_path,
                        f"compressed_{os.path.basename(original_file)}",
                    )

                    # Attempt compression
                    success, error = await self.compression_handler.compress_video(
                        original_file,
                        compressed_file,
                        self.max_file_size,
                        progress_callback
                    )

                    if not success:
                        await self._cleanup_files(original_file, compressed_file)
                        return False, "", error

                    # Verify compressed file
                    if not self.file_ops.verify_video_file(
                        compressed_file,
                        str(self.ffmpeg_mgr.get_ffprobe_path())
                    ):
                        await self._cleanup_files(original_file, compressed_file)
                        return False, "", "Compressed file verification failed"

                    # Delete original and return compressed
                    await self.file_ops.safe_delete_file(original_file)
                    return True, compressed_file, ""

                except Exception as e:
                    error_msg = f"Compression failed: {str(e)}"
                    await self._cleanup_files(original_file, compressed_file)
                    return False, "", error_msg
            else:
                # Move file to final location if no compression needed
                final_path = os.path.join(
                    self.download_path,
                    os.path.basename(original_file)
                )
                success = await self.file_ops.safe_move_file(original_file, final_path)
                if not success:
                    await self._cleanup_files(original_file)
                    return False, "", "Failed to move file to final location"
                return True, final_path, ""

        except Exception as e:
            logger.error(f"Download error: {str(e)}")
            await self._cleanup_files(original_file, compressed_file)
            return False, "", str(e)

        finally:
            # Clean up tracking
            await self.process_manager.untrack_download(url)
            self.progress_handler.complete(url)

    async def _safe_download(
        self,
        url: str,
        output_dir: str,
        progress_callback: Optional[Callable[[float], None]] = None,
    ) -> Tuple[bool, str, str]:
        """Safely download video with retries"""
        if self.process_manager.is_shutting_down:
            return False, "", "Download manager is shutting down"

        last_error = None
        for attempt in range(5):  # Max retries
            try:
                ydl_opts = self.ydl_opts.copy()
                ydl_opts["outtmpl"] = os.path.join(output_dir, ydl_opts["outtmpl"])

                # Add progress callback
                if progress_callback:
                    original_progress_hook = ydl_opts["progress_hooks"][0]

                    def combined_progress_hook(d):
                        original_progress_hook(d)
                        if d["status"] == "downloading":
                            try:
                                percent = float(
                                    d.get("_percent_str", "0").replace("%", "")
                                )
                                progress_callback(percent)
                            except Exception as e:
                                logger.error(f"Error in progress callback: {e}")

                    ydl_opts["progress_hooks"] = [combined_progress_hook]

                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info = await asyncio.get_event_loop().run_in_executor(
                        self.process_manager.download_pool,
                        lambda: ydl.extract_info(url, download=True)
                    )

                    if info is None:
                        raise Exception("Failed to extract video information")

                    file_path = os.path.join(output_dir, ydl.prepare_filename(info))
                    if not os.path.exists(file_path):
                        raise FileNotFoundError("Download completed but file not found")

                    if not self.file_ops.verify_video_file(
                        file_path,
                        str(self.ffmpeg_mgr.get_ffprobe_path())
                    ):
                        raise Exception("Downloaded file is not a valid video")

                    return True, file_path, ""

            except Exception as e:
                last_error = str(e)
                logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
                if attempt < 4:  # Less than max retries
                    delay = 10 * (2**attempt) + (attempt * 2)  # Exponential backoff
                    await asyncio.sleep(delay)
                else:
                    return False, "", f"All download attempts failed: {last_error}"

    async def _cleanup_files(self, *files: str) -> None:
        """Clean up multiple files"""
        for file in files:
            if file and os.path.exists(file):
                await self.file_ops.safe_delete_file(file)

    async def cleanup(self) -> None:
        """Clean up resources"""
        await self.process_manager.cleanup()
        await self.compression_handler.cleanup()

    async def force_cleanup(self) -> None:
        """Force cleanup of all resources"""
        self.ytdl_logger.cancelled = True
        await self.process_manager.force_cleanup()
        await self.compression_handler.cleanup()
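The retry delay in _safe_download is 10 * 2**attempt + 2 * attempt seconds, i.e. a doubling schedule with a small linear offset, giving waits of 10s, 22s, 44s, and 86s between the five attempts. A worked example of the schedule:

# Worked example: the backoff schedule used by _safe_download.
for attempt in range(4):  # no sleep after the final (5th) attempt
    delay = 10 * (2**attempt) + (attempt * 2)
    print(f"attempt {attempt + 1} failed -> sleep {delay}s")
# attempt 1 failed -> sleep 10s
# attempt 2 failed -> sleep 22s
# attempt 3 failed -> sleep 44s
# attempt 4 failed -> sleep 86s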
videoarchiver/utils/exceptions.py

@@ -1,8 +1,44 @@
"""Custom exceptions for VideoArchiver"""

from typing import Optional, Dict, Any
from enum import Enum, auto

class ErrorSeverity(Enum):
    """Severity levels for errors"""
    LOW = auto()
    MEDIUM = auto()
    HIGH = auto()
    CRITICAL = auto()

class ErrorContext:
    """Context information for errors"""
    def __init__(
        self,
        component: str,
        operation: str,
        details: Optional[Dict[str, Any]] = None,
        severity: ErrorSeverity = ErrorSeverity.MEDIUM
    ) -> None:
        self.component = component
        self.operation = operation
        self.details = details or {}
        self.severity = severity

    def __str__(self) -> str:
        return (
            f"[{self.severity.name}] {self.component}.{self.operation}: "
            f"{', '.join(f'{k}={v}' for k, v in self.details.items())}"
        )

class VideoArchiverError(Exception):
    """Base exception for VideoArchiver errors"""
    def __init__(
        self,
        message: str,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.context = context
        super().__init__(f"{context}: {message}" if context else message)

class VideoDownloadError(VideoArchiverError):
    """Error downloading video"""

@@ -38,7 +74,17 @@ class PermissionError(VideoArchiverError):

class NetworkError(VideoArchiverError):
    """Error with network operations"""
    def __init__(
        self,
        message: str,
        url: Optional[str] = None,
        status_code: Optional[int] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.url = url
        self.status_code = status_code
        if url:
            details = f" (URL: {url}" + (f", Status: {status_code})" if status_code else ")")
        else:
            details = f" (Status: {status_code})" if status_code else ""
        super().__init__(message + details, context)

class ResourceError(VideoArchiverError):
    """Error with system resources"""

@@ -54,15 +100,27 @@ class ComponentError(VideoArchiverError):

class DiscordAPIError(VideoArchiverError):
    """Error with Discord API operations"""
    def __init__(
        self,
        message: str,
        status_code: Optional[int] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.status_code = status_code
        details = f" (Status: {status_code})" if status_code else ""
        super().__init__(f"Discord API Error: {message}{details}", context)

class ResourceExhaustedError(VideoArchiverError):
    """Error when system resources are exhausted"""
    def __init__(
        self,
        message: str,
        resource_type: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.resource_type = resource_type
        details = f" (Type: {resource_type})" if resource_type else ""
        super().__init__(f"Resource exhausted: {message}{details}", context)

class ProcessingError(VideoArchiverError):
    """Error during video processing"""

@@ -74,4 +132,126 @@ class CleanupError(VideoArchiverError):

class FileOperationError(VideoArchiverError):
    """Error during file operations"""
    def __init__(
        self,
        message: str,
        path: Optional[str] = None,
        operation: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.path = path
        self.operation = operation
        details = []
        if path:
            details.append(f"Path: {path}")
        if operation:
            details.append(f"Operation: {operation}")
        details_str = f" ({', '.join(details)})" if details else ""
        super().__init__(f"File operation error: {message}{details_str}", context)

# New exceptions for processor components
class ProcessorError(VideoArchiverError):
    """Error in video processor operations"""
    pass

class ValidationError(VideoArchiverError):
    """Error in message or content validation"""
    pass

class DisplayError(VideoArchiverError):
    """Error in status display operations"""
    pass

class URLExtractionError(VideoArchiverError):
    """Error extracting URLs from content"""
    def __init__(
        self,
        message: str,
        url: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.url = url
        details = f" (URL: {url})" if url else ""
        super().__init__(f"URL extraction error: {message}{details}", context)

class MessageHandlerError(VideoArchiverError):
    """Error in message handling operations"""
    def __init__(
        self,
        message: str,
        message_id: Optional[int] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.message_id = message_id
        details = f" (Message ID: {message_id})" if message_id else ""
        super().__init__(f"Message handler error: {message}{details}", context)

class QueueHandlerError(VideoArchiverError):
    """Error in queue handling operations"""
    pass

class QueueProcessorError(VideoArchiverError):
    """Error in queue processing operations"""
    pass

class FFmpegError(VideoArchiverError):
    """Error in FFmpeg operations"""
    def __init__(
        self,
        message: str,
        command: Optional[str] = None,
        exit_code: Optional[int] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.command = command
        self.exit_code = exit_code
        details = []
        if command:
            details.append(f"Command: {command}")
        if exit_code is not None:
            details.append(f"Exit Code: {exit_code}")
        details_str = f" ({', '.join(details)})" if details else ""
        super().__init__(f"FFmpeg error: {message}{details_str}", context)

class DatabaseError(VideoArchiverError):
    """Error in database operations"""
    def __init__(
        self,
        message: str,
        query: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.query = query
        details = f" (Query: {query})" if query else ""
        super().__init__(f"Database error: {message}{details}", context)

class HealthCheckError(VideoArchiverError):
    """Error in health check operations"""
    def __init__(
        self,
        message: str,
        component: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.component = component
        details = f" (Component: {component})" if component else ""
        super().__init__(f"Health check error: {message}{details}", context)

class TrackingError(VideoArchiverError):
    """Error in progress tracking operations"""
    def __init__(
        self,
        message: str,
        operation: Optional[str] = None,
        item_id: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ) -> None:
        self.operation = operation
        self.item_id = item_id
        details = []
        if operation:
            details.append(f"Operation: {operation}")
        if item_id:
            details.append(f"Item ID: {item_id}")
        details_str = f" ({', '.join(details)})" if details else ""
        super().__init__(f"Progress tracking error: {message}{details_str}", context)
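Because every subclass funnels through VideoArchiverError.__init__, attaching an ErrorContext prefixes the message with a [SEVERITY] component.operation: k=v header. A sketch of how a raised FFmpegError renders (values are illustrative):

ctx = ErrorContext(
    component="compression",
    operation="compress_video",
    details={"input": "clip.mp4", "target_mb": 8},
    severity=ErrorSeverity.HIGH,
)
try:
    raise FFmpegError("encoder crashed", command="ffmpeg -i clip.mp4 ...", exit_code=1, context=ctx)
except VideoArchiverError as e:
    # -> [HIGH] compression.compress_video: input=clip.mp4, target_mb=8:
    #    FFmpeg error: encoder crashed (Command: ffmpeg -i clip.mp4 ..., Exit Code: 1)
    print(e)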
videoarchiver/utils/file_operations.py (new file, 138 lines)

@@ -0,0 +1,138 @@
"""Safe file operations with retry logic"""

import os
import shutil
import asyncio
import logging
import json
import subprocess
from typing import Tuple
from pathlib import Path

from videoarchiver.utils.exceptions import VideoVerificationError
from videoarchiver.utils.file_deletion import secure_delete_file

logger = logging.getLogger("VideoArchiver")

class FileOperations:
    """Handles safe file operations with retries"""

    def __init__(self, max_retries: int = 3, retry_delay: int = 1):
        self.max_retries = max_retries
        self.retry_delay = retry_delay

    async def safe_delete_file(self, file_path: str) -> bool:
        """Safely delete a file with retries"""
        for attempt in range(self.max_retries):
            try:
                if os.path.exists(file_path):
                    await secure_delete_file(file_path)
                return True
            except Exception as e:
                logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
                if attempt == self.max_retries - 1:
                    return False
                await asyncio.sleep(self.retry_delay * (attempt + 1))
        return False

    async def safe_move_file(self, src: str, dst: str) -> bool:
        """Safely move a file with retries"""
        for attempt in range(self.max_retries):
            try:
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.move(src, dst)
                return True
            except Exception as e:
                logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
                if attempt == self.max_retries - 1:
                    return False
                await asyncio.sleep(self.retry_delay * (attempt + 1))
        return False

    def verify_video_file(self, file_path: str, ffprobe_path: str) -> bool:
        """Verify video file integrity"""
        try:
            cmd = [
                ffprobe_path,
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                "-show_streams",
                file_path,
            ]

            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if result.returncode != 0:
                raise VideoVerificationError(f"FFprobe failed: {result.stderr}")

            probe = json.loads(result.stdout)

            # Verify video stream
            video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
            if not video_streams:
                raise VideoVerificationError("No video streams found")

            # Verify duration
            duration = float(probe["format"].get("duration", 0))
            if duration <= 0:
                raise VideoVerificationError("Invalid video duration")

            # Verify file is readable
            try:
                with open(file_path, "rb") as f:
                    f.seek(0, 2)
                    if f.tell() == 0:
                        raise VideoVerificationError("Empty file")
            except Exception as e:
                raise VideoVerificationError(f"File read error: {str(e)}")

            return True

        except subprocess.TimeoutExpired:
            logger.error(f"FFprobe timed out for {file_path}")
            return False
        except json.JSONDecodeError:
            logger.error(f"Invalid FFprobe output for {file_path}")
            return False
        except Exception as e:
            logger.error(f"Error verifying video file {file_path}: {e}")
            return False

    def get_video_duration(self, file_path: str, ffprobe_path: str) -> float:
        """Get video duration in seconds"""
        try:
            cmd = [
                ffprobe_path,
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                file_path,
            ]
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                raise Exception(f"FFprobe failed: {result.stderr}")

            data = json.loads(result.stdout)
            return float(data["format"]["duration"])
        except Exception as e:
            logger.error(f"Error getting video duration: {e}")
            return 0

    def check_file_size(self, file_path: str, max_size_mb: int) -> Tuple[bool, int]:
        """Check if file size is within limits"""
        try:
            if os.path.exists(file_path):
                size = os.path.getsize(file_path)
                max_size = max_size_mb * 1024 * 1024
                return size <= max_size, size
            return False, 0
        except OSError as e:
            logger.error(f"Error checking file size: {e}")
            return False, 0
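verify_video_file leans on ffprobe's -print_format json output; the fields it inspects have roughly this shape (a trimmed, illustrative payload, not output from this repo):

# Shape of the ffprobe JSON that verify_video_file inspects (illustrative).
probe = {
    "streams": [
        {"codec_type": "video", "codec_name": "h264", "width": 1920, "height": 1080},
        {"codec_type": "audio", "codec_name": "aac"},
    ],
    "format": {"duration": "42.5", "size": "10485760"},
}

video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
assert video_streams and float(probe["format"].get("duration", 0)) > 0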
videoarchiver/utils/process_manager.py (new file, 111 lines)

@@ -0,0 +1,111 @@
"""Process management and cleanup utilities"""

import asyncio
import logging
import subprocess
from typing import Set, Dict, Any
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor

logger = logging.getLogger("VideoArchiver")

class ProcessManager:
    """Manages processes and resources for video operations"""

    def __init__(self, concurrent_downloads: int = 2):
        self._active_processes: Set[subprocess.Popen] = set()
        self._processes_lock = asyncio.Lock()
        self._shutting_down = False

        # Create thread pool with proper naming
        self.download_pool = ThreadPoolExecutor(
            max_workers=max(1, min(3, concurrent_downloads)),
            thread_name_prefix="videoarchiver_download"
        )

        # Track active downloads
        self.active_downloads: Dict[str, Dict[str, Any]] = {}
        self._downloads_lock = asyncio.Lock()

    async def cleanup(self) -> None:
        """Clean up resources with proper shutdown"""
        self._shutting_down = True

        try:
            # Kill any active processes
            async with self._processes_lock:
                for process in self._active_processes:
                    try:
                        process.terminate()
                        await asyncio.sleep(0.1)  # Give process time to terminate
                        if process.poll() is None:
                            process.kill()  # Force kill if still running
                    except Exception as e:
                        logger.error(f"Error killing process: {e}")
                self._active_processes.clear()

            # Clean up thread pool
            self.download_pool.shutdown(wait=False, cancel_futures=True)

            # Clean up active downloads
            async with self._downloads_lock:
                self.active_downloads.clear()

        except Exception as e:
            logger.error(f"Error during process manager cleanup: {e}")
        finally:
            self._shutting_down = False

    async def force_cleanup(self) -> None:
        """Force cleanup of all resources"""
        try:
            # Kill all processes immediately
            async with self._processes_lock:
                for process in self._active_processes:
                    try:
                        process.kill()
                    except Exception as e:
                        logger.error(f"Error force killing process: {e}")
                self._active_processes.clear()

            # Force shutdown thread pool
            self.download_pool.shutdown(wait=False, cancel_futures=True)

            # Clear all tracking
            async with self._downloads_lock:
                self.active_downloads.clear()

        except Exception as e:
            logger.error(f"Error during force cleanup: {e}")

    async def track_download(self, url: str, file_path: str) -> None:
        """Track a new download"""
        async with self._downloads_lock:
            self.active_downloads[url] = {
                "file_path": file_path,
                "start_time": datetime.utcnow(),
            }

    async def untrack_download(self, url: str) -> None:
        """Remove download from tracking"""
        async with self._downloads_lock:
            self.active_downloads.pop(url, None)

    async def track_process(self, process: subprocess.Popen) -> None:
        """Track a new process"""
        async with self._processes_lock:
            self._active_processes.add(process)

    async def untrack_process(self, process: subprocess.Popen) -> None:
        """Remove process from tracking"""
        async with self._processes_lock:
            self._active_processes.discard(process)

    @property
    def is_shutting_down(self) -> bool:
        """Check if manager is shutting down"""
        return self._shutting_down

    def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
        """Get current active downloads"""
        return self.active_downloads
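A minimal sketch of the ProcessManager lifecycle, assuming a throwaway child process stands in for real encoding work (the URL, path, and `sleep` command are placeholders):

import asyncio
import subprocess

async def demo() -> None:
    pm = ProcessManager(concurrent_downloads=2)
    await pm.track_download("https://example.com/v", "/tmp/v.mp4")

    # Stand-in for an FFmpeg/yt-dlp child process.
    proc = subprocess.Popen(["sleep", "5"])
    await pm.track_process(proc)

    print(pm.get_active_downloads())  # {'https://example.com/v': {...}}
    await pm.cleanup()                # terminates proc, clears tracking

asyncio.run(demo())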
videoarchiver/utils/progress_handler.py (new file, 126 lines)

@@ -0,0 +1,126 @@
"""Progress tracking and logging utilities for video downloads"""

import os
import logging
import yt_dlp
from datetime import datetime
from typing import Dict, Any, Optional, Callable

logger = logging.getLogger("VideoArchiver")

class CancellableYTDLLogger:
    """Custom yt-dlp logger that can handle cancellation"""
    def __init__(self):
        self.cancelled = False

    def debug(self, msg):
        if self.cancelled:
            raise yt_dlp.utils.DownloadError("Download cancelled")
        logger.debug(msg)

    def warning(self, msg):
        if self.cancelled:
            raise yt_dlp.utils.DownloadError("Download cancelled")
        logger.warning(msg)

    def error(self, msg):
        if self.cancelled:
            raise yt_dlp.utils.DownloadError("Download cancelled")
        logger.error(msg)

class ProgressHandler:
    """Handles progress tracking and callbacks for video operations"""
    def __init__(self):
        self.progress_data: Dict[str, Dict[str, Any]] = {}

    def initialize_progress(self, url: str) -> None:
        """Initialize progress tracking for a URL"""
        self.progress_data[url] = {
            "active": True,
            "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
            "percent": 0,
            "speed": "N/A",
            "eta": "N/A",
            "downloaded_bytes": 0,
            "total_bytes": 0,
            "retries": 0,
            "fragment_count": 0,
            "fragment_index": 0,
            "video_title": "Unknown",
            "extractor": "Unknown",
            "format": "Unknown",
            "resolution": "Unknown",
            "fps": "Unknown",
        }

    def update(self, key: str, data: Dict[str, Any]) -> None:
        """Update progress data for a key"""
        if key in self.progress_data:
            self.progress_data[key].update(data)

    def complete(self, key: str) -> None:
        """Mark progress as complete for a key"""
        if key in self.progress_data:
            self.progress_data[key]["active"] = False
            self.progress_data[key]["percent"] = 100

    def get_progress(self, key: str) -> Optional[Dict[str, Any]]:
        """Get progress data for a key"""
        return self.progress_data.get(key)

    def handle_download_progress(self, d: Dict[str, Any], url: str,
                                 progress_callback: Optional[Callable[[float], None]] = None) -> None:
        """Handle download progress updates"""
        try:
            if d["status"] == "downloading":
                progress_data = {
                    "active": True,
                    "percent": float(d.get("_percent_str", "0").replace("%", "")),
                    "speed": d.get("_speed_str", "N/A"),
                    "eta": d.get("_eta_str", "N/A"),
                    "downloaded_bytes": d.get("downloaded_bytes", 0),
                    "total_bytes": d.get("total_bytes", 0) or d.get("total_bytes_estimate", 0),
                    "retries": d.get("retry_count", 0),
                    "fragment_count": d.get("fragment_count", 0),
                    "fragment_index": d.get("fragment_index", 0),
                    "video_title": d.get("info_dict", {}).get("title", "Unknown"),
                    "extractor": d.get("info_dict", {}).get("extractor", "Unknown"),
                    "format": d.get("info_dict", {}).get("format", "Unknown"),
                    "resolution": d.get("info_dict", {}).get("resolution", "Unknown"),
                    "fps": d.get("info_dict", {}).get("fps", "Unknown"),
                }
                self.update(url, progress_data)

                if progress_callback:
                    progress_callback(progress_data["percent"])

                logger.debug(
                    f"Download progress: {progress_data['percent']}% at {progress_data['speed']}, "
                    f"ETA: {progress_data['eta']}, Downloaded: {progress_data['downloaded_bytes']}/"
                    f"{progress_data['total_bytes']} bytes"
                )
            elif d["status"] == "finished":
                logger.info(f"Download completed: {d.get('filename', 'unknown')}")

        except Exception as e:
            logger.error(f"Error in progress handler: {str(e)}")

    def handle_compression_progress(self, input_file: str, current_time: float, duration: float,
                                    output_file: str, start_time: datetime,
                                    progress_callback: Optional[Callable[[float], None]] = None) -> None:
        """Handle compression progress updates"""
        try:
            if duration > 0:
                progress = min(100, (current_time / duration) * 100)
                elapsed = datetime.utcnow() - start_time

                self.update(input_file, {
                    "percent": progress,
                    "elapsed_time": str(elapsed).split(".")[0],
                    "current_size": os.path.getsize(output_file) if os.path.exists(output_file) else 0,
                    "current_time": current_time,
                })

                if progress_callback:
                    progress_callback(progress)

        except Exception as e:
            logger.error(f"Error updating compression progress: {str(e)}")
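Driving ProgressHandler only requires a yt-dlp-style status dict; a synthetic "downloading" event for illustration (keys mirror yt-dlp's progress hooks, values are made up):

handler = ProgressHandler()
handler.initialize_progress("https://example.com/v")

# Synthetic yt-dlp progress event.
event = {
    "status": "downloading",
    "_percent_str": "37.5%",
    "_speed_str": "1.2MiB/s",
    "_eta_str": "00:42",
    "downloaded_bytes": 3_932_160,
    "total_bytes": 10_485_760,
    "info_dict": {"title": "demo", "extractor": "generic"},
}
handler.handle_download_progress(event, "https://example.com/v",
                                 progress_callback=lambda p: print(f"{p:.1f}%"))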
@@ -1,109 +1,205 @@
|
||||
"""Module for tracking download and compression progress"""
|
||||
"""Progress tracking module."""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger("ProgressTracker")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ProgressTracker:
|
||||
"""Tracks progress of downloads and compression operations"""
|
||||
"""Progress tracker singleton."""
|
||||
_instance = None
|
||||
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance._initialized = False
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
self._download_progress: Dict[str, Dict[str, Any]] = {}
|
||||
self._compression_progress: Dict[str, Dict[str, Any]] = {}
|
||||
if not hasattr(self, '_initialized'):
|
||||
self._data: Dict[str, Dict[str, Any]] = {}
|
||||
self._initialized = True
|
||||
|
||||
def start_download(self, url: str) -> None:
|
||||
"""Initialize progress tracking for a download"""
|
||||
self._download_progress[url] = {
|
||||
"active": True,
|
||||
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"percent": 0,
|
||||
"speed": "N/A",
|
||||
"eta": "N/A",
|
||||
"downloaded_bytes": 0,
|
||||
"total_bytes": 0,
|
||||
"retries": 0,
|
||||
"fragment_count": 0,
|
||||
"fragment_index": 0,
|
||||
"video_title": "Unknown",
|
||||
"extractor": "Unknown",
|
||||
"format": "Unknown",
|
||||
"resolution": "Unknown",
|
||||
"fps": "Unknown",
|
||||
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
def update(self, key: str, data: Dict[str, Any]) -> None:
|
||||
"""Update progress for a key."""
|
||||
if key not in self._data:
|
||||
self._data[key] = {
|
||||
'active': True,
|
||||
'start_time': datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
'percent': 0
|
||||
}
|
||||
self._data[key].update(data)
|
||||
self._data[key]['last_update'] = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
logger.debug(f"Progress for {key}: {self._data[key].get('percent', 0)}%")
|
||||
|
||||
def update_download_progress(self, data: Dict[str, Any]) -> None:
|
||||
"""Update download progress information"""
|
||||
def get(self, key: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Get progress for a key."""
|
||||
if key is None:
|
||||
return self._data
|
||||
return self._data.get(key, {})
|
||||
|
||||
def complete(self, key: str) -> None:
|
||||
"""Mark progress as complete."""
|
||||
if key in self._data:
|
||||
self._data[key]['active'] = False
|
||||
logger.info(f"Operation completed for {key}")
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Clear all progress data."""
|
||||
self._data.clear()
|
||||
logger.info("Progress data cleared")
|
||||
|
||||
_tracker = ProgressTracker()
|
||||
|
||||
def get_compression(self, file_path: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Get compression progress."""
|
||||
if file_path is None:
|
||||
return self._compressions
|
||||
return self._compressions.get(file_path, {})
|
||||
|
||||
def complete_download(self, url: str) -> None:
|
||||
"""Mark download as complete."""
|
||||
if url in self._downloads:
|
||||
self._downloads[url]['active'] = False
|
||||
logger.info(f"Download completed for {url}")
|
||||
|
||||
def complete_compression(self, file_path: str) -> None:
|
||||
"""Mark compression as complete."""
|
||||
if file_path in self._compressions:
|
||||
self._compressions[file_path]['active'] = False
|
||||
logger.info(f"Compression completed for {file_path}")
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Clear all progress data."""
|
||||
self._downloads.clear()
|
||||
self._compressions.clear()
|
||||
logger.info("Progress data cleared")
|
||||
|
||||
# Global instance
|
||||
_tracker = ProgressTrack
|
||||
|
||||
# Global instance
|
||||
_tracker = ProgressTracker()
|
||||
|
||||
def get_tracker() -> Progre
|
||||
"""Clear all progress tracking"""
|
||||
self._download_progress.clear()
|
||||
self._compression_progress.clear()
|
||||
logger.info("Cleared all progress tracking data")
|
||||
|
||||
# Create singleton instance
|
||||
progress_tracker = ProgressTracker()
|
||||
|
||||
def get_progress_tracker() -> ProgressTracker:
|
||||
|
||||
def mark_compression_complete(self, file_path: str) -> None:
|
||||
"""Mark a compression operation as complete"""
|
||||
if file_path in self._compression_progress:
|
||||
self._compression_progress[file_path]['active'] = False
|
||||
logger.info(f"Compression completed for {file_path}")
|
||||
|
||||
def clear_progress(self) -> None:
|
||||
"""Clear all progress tracking"""
|
||||
self._download_progress.clear()
|
||||
self._compression_progress.clear()
|
||||
logger.info("Cleared all progress tracking data")
|
||||
|
||||
# Create singleton instance
|
||||
progress_tracker = ProgressTracker()
|
||||
|
||||
# Export the singleton instance
|
||||
def get_progress_tracker() -> ProgressTracker:
|
||||
|
||||
|
||||
Args:
|
||||
data: Dictionary containing download progress data
|
||||
"""
|
||||
try:
|
||||
# Get URL from info dict
|
||||
url = data.get("info_dict", {}).get("webpage_url", "unknown")
|
||||
if url not in self._download_progress:
|
||||
info_dict = data.get("info_dict", {})
|
||||
url = info_dict.get("webpage_url")
|
||||
if not url or url not in self._download_progress:
|
||||
return
|
||||
|
||||
if data["status"] == "downloading":
|
||||
if data.get("status") == "downloading":
|
||||
percent_str = data.get("_percent_str", "0").replace("%", "")
|
||||
try:
|
||||
percent = float(percent_str)
|
||||
except ValueError:
|
||||
percent = 0.0
|
||||
|
||||
total_bytes = (
|
||||
data.get("total_bytes", 0) or
|
||||
data.get("total_bytes_estimate", 0)
|
||||
)
|
||||
|
||||
self._download_progress[url].update({
|
||||
"active": True,
|
||||
"percent": float(data.get("_percent_str", "0").replace("%", "")),
|
||||
"percent": percent,
|
||||
"speed": data.get("_speed_str", "N/A"),
|
||||
"eta": data.get("_eta_str", "N/A"),
|
||||
"downloaded_bytes": data.get("downloaded_bytes", 0),
|
||||
"total_bytes": data.get("total_bytes", 0) or data.get("total_bytes_estimate", 0),
|
||||
"total_bytes": total_bytes,
|
||||
"retries": data.get("retry_count", 0),
|
||||
"fragment_count": data.get("fragment_count", 0),
|
||||
"fragment_index": data.get("fragment_index", 0),
|
||||
"video_title": data.get("info_dict", {}).get("title", "Unknown"),
|
||||
"extractor": data.get("info_dict", {}).get("extractor", "Unknown"),
|
||||
"format": data.get("info_dict", {}).get("format", "Unknown"),
|
||||
"resolution": data.get("info_dict", {}).get("resolution", "Unknown"),
|
||||
"fps": data.get("info_dict", {}).get("fps", "Unknown"),
|
||||
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"video_title": info_dict.get("title", "Unknown"),
|
||||
"extractor": info_dict.get("extractor", "Unknown"),
|
||||
"format": info_dict.get("format", "Unknown"),
|
||||
"resolution": info_dict.get("resolution", "Unknown"),
|
||||
"fps": info_dict.get("fps", "Unknown"),
|
||||
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
})
|
||||
|
||||
logger.debug(
|
||||
f"Download progress for {url}: "
|
||||
f"{self._download_progress[url]['percent']}% at {self._download_progress[url]['speed']}, "
|
||||
f"{percent:.1f}% at {self._download_progress[url]['speed']}, "
|
||||
f"ETA: {self._download_progress[url]['eta']}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating download progress: {e}")
|
||||
logger.error(f"Error updating download progress: {e}", exc_info=True)
|
||||
|
||||
def end_download(self, url: str) -> None:
|
||||
"""Mark a download as completed"""
|
||||
def end_download(self, url: str, status: ProgressStatus = ProgressStatus.COMPLETED) -> None:
|
||||
"""
|
||||
Mark a download as completed.
|
||||
|
||||
Args:
|
||||
url: The URL being downloaded
|
||||
status: The final status of the download
|
||||
"""
|
||||
if url in self._download_progress:
|
||||
self._download_progress[url]["active"] = False
|
||||
logger.info(f"Download {status.value} for {url}")
|
||||
|
||||
def start_compression(
|
||||
self,
|
||||
input_file: str,
|
||||
params: Dict[str, str],
|
||||
use_hardware: bool,
|
||||
duration: float,
|
||||
input_size: int,
|
||||
target_size: int
|
||||
) -> None:
|
||||
"""Initialize progress tracking for compression"""
|
||||
self._compression_progress[input_file] = {
|
||||
"active": True,
|
||||
"filename": input_file,
|
||||
"start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"percent": 0,
|
||||
"elapsed_time": "0:00",
|
||||
"input_size": input_size,
|
||||
"current_size": 0,
|
||||
"target_size": target_size,
|
||||
"codec": params.get("c:v", "unknown"),
|
||||
"hardware_accel": use_hardware,
|
||||
"preset": params.get("preset", "unknown"),
|
||||
"crf": params.get("crf", "unknown"),
|
||||
"duration": duration,
|
||||
"bitrate": params.get("b:v", "unknown"),
|
||||
"audio_codec": params.get("c:a", "unknown"),
|
||||
"audio_bitrate": params.get("b:a", "unknown"),
|
||||
"last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
def start_compression(self, params: CompressionParams) -> None:
|
||||
"""
|
||||
Initialize progress tracking for compression.
|
||||
|
||||
Args:
|
||||
params: Compression parameters
|
||||
"""
|
||||
current_time = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
self._compression_progress[params.input_file] = CompressionProgress(
|
||||
active=True,
|
||||
filename=params.input_file,
|
||||
start_time=current_time,
|
||||
percent=0.0,
|
||||
elapsed_time="0:00",
|
||||
input_size=params.input_size,
|
||||
current_size=0,
|
||||
target_size=params.target_size,
|
||||
codec=params.codec_params.get("c:v", "unknown"),
|
||||
hardware_accel=params.use_hardware,
|
||||
preset=params.codec_params.get("preset", "unknown"),
|
||||
crf=params.codec_params.get("crf", "unknown"),
|
||||
duration=params.duration,
|
||||
bitrate=params.codec_params.get("b:v", "unknown"),
|
||||
audio_codec=params.codec_params.get("c:a", "unknown"),
|
||||
audio_bitrate=params.codec_params.get("b:a", "unknown"),
|
||||
last_update=current_time,
|
||||
current_time=None
|
||||
)

    def update_compression_progress(
        self,
@@ -113,14 +209,23 @@ class ProgressTracker:
        current_size: int,
        current_time: float
    ) -> None:
        """Update compression progress information"""
        """
        Update compression progress information.

        Args:
            input_file: The input file being compressed
            progress: Current progress percentage (0-100)
            elapsed_time: Time elapsed as string
            current_size: Current file size in bytes
            current_time: Current timestamp in seconds
        """
        if input_file in self._compression_progress:
            self._compression_progress[input_file].update({
                "percent": progress,
                "percent": max(0.0, min(100.0, progress)),
                "elapsed_time": elapsed_time,
                "current_size": current_size,
                "current_time": current_time,
                "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
                "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
            })

            logger.debug(
@@ -128,29 +233,73 @@ class ProgressTracker:
                f"{progress:.1f}%, Size: {current_size}/{self._compression_progress[input_file]['target_size']} bytes"
            )

    def end_compression(self, input_file: str) -> None:
        """Mark a compression operation as completed"""
    def end_compression(
        self,
        input_file: str,
        status: ProgressStatus = ProgressStatus.COMPLETED
    ) -> None:
        """
        Mark a compression operation as completed.

        Args:
            input_file: The input file being compressed
            status: The final status of the compression
        """
        if input_file in self._compression_progress:
            self._compression_progress[input_file]["active"] = False
            logger.info(f"Compression {status.value} for {input_file}")

    def get_download_progress(self, url: str) -> Optional[Dict[str, Any]]:
        """Get progress information for a download"""
    def get_download_progress(self, url: Optional[str] = None) -> Optional[DownloadProgress]:
        """
        Get progress information for a download.

        Args:
            url: Optional URL to get progress for. If None, returns all progress.

        Returns:
            Progress information for the specified download or None if not found
        """
        if url is None:
            return self._download_progress
        return self._download_progress.get(url)

    def get_compression_progress(self, input_file: str) -> Optional[Dict[str, Any]]:
        """Get progress information for a compression operation"""
    def get_compression_progress(
        self,
        input_file: Optional[str] = None
    ) -> Optional[CompressionProgress]:
        """
        Get progress information for a compression operation.

        Args:
            input_file: Optional file to get progress for. If None, returns all progress.

        Returns:
            Progress information for the specified compression or None if not found
        """
        if input_file is None:
            return self._compression_progress
        return self._compression_progress.get(input_file)

    def get_active_downloads(self) -> Dict[str, Dict[str, Any]]:
        """Get all active downloads"""
    def get_active_downloads(self) -> Dict[str, DownloadProgress]:
        """
        Get all active downloads.

        Returns:
            Dictionary of active downloads and their progress
        """
        return {
            url: progress
            for url, progress in self._download_progress.items()
            if progress.get("active", False)
        }
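
A short sketch of how a status command might poll this getter and `get_active_compressions` just below. Purely illustrative: the key names come from the dict-style progress records above, and the display side is not part of this module:

    for url, dl in tracker.get_active_downloads().items():
        print(f"{url}: {dl['percent']}% at {dl['speed']}, ETA {dl['eta']}")
    for path, comp in tracker.get_active_compressions().items():
        print(f"{path}: {comp['percent']:.1f}% toward {comp['target_size']} bytes")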

    def get_active_compressions(self) -> Dict[str, Dict[str, Any]]:
        """Get all active compression operations"""
    def get_active_compressions(self) -> Dict[str, CompressionProgress]:
        """
        Get all active compression operations.

        Returns:
            Dictionary of active compressions and their progress
        """
        return {
            input_file: progress
            for input_file, progress in self._compression_progress.items()
@@ -161,3 +310,4 @@ class ProgressTracker:
        """Clear all progress tracking"""
        self._download_progress.clear()
        self._compression_progress.clear()
        logger.info("Cleared all progress tracking")
videoarchiver/utils/url_validator.py (new file, 76 lines)
@@ -0,0 +1,76 @@
"""URL validation utilities for video downloads"""

import re
import logging
import yt_dlp
from typing import List, Optional

logger = logging.getLogger("VideoArchiver")

def is_video_url_pattern(url: str) -> bool:
    """Check if URL matches common video platform patterns"""
    video_patterns = [
        r"youtube\.com/watch\?v=",
        r"youtu\.be/",
        r"vimeo\.com/",
        r"tiktok\.com/",
        r"twitter\.com/.*/video/",
        r"x\.com/.*/video/",
        r"bsky\.app/",
        r"facebook\.com/.*/videos/",
        r"instagram\.com/.*/(tv|reel|p)/",
        r"twitch\.tv/.*/clip/",
        r"streamable\.com/",
        r"v\.redd\.it/",
        r"clips\.twitch\.tv/",
        r"dailymotion\.com/video/",
        r"\.mp4$",
        r"\.webm$",
        r"\.mov$",
    ]
    return any(re.search(pattern, url, re.IGNORECASE) for pattern in video_patterns)
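
A quick illustration of the pattern check (URLs are hypothetical examples):

    >>> is_video_url_pattern("https://youtu.be/dQw4w9WgXcQ")
    True
    >>> is_video_url_pattern("https://example.com/article.html")
    False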

def check_url_support(url: str, ydl_opts: dict, enabled_sites: Optional[List[str]] = None) -> bool:
    """Check if URL is supported by attempting a simulated download"""
    if not is_video_url_pattern(url):
        return False

    try:
        simulate_opts = {
            **ydl_opts,
            "simulate": True,
            "quiet": True,
            "no_warnings": True,
            "extract_flat": True,
            "skip_download": True,
            "format": "best",
        }

        with yt_dlp.YoutubeDL(simulate_opts) as ydl:
            try:
                info = ydl.extract_info(url, download=False)
                if info is None:
                    return False

                if enabled_sites:
                    extractor = info.get("extractor", "").lower()
                    if not any(
                        site.lower() in extractor for site in enabled_sites
                    ):
                        logger.info(f"Site {extractor} not in enabled sites list")
                        return False

                logger.info(
                    f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})"
                )
                return True

            except yt_dlp.utils.UnsupportedError:
                return False
            except Exception as e:
                if "Unsupported URL" not in str(e):
                    logger.error(f"Error checking URL {url}: {str(e)}")
                return False

    except Exception as e:
        logger.error(f"Error during URL check: {str(e)}")
        return False
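
A hedged usage example for the support check; the options and site list are illustrative, not the cog's configured defaults:

    opts = {"quiet": True, "noplaylist": True}
    if check_url_support("https://vimeo.com/12345", opts, enabled_sites=["vimeo"]):
        print("URL can be archived")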
@@ -1,809 +0,0 @@
"""Video download and processing utilities"""

import os
import re
import logging
import asyncio
import ffmpeg
import yt_dlp
import shutil
import subprocess
import json
import signal
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Optional, Tuple, Callable, Set
from pathlib import Path
from datetime import datetime

from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager
from videoarchiver.ffmpeg.exceptions import (
    FFmpegError,
    CompressionError,
    VerificationError,
    FFprobeError,
    TimeoutError,
    handle_ffmpeg_error,
)
from videoarchiver.utils.exceptions import VideoVerificationError
from videoarchiver.utils.file_ops import secure_delete_file
from videoarchiver.utils.path_manager import temp_path_context

logger = logging.getLogger("VideoArchiver")


# Add a custom yt-dlp logger to handle cancellation
class CancellableYTDLLogger:
    def __init__(self):
        self.cancelled = False

    def debug(self, msg):
        if self.cancelled:
            raise Exception("Download cancelled")
        logger.debug(msg)

    def warning(self, msg):
        if self.cancelled:
            raise Exception("Download cancelled")
        logger.warning(msg)

    def error(self, msg):
        if self.cancelled:
            raise Exception("Download cancelled")
        logger.error(msg)
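
A sketch of how this logger enables mid-download cancellation: yt-dlp routes its messages through the object passed as "logger", so flipping the flag makes the next log call raise and abort the transfer. The options shown are minimal and illustrative:

    ytdl_logger = CancellableYTDLLogger()
    opts = {"logger": ytdl_logger, "quiet": True}
    # later, e.g. from a shutdown path:
    ytdl_logger.cancelled = True  # next debug/warning/error call raises, stopping yt-dlp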


def is_video_url_pattern(url: str) -> bool:
    """Check if URL matches common video platform patterns"""
    video_patterns = [
        r"youtube\.com/watch\?v=",
        r"youtu\.be/",
        r"vimeo\.com/",
        r"tiktok\.com/",
        r"twitter\.com/.*/video/",
        r"x\.com/.*/video/",
        r"bsky\.app/",
        r"facebook\.com/.*/videos/",
        r"instagram\.com/.*/(tv|reel|p)/",
        r"twitch\.tv/.*/clip/",
        r"streamable\.com/",
        r"v\.redd\.it/",
        r"clips\.twitch\.tv/",
        r"dailymotion\.com/video/",
        r"\.mp4$",
        r"\.webm$",
        r"\.mov$",
    ]
    return any(re.search(pattern, url, re.IGNORECASE) for pattern in video_patterns)


class VideoDownloader:
    MAX_RETRIES = 5
    RETRY_DELAY = 10
    FILE_OP_RETRIES = 3
    FILE_OP_RETRY_DELAY = 1
    SHUTDOWN_TIMEOUT = 15  # seconds

    def __init__(
        self,
        download_path: str,
        video_format: str,
        max_quality: int,
        max_file_size: int,
        enabled_sites: Optional[List[str]] = None,
        concurrent_downloads: int = 2,
        ffmpeg_mgr: Optional[FFmpegManager] = None,
    ):
        self.download_path = Path(download_path)
        self.download_path.mkdir(parents=True, exist_ok=True)
        os.chmod(str(self.download_path), 0o755)

        self.video_format = video_format
        self.max_quality = max_quality
        self.max_file_size = max_file_size
        self.enabled_sites = enabled_sites
        self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager()

        # Create thread pool with proper naming
        self.download_pool = ThreadPoolExecutor(
            max_workers=max(1, min(3, concurrent_downloads)),
            thread_name_prefix="videoarchiver_download",
        )

        # Track active downloads and processes
        self.active_downloads: Dict[str, Dict[str, Any]] = {}
        self._downloads_lock = asyncio.Lock()
        self._active_processes: Set[subprocess.Popen] = set()
        self._processes_lock = asyncio.Lock()
        self._shutting_down = False

        # Create cancellable logger
        self.ytdl_logger = CancellableYTDLLogger()

        # Configure yt-dlp options
        self.ydl_opts = {
            "format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best",
            "outtmpl": "%(title)s.%(ext)s",
            "merge_output_format": video_format,
            "quiet": True,
            "no_warnings": True,
            "extract_flat": True,
            "concurrent_fragment_downloads": 1,
            "retries": self.MAX_RETRIES,
            "fragment_retries": self.MAX_RETRIES,
            "file_access_retries": self.FILE_OP_RETRIES,
            "extractor_retries": self.MAX_RETRIES,
            "postprocessor_hooks": [self._check_file_size],
            "progress_hooks": [self._progress_hook, self._detailed_progress_hook],
            "ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()),
            "ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()),
            "paths": {"home": str(self.download_path)},
            "logger": self.ytdl_logger,
            "ignoreerrors": True,
            "no_color": True,
            "geo_bypass": True,
            "socket_timeout": 60,
            "http_chunk_size": 1048576,
            "external_downloader_args": {"ffmpeg": ["-timeout", "60000000"]},
            "max_sleep_interval": 5,
            "sleep_interval": 1,
            "max_filesize": max_file_size * 1024 * 1024,
        }
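
A minimal construction sketch for this downloader, with illustrative values (max_file_size is in MB, matching the byte conversion in max_filesize above):

    downloader = VideoDownloader(
        download_path="/tmp/archive",
        video_format="mp4",
        max_quality=1080,
        max_file_size=8,
        enabled_sites=["youtube"],
        concurrent_downloads=2,
    )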

    async def cleanup(self) -> None:
        """Clean up resources with proper shutdown"""
        self._shutting_down = True

        try:
            # Cancel active downloads
            self.ytdl_logger.cancelled = True

            # Kill any active FFmpeg processes
            async with self._processes_lock:
                for process in self._active_processes:
                    try:
                        process.terminate()
                        await asyncio.sleep(0.1)  # Give process time to terminate
                        if process.poll() is None:
                            process.kill()  # Force kill if still running
                    except Exception as e:
                        logger.error(f"Error killing process: {e}")
                self._active_processes.clear()

            # Clean up thread pool
            self.download_pool.shutdown(wait=False, cancel_futures=True)

            # Clean up active downloads
            async with self._downloads_lock:
                self.active_downloads.clear()

        except Exception as e:
            logger.error(f"Error during downloader cleanup: {e}")
        finally:
            self._shutting_down = False

    async def force_cleanup(self) -> None:
        """Force cleanup of all resources"""
        try:
            # Force cancel all downloads
            self.ytdl_logger.cancelled = True

            # Kill all processes immediately
            async with self._processes_lock:
                for process in self._active_processes:
                    try:
                        process.kill()
                    except Exception as e:
                        logger.error(f"Error force killing process: {e}")
                self._active_processes.clear()

            # Force shutdown thread pool
            self.download_pool.shutdown(wait=False, cancel_futures=True)

            # Clear all tracking
            async with self._downloads_lock:
                self.active_downloads.clear()

        except Exception as e:
            logger.error(f"Error during force cleanup: {e}")

    def _detailed_progress_hook(self, d):
        """Handle detailed download progress tracking"""
        try:
            if d["status"] == "downloading":
                # Get URL from info dict
                url = d.get("info_dict", {}).get("webpage_url", "unknown")

                # Update global progress tracking
                from videoarchiver.processor import _download_progress

                if url in _download_progress:
                    _download_progress[url].update(
                        {
                            "active": True,
                            "percent": float(
                                d.get("_percent_str", "0").replace("%", "")
                            ),
                            "speed": d.get("_speed_str", "N/A"),
                            "eta": d.get("_eta_str", "N/A"),
                            "downloaded_bytes": d.get("downloaded_bytes", 0),
                            "total_bytes": d.get("total_bytes", 0)
                            or d.get("total_bytes_estimate", 0),
                            "retries": d.get("retry_count", 0),
                            "fragment_count": d.get("fragment_count", 0),
                            "fragment_index": d.get("fragment_index", 0),
                            "video_title": d.get("info_dict", {}).get(
                                "title", "Unknown"
                            ),
                            "extractor": d.get("info_dict", {}).get(
                                "extractor", "Unknown"
                            ),
                            "format": d.get("info_dict", {}).get("format", "Unknown"),
                            "resolution": d.get("info_dict", {}).get(
                                "resolution", "Unknown"
                            ),
                            "fps": d.get("info_dict", {}).get("fps", "Unknown"),
                            "last_update": datetime.utcnow().strftime(
                                "%Y-%m-%d %H:%M:%S"
                            ),
                        }
                    )

                    logger.debug(
                        f"Detailed progress for {url}: "
                        f"{_download_progress[url]['percent']}% at {_download_progress[url]['speed']}, "
                        f"ETA: {_download_progress[url]['eta']}"
                    )
        except Exception as e:
            logger.error(f"Error in detailed progress hook: {str(e)}")

    def _progress_hook(self, d):
        """Handle download progress"""
        if d["status"] == "finished":
            logger.info(f"Download completed: {d['filename']}")
        elif d["status"] == "downloading":
            try:
                percent = float(d.get("_percent_str", "0").replace("%", ""))
                speed = d.get("_speed_str", "N/A")
                eta = d.get("_eta_str", "N/A")
                downloaded = d.get("downloaded_bytes", 0)
                total = d.get("total_bytes", 0) or d.get("total_bytes_estimate", 0)

                logger.debug(
                    f"Download progress: {percent}% at {speed}, "
                    f"ETA: {eta}, Downloaded: {downloaded}/{total} bytes"
                )
            except Exception as e:
                logger.debug(f"Error logging progress: {str(e)}")

    def is_supported_url(self, url: str) -> bool:
        """Check if URL is supported by attempting a simulated download"""
        if not is_video_url_pattern(url):
            return False

        try:
            simulate_opts = {
                **self.ydl_opts,
                "simulate": True,
                "quiet": True,
                "no_warnings": True,
                "extract_flat": True,
                "skip_download": True,
                "format": "best",
            }

            with yt_dlp.YoutubeDL(simulate_opts) as ydl:
                try:
                    info = ydl.extract_info(url, download=False)
                    if info is None:
                        return False

                    if self.enabled_sites:
                        extractor = info.get("extractor", "").lower()
                        if not any(
                            site.lower() in extractor for site in self.enabled_sites
                        ):
                            logger.info(f"Site {extractor} not in enabled sites list")
                            return False

                    logger.info(
                        f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})"
                    )
                    return True

                except yt_dlp.utils.UnsupportedError:
                    return False
                except Exception as e:
                    if "Unsupported URL" not in str(e):
                        logger.error(f"Error checking URL {url}: {str(e)}")
                    return False

        except Exception as e:
            logger.error(f"Error during URL check: {str(e)}")
            return False

    async def download_video(
        self, url: str, progress_callback: Optional[Callable[[float], None]] = None
    ) -> Tuple[bool, str, str]:
        """Download and process a video with improved error handling"""
        if self._shutting_down:
            return False, "", "Downloader is shutting down"

        # Initialize progress tracking for this URL
        from videoarchiver.processor import _download_progress

        _download_progress[url] = {
            "active": True,
            "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
            "percent": 0,
            "speed": "N/A",
            "eta": "N/A",
            "downloaded_bytes": 0,
            "total_bytes": 0,
            "retries": 0,
            "fragment_count": 0,
            "fragment_index": 0,
            "video_title": "Unknown",
            "extractor": "Unknown",
            "format": "Unknown",
            "resolution": "Unknown",
            "fps": "Unknown",
            "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
        }

        original_file = None
        compressed_file = None
        temp_dir = None
        hardware_accel_failed = False
        compression_params = None

        try:
            with temp_path_context() as temp_dir:
                # Download the video
                success, file_path, error = await self._safe_download(
                    url, temp_dir, progress_callback
                )
                if not success:
                    return False, "", error

                original_file = file_path

                async with self._downloads_lock:
                    self.active_downloads[url] = {
                        "file_path": original_file,
                        "start_time": datetime.utcnow(),
                    }

                # Check file size and compress if needed
                file_size = os.path.getsize(original_file)
                if file_size > (self.max_file_size * 1024 * 1024):
                    logger.info(f"Compressing video: {original_file}")
                    try:
                        # Get optimal compression parameters
                        compression_params = self.ffmpeg_mgr.get_compression_params(
                            original_file, self.max_file_size
                        )
                        compressed_file = os.path.join(
                            self.download_path,
                            f"compressed_{os.path.basename(original_file)}",
                        )

                        # Try hardware acceleration first
                        success = await self._try_compression(
                            original_file,
                            compressed_file,
                            compression_params,
                            progress_callback,
                            use_hardware=True,
                        )

                        # If hardware acceleration fails, fall back to CPU
                        if not success:
                            hardware_accel_failed = True
                            logger.warning(
                                "Hardware acceleration failed, falling back to CPU encoding"
                            )
                            success = await self._try_compression(
                                original_file,
                                compressed_file,
                                compression_params,
                                progress_callback,
                                use_hardware=False,
                            )

                        if not success:
                            raise CompressionError(
                                "Failed to compress with both hardware and CPU encoding",
                                file_size,
                                self.max_file_size * 1024 * 1024,
                            )

                        # Verify compressed file
                        if not self._verify_video_file(compressed_file):
                            raise VideoVerificationError(
                                "Compressed file verification failed"
                            )

                        compressed_size = os.path.getsize(compressed_file)
                        if compressed_size <= (self.max_file_size * 1024 * 1024):
                            await self._safe_delete_file(original_file)
                            return True, compressed_file, ""
                        else:
                            await self._safe_delete_file(compressed_file)
                            raise CompressionError(
                                "Failed to compress to target size",
                                file_size,
                                self.max_file_size * 1024 * 1024,
                            )

                    except Exception as e:
                        error_msg = str(e)
                        if hardware_accel_failed:
                            error_msg = f"Hardware acceleration failed, CPU fallback error: {error_msg}"
                        if compressed_file and os.path.exists(compressed_file):
                            await self._safe_delete_file(compressed_file)
                        return False, "", error_msg

                else:
                    # Move file to final location
                    final_path = os.path.join(
                        self.download_path, os.path.basename(original_file)
                    )
                    success = await self._safe_move_file(original_file, final_path)
                    if not success:
                        return False, "", "Failed to move file to final location"
                    return True, final_path, ""

        except Exception as e:
            logger.error(f"Download error: {str(e)}")
            return False, "", str(e)

        finally:
            # Clean up
            async with self._downloads_lock:
                self.active_downloads.pop(url, None)
            if url in _download_progress:
                _download_progress[url]["active"] = False

            try:
                if original_file and os.path.exists(original_file):
                    await self._safe_delete_file(original_file)
                if (
                    compressed_file
                    and os.path.exists(compressed_file)
                    and not compressed_file.startswith(str(self.download_path))
                ):
                    await self._safe_delete_file(compressed_file)
            except Exception as e:
                logger.error(f"Error during file cleanup: {str(e)}")

    async def _try_compression(
        self,
        input_file: str,
        output_file: str,
        params: Dict[str, str],
        progress_callback: Optional[Callable[[float], None]] = None,
        use_hardware: bool = True,
    ) -> bool:
        """Attempt video compression with given parameters"""
        if self._shutting_down:
            return False

        try:
            # Build FFmpeg command
            ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
            cmd = [ffmpeg_path, "-y", "-i", input_file]

            # Add progress monitoring
            cmd.extend(["-progress", "pipe:1"])

            # Modify parameters based on hardware acceleration preference
            if use_hardware:
                gpu_info = self.ffmpeg_mgr.gpu_info
                if gpu_info["nvidia"] and params.get("c:v") == "libx264":
                    params["c:v"] = "h264_nvenc"
                elif gpu_info["amd"] and params.get("c:v") == "libx264":
                    params["c:v"] = "h264_amf"
                elif gpu_info["intel"] and params.get("c:v") == "libx264":
                    params["c:v"] = "h264_qsv"
            else:
                params["c:v"] = "libx264"

            # Add all parameters to command
            for key, value in params.items():
                cmd.extend([f"-{key}", str(value)])

            # Add output file
            cmd.append(output_file)

            # Get video duration for progress calculation
            duration = self._get_video_duration(input_file)

            # Update compression progress tracking
            from videoarchiver.processor import _compression_progress

            # Get input file size
            input_size = os.path.getsize(input_file)

            # Initialize compression progress
            _compression_progress[input_file] = {
                "active": True,
                "filename": os.path.basename(input_file),
                "start_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
                "percent": 0,
                "elapsed_time": "0:00",
                "input_size": input_size,
                "current_size": 0,
                "target_size": self.max_file_size * 1024 * 1024,
                "codec": params.get("c:v", "unknown"),
                "hardware_accel": use_hardware,
                "preset": params.get("preset", "unknown"),
                "crf": params.get("crf", "unknown"),
                "duration": duration,
                "bitrate": params.get("b:v", "unknown"),
                "audio_codec": params.get("c:a", "unknown"),
                "audio_bitrate": params.get("b:a", "unknown"),
                "last_update": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
            }

            # Run compression with progress monitoring
            process = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )

            # Track the process
            async with self._processes_lock:
                self._active_processes.add(process)

            start_time = datetime.utcnow()
            loop = asyncio.get_running_loop()

            try:
                while True:
                    if self._shutting_down:
                        process.terminate()
                        return False

                    line = await process.stdout.readline()
                    if not line:
                        break

                    try:
                        line = line.decode().strip()
                        if line.startswith("out_time_ms="):
                            current_time = (
                                int(line.split("=")[1]) / 1000000
                            )  # Convert microseconds to seconds
                            if duration > 0:
                                progress = min(100, (current_time / duration) * 100)

                                # Update compression progress
                                elapsed = datetime.utcnow() - start_time
                                _compression_progress[input_file].update(
                                    {
                                        "percent": progress,
                                        "elapsed_time": str(elapsed).split(".")[
                                            0
                                        ],  # Remove microseconds
                                        "current_size": (
                                            os.path.getsize(output_file)
                                            if os.path.exists(output_file)
                                            else 0
                                        ),
                                        "current_time": current_time,
                                        "last_update": datetime.utcnow().strftime(
                                            "%Y-%m-%d %H:%M:%S"
                                        ),
                                    }
                                )

                                if progress_callback:
                                    # Call the callback directly since it now handles task creation
                                    progress_callback(progress)

                    except Exception as e:
                        logger.error(f"Error parsing FFmpeg progress: {e}")

                await process.wait()
                success = os.path.exists(output_file)

                # Update final status
                if success and input_file in _compression_progress:
                    _compression_progress[input_file].update(
                        {
                            "active": False,
                            "percent": 100,
                            "current_size": os.path.getsize(output_file),
                            "last_update": datetime.utcnow().strftime(
                                "%Y-%m-%d %H:%M:%S"
                            ),
                        }
                    )

                return success

            finally:
                # Remove process from tracking
                async with self._processes_lock:
                    self._active_processes.discard(process)

        except subprocess.CalledProcessError as e:
            logger.error(f"FFmpeg compression failed: {e.stderr.decode()}")
            return False
        except Exception as e:
            logger.error(f"Compression attempt failed: {str(e)}")
            return False
        finally:
            # Ensure compression progress is marked as inactive
            if input_file in _compression_progress:
                _compression_progress[input_file]["active"] = False

    def _get_video_duration(self, file_path: str) -> float:
        """Get video duration in seconds"""
        try:
            ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
            cmd = [
                ffprobe_path,
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                file_path,
            ]
            result = subprocess.run(cmd, capture_output=True, text=True)
            data = json.loads(result.stdout)
            return float(data["format"]["duration"])
        except Exception as e:
            logger.error(f"Error getting video duration: {e}")
            return 0

    def _check_file_size(self, info):
        """Check if file size is within limits"""
        if info.get("filepath") and os.path.exists(info["filepath"]):
            try:
                size = os.path.getsize(info["filepath"])
                if size > (self.max_file_size * 1024 * 1024):
                    logger.info(
                        f"File exceeds size limit, will compress: {info['filepath']}"
                    )
            except OSError as e:
                logger.error(f"Error checking file size: {str(e)}")

    def _verify_video_file(self, file_path: str) -> bool:
        """Verify video file integrity"""
        try:
            ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
            cmd = [
                ffprobe_path,
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                "-show_streams",
                file_path,
            ]

            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if result.returncode != 0:
                raise VideoVerificationError(f"FFprobe failed: {result.stderr}")

            probe = json.loads(result.stdout)

            # Verify video stream
            video_streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
            if not video_streams:
                raise VideoVerificationError("No video streams found")

            # Verify duration
            duration = float(probe["format"].get("duration", 0))
            if duration <= 0:
                raise VideoVerificationError("Invalid video duration")

            # Verify file is readable
            with open(file_path, "rb") as f:
                f.seek(0, 2)
                if f.tell() == 0:
                    raise VideoVerificationError("Empty file")

            return True

        except Exception as e:
            logger.error(f"Error verifying video file {file_path}: {e}")
            return False

    async def _safe_download(
        self,
        url: str,
        temp_dir: str,
        progress_callback: Optional[Callable[[float], None]] = None,
    ) -> Tuple[bool, str, str]:
        """Safely download video with retries"""
        if self._shutting_down:
            return False, "", "Downloader is shutting down"

        last_error = None
        for attempt in range(self.MAX_RETRIES):
            try:
                ydl_opts = self.ydl_opts.copy()
                ydl_opts["outtmpl"] = os.path.join(temp_dir, ydl_opts["outtmpl"])

                # Add progress callback
                if progress_callback:
                    original_progress_hook = ydl_opts["progress_hooks"][0]

                    def combined_progress_hook(d):
                        original_progress_hook(d)
                        if d["status"] == "downloading":
                            try:
                                percent = float(
                                    d.get("_percent_str", "0").replace("%", "")
                                )
                                # Call the callback directly since it now handles task creation
                                progress_callback(percent)
                            except Exception as e:
                                logger.error(f"Error in progress callback: {e}")

                    ydl_opts["progress_hooks"] = [combined_progress_hook]

                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info = await asyncio.get_event_loop().run_in_executor(
                        self.download_pool, lambda: ydl.extract_info(url, download=True)
                    )

                    if info is None:
                        raise Exception("Failed to extract video information")

                    file_path = os.path.join(temp_dir, ydl.prepare_filename(info))
                    if not os.path.exists(file_path):
                        raise FileNotFoundError("Download completed but file not found")

                    if not self._verify_video_file(file_path):
                        raise VideoVerificationError("Downloaded file is not a valid video")

                    return True, file_path, ""

            except Exception as e:
                last_error = str(e)
                logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
                if attempt < self.MAX_RETRIES - 1:
                    # Exponential backoff with jitter
                    delay = self.RETRY_DELAY * (2**attempt) + (attempt * 2)
                    await asyncio.sleep(delay)
                else:
                    return False, "", f"All download attempts failed: {last_error}"

    async def _safe_delete_file(self, file_path: str) -> bool:
        """Safely delete a file with retries"""
        for attempt in range(self.FILE_OP_RETRIES):
            try:
                if await secure_delete_file(file_path):
                    return True
                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
            except Exception as e:
                logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
                if attempt == self.FILE_OP_RETRIES - 1:
                    return False
                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
        return False

    async def _safe_move_file(self, src: str, dst: str) -> bool:
        """Safely move a file with retries"""
        for attempt in range(self.FILE_OP_RETRIES):
            try:
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.move(src, dst)
                return True
            except Exception as e:
                logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
                if attempt == self.FILE_OP_RETRIES - 1:
                    return False
                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
        return False