FFmpeg is now properly managed:
- Binaries are downloaded and verified on startup
- Permissions are properly set
- Hardware acceleration is detected and used when available
- Resources are cleaned up properly

Error handling has been improved:
- Specific exception types for different errors
- Better error messages and logging
- Appropriate reaction indicators
- Enhanced component error handling

Resource management has been enhanced:
- Failed downloads are tracked and cleaned up
- Temporary files are handled properly
- Queue management is more robust
- Concurrent downloads are better managed

Verification has been strengthened:
- FFmpeg binaries are verified
- Video files are validated
- Compression results are checked
- Component initialization is verified
"""Video download and processing utilities"""
|
|
|
|
import os
|
|
import re
|
|
import logging
|
|
import asyncio
|
|
import ffmpeg
|
|
import yt_dlp
|
|
import shutil
|
|
import subprocess
|
|
import json
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from typing import Dict, List, Optional, Tuple
|
|
from pathlib import Path
|
|
|
|
from videoarchiver.ffmpeg.ffmpeg_manager import FFmpegManager
|
|
from videoarchiver.ffmpeg.exceptions import (
|
|
FFmpegError,
|
|
CompressionError,
|
|
VideoVerificationError,
|
|
FFprobeError,
|
|
TimeoutError,
|
|
handle_ffmpeg_error
|
|
)
|
|
from videoarchiver.utils.file_ops import secure_delete_file
|
|
from videoarchiver.utils.path_manager import temp_path_context
|
|
|
|
logger = logging.getLogger("VideoArchiver")
|
|
|
|
class VideoDownloader:
    MAX_RETRIES = 3
    RETRY_DELAY = 5  # seconds
    FILE_OP_RETRIES = 3
    FILE_OP_RETRY_DELAY = 1  # seconds

    def __init__(
        self,
        download_path: str,
        video_format: str,
        max_quality: int,
        max_file_size: int,
        enabled_sites: Optional[List[str]] = None,
        concurrent_downloads: int = 3,
        ffmpeg_mgr: Optional[FFmpegManager] = None,
    ):
        # Ensure download path exists with proper permissions
        self.download_path = Path(download_path)
        self.download_path.mkdir(parents=True, exist_ok=True)
        os.chmod(str(self.download_path), 0o755)
        logger.info(f"Initialized download directory: {self.download_path}")

        self.video_format = video_format
        self.max_quality = max_quality
        self.max_file_size = max_file_size  # megabytes
        self.enabled_sites = enabled_sites

        # Initialize FFmpeg manager
        self.ffmpeg_mgr = ffmpeg_mgr or FFmpegManager()
        logger.info(f"Using FFmpeg from: {self.ffmpeg_mgr.get_ffmpeg_path()}")

        # Create thread pool for this instance
        self.download_pool = ThreadPoolExecutor(
            max_workers=max(1, min(5, concurrent_downloads)),
            thread_name_prefix="videoarchiver_download",
        )

        # Track active downloads for cleanup
        self.active_downloads: Dict[str, str] = {}
        self._downloads_lock = asyncio.Lock()

        # Configure yt-dlp options
        self.ydl_opts = {
            "format": f"bv*[height<={max_quality}][ext=mp4]+ba[ext=m4a]/b[height<={max_quality}]/best",  # Flexible format with fallbacks
            "outtmpl": "%(title)s.%(ext)s",  # Base filename only; path added later
            "merge_output_format": video_format,
            "quiet": False,  # Enable output for debugging
            "no_warnings": False,  # Show warnings
            "verbose": True,  # Enable verbose output
            "extract_flat": False,
            "concurrent_fragment_downloads": concurrent_downloads,
            "retries": self.MAX_RETRIES,
            "fragment_retries": self.MAX_RETRIES,
            "file_access_retries": self.FILE_OP_RETRIES,
            "extractor_retries": self.MAX_RETRIES,
            "postprocessor_hooks": [self._check_file_size],
            "progress_hooks": [self._progress_hook],
            "ffmpeg_location": str(self.ffmpeg_mgr.get_ffmpeg_path()),  # Convert Path to string
            "ffprobe_location": str(self.ffmpeg_mgr.get_ffprobe_path()),  # Add ffprobe path
            "paths": {
                "home": str(self.download_path)  # Set home directory for yt-dlp
            },
            "logger": logger,  # Use our logger
            "ignoreerrors": True,  # Don't stop on download errors
            "no_color": True,  # Disable ANSI colors in output
            "geo_bypass": True,  # Try to bypass geo-restrictions
            "socket_timeout": 30,  # Increase timeout
        }
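
    # Note on the format selector above, per yt-dlp's format-selection rules:
    # "bv*[height<=H][ext=mp4]+ba[ext=m4a]" requests the best mp4 video stream
    # capped at H pixels tall, merged with the best m4a audio stream; if that
    # combination is unavailable, "b[height<=H]" falls back to the best
    # pre-merged file under the cap, and "best" is the final fallback
    # regardless of height.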

    def is_supported_url(self, url: str) -> bool:
        """Check if URL is supported by attempting a simulated download"""
        try:
            # Configure yt-dlp for simulation
            simulate_opts = {
                **self.ydl_opts,
                "simulate": True,  # Only simulate the download
                "quiet": True,  # Reduce output noise
                "no_warnings": True,
                "extract_flat": True,  # Don't resolve full video info
                "skip_download": True,  # Skip the actual download
                "format": "best",  # Don't spend time finding the best format
            }

            # Create a new yt-dlp instance for simulation
            with yt_dlp.YoutubeDL(simulate_opts) as ydl:
                try:
                    # Try to extract info without downloading
                    info = ydl.extract_info(url, download=False)
                    if info is None:
                        logger.debug(f"URL not supported: {url}")
                        return False

                    # Check if site is enabled (if enabled_sites is configured)
                    if self.enabled_sites:
                        extractor = info.get("extractor", "").lower()
                        if not any(site.lower() in extractor for site in self.enabled_sites):
                            logger.info(f"Site {extractor} not in enabled sites list")
                            return False

                    logger.info(f"URL supported: {url} (Extractor: {info.get('extractor', 'unknown')})")
                    return True

                except Exception as e:
                    if "Unsupported URL" not in str(e):
                        logger.error(f"Error checking URL {url}: {str(e)}")
                    return False

        except Exception as e:
            logger.error(f"Error during URL check: {str(e)}")
            return False

    def _check_file_size(self, d):
        """Check if file size is within limits (yt-dlp postprocessor hook)"""
        # yt-dlp postprocessor hooks receive a dict with "status",
        # "postprocessor", and "info_dict" entries; the output path
        # ("filepath") lives inside "info_dict", so check there as well
        # as at the top level.
        info = d.get("info_dict") or {}
        filepath = d.get("filepath") or info.get("filepath")
        if filepath and os.path.exists(filepath):
            try:
                size = os.path.getsize(filepath)
                if size > (self.max_file_size * 1024 * 1024):
                    logger.info(
                        f"File exceeds size limit, will compress: {filepath}"
                    )
            except OSError as e:
                logger.error(f"Error checking file size: {str(e)}")

    def _progress_hook(self, d):
        """Handle download progress"""
        if d["status"] == "finished":
            logger.info(f"Download completed: {d['filename']}")
        elif d["status"] == "downloading":
            try:
                percent = d.get("_percent_str", "N/A")
                speed = d.get("_speed_str", "N/A")
                eta = d.get("_eta_str", "N/A")
                logger.debug(f"Download progress: {percent} at {speed}, ETA: {eta}")
            except Exception as e:
                logger.debug(f"Error logging progress: {str(e)}")
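
    # For reference, the ffprobe invocation below ("-print_format json
    # -show_format -show_streams") emits JSON shaped roughly like:
    #   {"streams": [{"codec_type": "video", ...}, {"codec_type": "audio", ...}],
    #    "format": {"duration": "12.34", ...}}
    # which is what _verify_video_file parses.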

    def _verify_video_file(self, file_path: str) -> bool:
        """Verify video file integrity"""
        try:
            # Use ffprobe from FFmpegManager
            ffprobe_path = str(self.ffmpeg_mgr.get_ffprobe_path())
            logger.debug(f"Using ffprobe from: {ffprobe_path}")

            cmd = [
                ffprobe_path,
                "-v", "quiet",
                "-print_format", "json",
                "-show_format",
                "-show_streams",
                file_path,
            ]

            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=30,
            )

            if result.returncode != 0:
                raise VideoVerificationError(f"FFprobe failed: {result.stderr}")

            probe = json.loads(result.stdout)

            # Check if the file has a video stream
            video_streams = [s for s in probe.get("streams", []) if s.get("codec_type") == "video"]
            if not video_streams:
                raise VideoVerificationError("No video streams found")

            # Check if the duration is valid
            duration = float(probe["format"].get("duration", 0))
            if duration <= 0:
                raise VideoVerificationError("Invalid video duration")

            # Check if the file is readable and non-empty
            with open(file_path, "rb") as f:
                f.seek(0, 2)  # Seek to end
                if f.tell() == 0:
                    raise VideoVerificationError("Empty file")

            return True

        except Exception as e:
            logger.error(f"Error verifying video file {file_path}: {e}")
            return False

    async def _safe_download(self, url: str, temp_dir: str) -> Tuple[bool, str, str]:
        """Safely download video with retries"""
        for attempt in range(self.MAX_RETRIES):
            try:
                ydl_opts = self.ydl_opts.copy()
                ydl_opts["outtmpl"] = os.path.join(temp_dir, ydl_opts["outtmpl"])

                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info = await asyncio.get_event_loop().run_in_executor(
                        self.download_pool, lambda: ydl.extract_info(url, download=True)
                    )

                    if info is None:
                        raise Exception("Failed to extract video information")

                    # prepare_filename already yields the full path here since
                    # outtmpl includes temp_dir; os.path.join leaves an
                    # absolute path unchanged.
                    file_path = os.path.join(temp_dir, ydl.prepare_filename(info))
                    if not os.path.exists(file_path):
                        raise FileNotFoundError("Download completed but file not found")

                    if not self._verify_video_file(file_path):
                        raise VideoVerificationError("Downloaded file is not a valid video")

                    return True, file_path, ""

            except Exception as e:
                logger.error(f"Download attempt {attempt + 1} failed: {str(e)}")
                if attempt < self.MAX_RETRIES - 1:
                    await asyncio.sleep(
                        self.RETRY_DELAY * (attempt + 1)
                    )  # Linear backoff: 5s after the first failure, then 10s
                else:
                    return False, "", f"All download attempts failed: {str(e)}"

    async def download_video(self, url: str) -> Tuple[bool, str, str]:
        """Download and process a video"""
        original_file = None
        compressed_file = None
        temp_dir = None

        try:
            # Create temporary directory for download
            with temp_path_context() as temp_dir:
                # Download the video
                success, file_path, error = await self._safe_download(url, temp_dir)
                if not success:
                    return False, "", error

                original_file = file_path

                # Track this download
                async with self._downloads_lock:
                    self.active_downloads[url] = original_file

                # Check file size and compress if needed
                file_size = os.path.getsize(original_file)
                if file_size > (self.max_file_size * 1024 * 1024):
                    logger.info(f"Compressing video: {original_file}")
                    try:
                        # Get optimal compression parameters; params maps FFmpeg
                        # flags (minus the leading "-") to values, e.g.
                        # hypothetically {"c:v": "libx264", "crf": "23"}
                        params = self.ffmpeg_mgr.get_compression_params(
                            original_file, self.max_file_size
                        )
                        compressed_file = os.path.join(
                            self.download_path,
                            f"compressed_{os.path.basename(original_file)}",
                        )

                        # Build FFmpeg command with full path
                        ffmpeg_path = str(self.ffmpeg_mgr.get_ffmpeg_path())
                        logger.debug(f"Using FFmpeg from: {ffmpeg_path}")

                        # Build command with all parameters
                        cmd = [ffmpeg_path, "-y"]  # Overwrite output file if it exists

                        # Add input file
                        cmd.extend(["-i", original_file])

                        # Add all compression parameters
                        for key, value in params.items():
                            if key == "c:v" and value == "libx264":
                                # Use hardware acceleration if available
                                gpu_info = self.ffmpeg_mgr.gpu_info
                                if gpu_info["nvidia"]:
                                    cmd.extend(["-c:v", "h264_nvenc"])
                                elif gpu_info["amd"]:
                                    cmd.extend(["-c:v", "h264_amf"])
                                elif gpu_info["intel"]:
                                    cmd.extend(["-c:v", "h264_qsv"])
                                else:
                                    cmd.extend(["-c:v", "libx264"])
                            else:
                                cmd.extend([f"-{key}", str(value)])

                        # Add output file
                        cmd.append(compressed_file)

                        # Run compression in executor
                        logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
                        try:
                            result = await asyncio.get_event_loop().run_in_executor(
                                self.download_pool,
                                lambda: subprocess.run(
                                    cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    check=True,
                                ),
                            )
                            logger.debug(f"FFmpeg output: {result.stderr.decode()}")
                        except subprocess.CalledProcessError as e:
                            error = handle_ffmpeg_error(e.stderr.decode())
                            logger.error(f"FFmpeg error: {e.stderr.decode()}")
                            raise error

                        if not os.path.exists(compressed_file):
                            raise FileNotFoundError(
                                "Compression completed but file not found"
                            )

                        # Verify compressed file
                        if not self._verify_video_file(compressed_file):
                            raise VideoVerificationError(
                                "Compressed file is not a valid video"
                            )

                        compressed_size = os.path.getsize(compressed_file)
                        if compressed_size <= (self.max_file_size * 1024 * 1024):
                            await self._safe_delete_file(original_file)
                            return True, compressed_file, ""
                        else:
                            await self._safe_delete_file(compressed_file)
                            raise CompressionError(
                                "Failed to compress to target size",
                                input_size=file_size,
                                target_size=self.max_file_size * 1024 * 1024,
                            )
                    except (FFmpegError, VideoVerificationError, FileNotFoundError, CompressionError) as e:
                        if compressed_file and os.path.exists(compressed_file):
                            await self._safe_delete_file(compressed_file)
                        return False, "", str(e)
                    except Exception as e:
                        if compressed_file and os.path.exists(compressed_file):
                            await self._safe_delete_file(compressed_file)
                        logger.error(f"Compression error: {str(e)}")
                        return False, "", f"Compression error: {str(e)}"
                else:
                    # Move file to final location
                    final_path = os.path.join(
                        self.download_path, os.path.basename(original_file)
                    )
                    # Use safe move with retries
                    success = await self._safe_move_file(original_file, final_path)
                    if not success:
                        return False, "", "Failed to move file to final location"
                    return True, final_path, ""

        except Exception as e:
            logger.error(f"Download error: {str(e)}")
            return False, "", str(e)

        finally:
            # Clean up tracking and any leftover files
            async with self._downloads_lock:
                self.active_downloads.pop(url, None)

            try:
                if original_file and os.path.exists(original_file):
                    await self._safe_delete_file(original_file)
                if (
                    compressed_file
                    and os.path.exists(compressed_file)
                    # startswith needs a str, not a Path
                    and not compressed_file.startswith(str(self.download_path))
                ):
                    await self._safe_delete_file(compressed_file)
            except Exception as e:
                logger.error(f"Error during file cleanup: {str(e)}")

    async def _safe_delete_file(self, file_path: str) -> bool:
        """Safely delete a file with retries"""
        for attempt in range(self.FILE_OP_RETRIES):
            try:
                if secure_delete_file(file_path):
                    return True
                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
            except Exception as e:
                logger.error(f"Delete attempt {attempt + 1} failed: {str(e)}")
                if attempt == self.FILE_OP_RETRIES - 1:
                    return False
                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
        return False

    async def _safe_move_file(self, src: str, dst: str) -> bool:
        """Safely move a file with retries"""
        for attempt in range(self.FILE_OP_RETRIES):
            try:
                # Ensure destination directory exists
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                # Try to move the file
                shutil.move(src, dst)
                return True
            except Exception as e:
                logger.error(f"Move attempt {attempt + 1} failed: {str(e)}")
                if attempt == self.FILE_OP_RETRIES - 1:
                    return False
                await asyncio.sleep(self.FILE_OP_RETRY_DELAY * (attempt + 1))
        return False
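

# Minimal usage sketch (not part of the original module): assumes the
# videoarchiver package is importable and that FFmpegManager can locate or
# download its binaries. The path, URL, and size/quality values below are
# illustrative, not prescribed by the cog.
if __name__ == "__main__":
    async def _demo():
        downloader = VideoDownloader(
            download_path="./downloads",
            video_format="mp4",
            max_quality=720,
            max_file_size=25,  # megabytes
        )
        url = "https://example.com/some-video"  # hypothetical URL
        if downloader.is_supported_url(url):
            ok, path, err = await downloader.download_video(url)
            print(f"success={ok} path={path} error={err}")

    asyncio.run(_demo())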