from django.db import transaction
from django.utils import timezone
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from typing import List, Dict, Any, Optional
from datetime import timedelta
import logging
import json
import os

from .models import VersionBranch, ChangeSet
from .caching import VersionHistoryCache

logger = logging.getLogger('version_control')


class VersionCleanup:
    """
    Manages cleanup of old version control data through archival and deletion.

    Old ``ChangeSet`` rows are serialized to timestamped JSON files under
    ``VERSION_CONTROL_ARCHIVE_PATH`` and then flagged ``archived=True``;
    merged/inactive ``VersionBranch`` rows are deleted or deactivated after
    their changes have been archived.
    """

    def __init__(self) -> None:
        # All three knobs are optional Django settings with safe defaults.
        self.archive_path: str = getattr(
            settings, 'VERSION_CONTROL_ARCHIVE_PATH', 'version_archives'
        )
        self.retention_days: int = getattr(
            settings, 'VERSION_CONTROL_RETENTION_DAYS', 90
        )
        self.merged_retention_days: int = getattr(
            settings, 'VERSION_CONTROL_MERGED_RETENTION_DAYS', 30
        )
        self.ensure_archive_directory()

    def ensure_archive_directory(self) -> None:
        """Ensure the archive directory exists (idempotent, race-free)."""
        # exist_ok avoids the TOCTOU race of exists()-then-makedirs.
        os.makedirs(self.archive_path, exist_ok=True)

    def get_archive_filename(self, date: timezone.datetime) -> str:
        """
        Generate an archive file path for a given timestamp.

        Includes microseconds so that two archives written within the same
        second do not silently overwrite each other.
        """
        return os.path.join(
            self.archive_path,
            f'version_archive_{date.strftime("%Y%m%d_%H%M%S_%f")}.json'
        )

    def _serialize_change(self, change: ChangeSet,
                          include_branch: bool = True) -> Dict[str, Any]:
        """Serialize a single ChangeSet to a JSON-safe dict for archival."""
        record: Dict[str, Any] = {
            'id': change.id,
            'content_type': change.content_type_id,
            'object_id': change.object_id,
            'data': change.data,
            'status': change.status,
            'created_at': change.created_at.isoformat(),
            'applied_at': (change.applied_at.isoformat()
                           if change.applied_at else None),
        }
        if include_branch:
            record['branch'] = change.branch_id
        return record

    def _write_archive(self, archive_data: Dict[str, Any]) -> str:
        """Write archive data to a fresh timestamped file; return its path."""
        archive_file = self.get_archive_filename(timezone.now())
        with open(archive_file, 'w') as f:
            json.dump(archive_data, f, indent=2)
        return archive_file

    @transaction.atomic
    def archive_old_changes(self, batch_size: int = 1000) -> int:
        """
        Archive up to ``batch_size`` changes older than the retention window.

        Returns the number of changes archived. NOTE: the archive file is
        written inside the transaction; if the transaction rolls back the
        file remains on disk (harmless duplicate on retry).
        """
        cutoff_date = timezone.now() - timedelta(days=self.retention_days)

        # Materialize once: a sliced queryset used for both truth-testing
        # and iteration would otherwise hit the database twice.
        old_changes = list(
            ChangeSet.objects.filter(
                created_at__lt=cutoff_date,
                archived=False
            )[:batch_size]
        )
        if not old_changes:
            return 0

        archive_data = {
            'timestamp': timezone.now().isoformat(),
            'changes': [self._serialize_change(c) for c in old_changes],
        }
        archive_file = self._write_archive(archive_data)

        change_ids = [change.id for change in old_changes]
        ChangeSet.objects.filter(id__in=change_ids).update(archived=True)

        logger.info("Archived %d changes to %s", len(change_ids), archive_file)
        return len(change_ids)

    @transaction.atomic
    def cleanup_merged_branches(self) -> int:
        """
        Delete merged, unprotected branches past the merged-retention window.

        Each branch's changes are archived before deletion. Returns the
        number of branches removed; per-branch failures are logged and
        skipped.
        """
        cutoff_date = timezone.now() - timedelta(
            days=self.merged_retention_days)

        merged_branches = VersionBranch.objects.filter(
            is_merged=True,
            merged_at__lt=cutoff_date,
            is_protected=False
        )

        count = 0
        for branch in merged_branches:
            try:
                self.archive_branch_changes(branch)
                branch.delete()
                count += 1
                logger.info("Cleaned up merged branch: %s", branch.name)
            except Exception:
                # Best-effort: one bad branch must not abort the sweep.
                logger.exception("Error cleaning up branch %s", branch.name)
        return count

    def archive_branch_changes(self, branch: VersionBranch) -> None:
        """Archive all unarchived changes for a specific branch."""
        changes = ChangeSet.objects.filter(branch=branch, archived=False)
        if not changes:
            return

        archive_data = {
            'timestamp': timezone.now().isoformat(),
            'branch': {
                'id': branch.id,
                'name': branch.name,
                'metadata': branch.metadata,
                'created_at': branch.created_at.isoformat(),
                'merged_at': (branch.merged_at.isoformat()
                              if branch.merged_at else None),
            },
            # Branch id is implied by the surrounding 'branch' record.
            'changes': [self._serialize_change(c, include_branch=False)
                        for c in changes],
        }
        self._write_archive(archive_data)
        changes.update(archived=True)

    @transaction.atomic
    def cleanup_inactive_branches(self, days: int = 60) -> int:
        """
        Deactivate unprotected branches not updated within ``days`` days.

        Changes are archived first; the branch row is kept (``is_active``
        set to False). Returns the number of branches deactivated.
        """
        cutoff_date = timezone.now() - timedelta(days=days)

        inactive_branches = VersionBranch.objects.filter(
            is_active=True,
            is_protected=False,
            updated_at__lt=cutoff_date
        )

        count = 0
        for branch in inactive_branches:
            try:
                self.archive_branch_changes(branch)
                branch.is_active = False
                branch.save()
                count += 1
                logger.info("Deactivated inactive branch: %s", branch.name)
            except Exception:
                logger.exception("Error deactivating branch %s", branch.name)
        return count

    def cleanup_orphaned_changes(self) -> int:
        """
        Archive changes whose target content object no longer exists.

        Returns the number of changes archived. Note this walks every
        unarchived change and resolves each target object individually.
        """
        count = 0
        for change in ChangeSet.objects.filter(archived=False).iterator():
            try:
                # get_object_for_this_type raises DoesNotExist rather than
                # returning None, so the exception *is* the orphan signal.
                change.content_type.get_object_for_this_type(
                    pk=change.object_id)
            except ObjectDoesNotExist:
                self.archive_change(change)
                count += 1

        logger.info("Cleaned up %d orphaned changes", count)
        return count

    def archive_change(self, change: ChangeSet) -> None:
        """Archive a single change to its own archive file."""
        archive_data = {
            'timestamp': timezone.now().isoformat(),
            'changes': [self._serialize_change(change)],
        }
        self._write_archive(archive_data)

        change.archived = True
        change.save()

    def run_maintenance(self) -> Dict[str, int]:
        """Run all cleanup operations and return per-operation counts."""
        results = {
            'archived_changes': self.archive_old_changes(),
            'cleaned_branches': self.cleanup_merged_branches(),
            'deactivated_branches': self.cleanup_inactive_branches(),
            'cleaned_orphans': self.cleanup_orphaned_changes(),
        }
        logger.info("Version control maintenance completed", extra=results)
        return results