import asyncio
import itertools
import json
import logging
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import List, Dict, Any, Optional

from django.core.cache import cache
from django.db import models
from django.utils import timezone

from .models import VersionTag, ChangeSet

logger = logging.getLogger(__name__)


class StructuredDiff:
    """Result of comparing one pair of versions.

    Holds the two version identifiers, the list of change records, an
    impact score, the time spent computing the diff (seconds) and the
    creation timestamp. Instances round-trip through ``to_dict`` /
    ``from_dict`` so they can be stored as JSON.
    """

    def __init__(self, version1: str, version2: str):
        self.version1 = version1
        self.version2 = version2
        self.changes: List[Dict[str, Any]] = []
        self.impact_score = 0.0
        self.computation_time = 0.0
        self.timestamp = timezone.now()

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict representation with the timestamp as ISO-8601 text."""
        return {
            'version1': self.version1,
            'version2': self.version2,
            'changes': self.changes,
            'impact_score': self.impact_score,
            'computation_time': self.computation_time,
            'timestamp': self.timestamp.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'StructuredDiff':
        """Rebuild a diff from the mapping produced by ``to_dict``.

        Raises:
            KeyError: if any of the expected keys is missing from ``data``.
            ValueError: if ``data['timestamp']`` is not parseable by
                ``datetime.fromisoformat``.
        """
        diff = cls(data['version1'], data['version2'])
        diff.changes = data['changes']
        diff.impact_score = data['impact_score']
        diff.computation_time = data['computation_time']
        # Use the stdlib class directly instead of reaching for it through
        # django.utils.timezone, which does not document it as an export.
        diff.timestamp = datetime.fromisoformat(data['timestamp'])
        return diff


class ComparisonEngine:
    """Handles version comparison operations with background processing and caching"""

    def __init__(self, chunk_size: int = 10485760):  # 10MB default chunk size
        # NOTE(review): chunk_size is stored but not read by any method in
        # this file; no chunking is performed by the visible code.
        self.chunk_size = chunk_size
        # Worker pool used to run blocking ORM calls off the event loop.
        self.executor = ThreadPoolExecutor(max_workers=4)
        self.cache_ttl = 300  # 5 minutes cache TTL

    async def compare_versions(self, versions: List[str]) -> List[StructuredDiff]:
        """
        Compare every unordered pair of the given versions, using the cache.

        Each pair ``(versions[i], versions[j])`` with ``i < j`` is looked up
        in the cache first; pairs that are not cached are computed
        concurrently via ``_compare_version_pair``.

        Args:
            versions: List of version identifiers to compare

        Returns:
            List of StructuredDiff objects with comparison results, sorted
            by ``(version1, version2)``

        Raises:
            ValueError: if fewer than two versions are supplied.
        """
        if len(versions) < 2:
            raise ValueError("At least two versions required for comparison")

        results: List[StructuredDiff] = []
        cache_misses = []

        # Check cache first. combinations() yields the same (i < j) pairs,
        # in the same order, as a nested index loop would.
        for v1, v2 in itertools.combinations(versions, 2):
            cached_result = cache.get(self._get_cache_key(v1, v2))
            if cached_result:
                results.append(StructuredDiff.from_dict(json.loads(cached_result)))
            else:
                cache_misses.append((v1, v2))

        # Process cache misses concurrently
        if cache_misses:
            new_results = await asyncio.gather(
                *(self._compare_version_pair(v1, v2) for v1, v2 in cache_misses)
            )
            results.extend(new_results)

        return sorted(results, key=lambda d: (d.version1, d.version2))

    def calculate_impact_score(self, diffs: List[StructuredDiff]) -> float:
        """
        Calculate impact score for a set of diffs

        As a side effect, each diff's ``impact_score`` attribute is
        overwritten with its individual score. Change records without a
        ``'file'`` key (for example the ``{'error': ...}`` record written
        when a comparison fails) do not contribute to the file count.

        Args:
            diffs: List of StructuredDiff objects

        Returns:
            Float impact score (0-1): the mean of the per-diff scores, or
            0.0 for an empty list
        """
        if not diffs:
            return 0.0

        weights = {
            'file_count': 0.3,
            'change_size': 0.3,
            'structural_impact': 0.4,
        }
        structural_types = ('function', 'class', 'schema')

        total_score = 0.0
        for diff in diffs:
            # File count impact; records lacking 'file' are skipped rather
            # than raising KeyError.
            touched_files = {c['file'] for c in diff.changes if 'file' in c}
            file_score = min(len(touched_files) / 100, 1.0)  # Normalize to max 100 files

            # Change size impact
            total_changes = sum(
                len(c.get('additions', [])) + len(c.get('deletions', []))
                for c in diff.changes
            )
            size_score = min(total_changes / 1000, 1.0)  # Normalize to max 1000 lines

            # Structural impact (e.g., function/class changes)
            structural_changes = sum(
                1 for c in diff.changes if c.get('type') in structural_types
            )
            # Normalize to max 10 structural changes
            structural_score = min(structural_changes / 10, 1.0)

            # Weighted average
            diff.impact_score = (
                weights['file_count'] * file_score
                + weights['change_size'] * size_score
                + weights['structural_impact'] * structural_score
            )
            total_score += diff.impact_score

        return total_score / len(diffs)

    async def _compare_version_pair(self, version1: str, version2: str) -> StructuredDiff:
        """Compare two versions and cache the result on success.

        Failures are handled best-effort: the exception is logged, the
        returned diff carries a single ``{'error': <message>}`` change
        record with an impact score of 0.0, and nothing is cached.
        """
        start_time = timezone.now()

        # Create diff structure
        diff = StructuredDiff(version1, version2)

        try:
            # Get version data
            v1_tag = await self._get_version_tag(version1)
            v2_tag = await self._get_version_tag(version2)

            if not v1_tag or not v2_tag:
                raise ValueError("Version tag not found")

            diff.changes = await self._process_version_changes(v1_tag, v2_tag)

            # Calculate impact score
            diff.impact_score = self.calculate_impact_score([diff])

            # Store in cache
            cache.set(
                self._get_cache_key(version1, version2),
                json.dumps(diff.to_dict()),
                self.cache_ttl,
            )
        except Exception as e:
            # Deliberate best-effort boundary: keep the traceback in the log
            # and report the failure through the diff itself.
            logger.exception(
                "Failed to compare versions %s and %s", version1, version2
            )
            diff.changes = [{'error': str(e)}]
            diff.impact_score = 0.0

        diff.computation_time = (timezone.now() - start_time).total_seconds()
        return diff

    async def _get_version_tag(self, version: str) -> Optional[VersionTag]:
        """Return the first VersionTag whose name equals ``version``, or None."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self.executor,
            lambda: VersionTag.objects.filter(name=version).first(),
        )

    async def _process_version_changes(
        self,
        v1_tag: VersionTag,
        v2_tag: VersionTag
    ) -> List[Dict[str, Any]]:
        """Collect the change records of every changeset between two tags.

        Changesets are processed one after another, in the order returned
        by ``_get_changesets_between``.
        """
        changes: List[Dict[str, Any]] = []

        # Get changesets between versions
        changesets = await self._get_changesets_between(v1_tag, v2_tag)

        for changeset in changesets:
            changes.extend(await self._process_changeset(changeset))

        return changes

    async def _get_changesets_between(
        self,
        v1_tag: VersionTag,
        v2_tag: VersionTag
    ) -> List[ChangeSet]:
        """Get the applied changesets between two versions.

        Selects changesets on ``v2_tag.branch`` with status ``'applied'``
        created after ``v1_tag.created_at`` and up to and including
        ``v2_tag.created_at``, oldest first. The result is empty when
        ``v1_tag`` is not older than ``v2_tag``.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self.executor,
            lambda: list(
                ChangeSet.objects.filter(
                    branch=v2_tag.branch,
                    created_at__gt=v1_tag.created_at,
                    created_at__lte=v2_tag.created_at,
                    status='applied',
                ).order_by('created_at')
            ),
        )

    async def _process_changeset(self, changeset: ChangeSet) -> List[Dict[str, Any]]:
        """Turn one changeset into a list of field-level change records.

        Reads ``changeset.historical_instance`` and its
        ``diff_against_previous`` inside the worker pool. Returns an empty
        list when either of them is falsy.
        """
        loop = asyncio.get_running_loop()

        def process() -> List[Dict[str, Any]]:
            instance = changeset.historical_instance
            if not instance:
                return []

            # Get changes from historical record.
            # NOTE(review): assumed to map field name -> {'old': ..., 'new': ...};
            # confirm against the model that provides diff_against_previous.
            diff = instance.diff_against_previous
            if not diff:
                return []

            return [
                {
                    'type': 'field',
                    'file': f"{instance._meta.model_name}.{field}",
                    'old_value': values['old'],
                    'new_value': values['new'],
                }
                for field, values in diff.items()
            ]

        return await loop.run_in_executor(self.executor, process)

    def _get_cache_key(self, version1: str, version2: str) -> str:
        """Generate cache key for version comparison (order-sensitive)."""
        return f"version_diff:{version1}:{version2}"