mirror of
https://github.com/pacnpal/thrillwiki_django_no_react.git
synced 2025-12-20 18:31:09 -05:00
237 lines
8.4 KiB
Python
237 lines
8.4 KiB
Python
import asyncio
|
|
import json
|
|
from typing import List, Dict, Any, Optional
|
|
from django.core.cache import cache
|
|
from django.db import models
|
|
from django.utils import timezone
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from .models import VersionTag, ChangeSet
|
|
|
|
class StructuredDiff:
    """Outcome of comparing one pair of versions.

    Attributes:
        version1: Identifier of the first version of the pair.
        version2: Identifier of the second version of the pair.
        changes: One dict per recorded change; empty until a comparison
            fills it in.
        impact_score: Score assigned to this diff; starts at 0.0.
        computation_time: Seconds spent computing the diff; starts at 0.0.
        timestamp: Moment this object was created (``timezone.now()``),
            or the moment restored by :meth:`from_dict`.
    """

    def __init__(self, version1: str, version2: str):
        self.version1 = version1
        self.version2 = version2
        self.changes: List[Dict[str, Any]] = []
        self.impact_score = 0.0
        self.computation_time = 0.0
        self.timestamp = timezone.now()

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict snapshot with the timestamp as an ISO 8601 string.

        ``changes`` is passed through as the same list object, not a copy.
        """
        return {
            'version1': self.version1,
            'version2': self.version2,
            'changes': self.changes,
            'impact_score': self.impact_score,
            'computation_time': self.computation_time,
            'timestamp': self.timestamp.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'StructuredDiff':
        """Rebuild a diff from the mapping produced by :meth:`to_dict`.

        Args:
            data: Mapping with the keys ``version1``, ``version2``,
                ``changes``, ``impact_score``, ``computation_time`` and
                ``timestamp`` (an ISO 8601 string).

        Returns:
            A new StructuredDiff carrying the stored values.

        Raises:
            KeyError: If any of the expected keys is missing.
            ValueError: If ``timestamp`` is not a parseable ISO string.
        """
        # Use the stdlib class directly: ``timezone.datetime`` only resolves
        # because django.utils.timezone imports ``datetime`` for its own use,
        # which is not part of its documented API.
        from datetime import datetime

        diff = cls(data['version1'], data['version2'])
        diff.changes = data['changes']
        diff.impact_score = data['impact_score']
        diff.computation_time = data['computation_time']
        diff.timestamp = datetime.fromisoformat(data['timestamp'])
        return diff
|
|
|
|
class ComparisonEngine:
    """Handles version comparison operations with background processing and caching.

    Blocking ORM work is handed to a private thread pool so the coroutines
    can await it, and each successfully computed diff is stored in the
    Django cache as a JSON string for ``cache_ttl`` seconds.
    """

    def __init__(self, chunk_size: int = 10485760):  # 10MB default chunk size
        """Create an engine with its own four-worker thread pool.

        Args:
            chunk_size: Stored on the instance as ``chunk_size``; no method
                of this class reads it.
        """
        self.chunk_size = chunk_size
        self.executor = ThreadPoolExecutor(max_workers=4)
        self.cache_ttl = 300  # 5 minutes cache TTL

    async def compare_versions(self, versions: List[str]) -> List[StructuredDiff]:
        """
        Compare multiple versions, processing in background and using cache

        Every unordered pair ``(versions[i], versions[j])`` with ``i < j`` is
        compared. Cached pairs are deserialized; the rest are computed
        concurrently.

        Args:
            versions: List of version identifiers to compare

        Returns:
            List of StructuredDiff objects with comparison results, sorted
            by ``(version1, version2)``

        Raises:
            ValueError: If fewer than two versions are given.
        """
        if len(versions) < 2:
            raise ValueError("At least two versions required for comparison")

        results: List[StructuredDiff] = []
        cache_misses = []

        # Check cache first
        for index, first in enumerate(versions[:-1]):
            for second in versions[index + 1:]:
                cached_result = cache.get(self._get_cache_key(first, second))
                if cached_result:
                    results.append(
                        StructuredDiff.from_dict(json.loads(cached_result))
                    )
                else:
                    cache_misses.append((first, second))

        # Process cache misses in background
        if cache_misses:
            new_results = await asyncio.gather(
                *(self._compare_version_pair(v1, v2) for v1, v2 in cache_misses)
            )
            results.extend(new_results)

        return sorted(results, key=lambda x: (x.version1, x.version2))

    def calculate_impact_score(self, diffs: List[StructuredDiff]) -> float:
        """
        Calculate impact score for a set of diffs

        As a side effect, each diff's ``impact_score`` attribute is
        overwritten with its individual score.

        Args:
            diffs: List of StructuredDiff objects

        Returns:
            Float impact score (0-1): the mean of the per-diff scores, or
            0.0 for an empty list
        """
        if not diffs:
            return 0.0

        weights = {
            'file_count': 0.3,
            'change_size': 0.3,
            'structural_impact': 0.4,
        }
        total_score = 0.0

        for diff in diffs:
            # File count impact. Entries without a 'file' key (such as the
            # {'error': ...} placeholder _compare_version_pair records on
            # failure) are skipped instead of raising KeyError.
            touched_files = {c['file'] for c in diff.changes if 'file' in c}
            file_score = min(len(touched_files) / 100, 1.0)  # Normalize to max 100 files

            # Change size impact
            total_changes = sum(
                len(c.get('additions', [])) + len(c.get('deletions', []))
                for c in diff.changes
            )
            size_score = min(total_changes / 1000, 1.0)  # Normalize to max 1000 lines

            # Structural impact (e.g., function/class changes)
            structural_changes = sum(
                1 for c in diff.changes
                if c.get('type') in ('function', 'class', 'schema')
            )
            structural_score = min(structural_changes / 10, 1.0)  # Normalize to max 10 structural changes

            # Weighted average
            diff.impact_score = (
                weights['file_count'] * file_score
                + weights['change_size'] * size_score
                + weights['structural_impact'] * structural_score
            )
            total_score += diff.impact_score

        return total_score / len(diffs)

    async def _compare_version_pair(self, version1: str, version2: str) -> StructuredDiff:
        """Compare two versions in background.

        Never raises for ordinary errors: any ``Exception`` is recorded as a
        single ``{'error': <message>}`` entry in ``changes`` with an impact
        score of 0.0, and that diff is not written to the cache.
        """
        start_time = timezone.now()

        # Create diff structure
        diff = StructuredDiff(version1, version2)

        try:
            # Get version data
            v1_tag = await self._get_version_tag(version1)
            v2_tag = await self._get_version_tag(version2)

            if not v1_tag or not v2_tag:
                raise ValueError("Version tag not found")

            diff.changes = await self._process_version_changes(v1_tag, v2_tag)

            # Calculate impact score
            diff.impact_score = self.calculate_impact_score([diff])

            # Store in cache
            cache.set(
                self._get_cache_key(version1, version2),
                json.dumps(diff.to_dict()),
                self.cache_ttl,
            )
        except Exception as e:
            # Best effort by design: report the failure inside the diff so
            # one bad pair does not abort the whole gather() in
            # compare_versions.
            diff.changes = [{'error': str(e)}]
            diff.impact_score = 0.0

        diff.computation_time = (timezone.now() - start_time).total_seconds()
        return diff

    async def _get_version_tag(self, version: str) -> Optional[VersionTag]:
        """Get version tag by identifier.

        Returns the first VersionTag whose ``name`` equals ``version``, or
        None when there is no match. The query runs on the engine's thread
        pool.
        """
        # get_running_loop() is the recommended call inside a coroutine.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self.executor,
            lambda: VersionTag.objects.filter(name=version).first()
        )

    async def _process_version_changes(
        self,
        v1_tag: VersionTag,
        v2_tag: VersionTag
    ) -> List[Dict[str, Any]]:
        """Collect the change entries of every changeset between two tags.

        Changesets are processed one after another, in the order returned by
        ``_get_changesets_between``.
        """
        changes = []

        # Get changesets between versions
        changesets = await self._get_changesets_between(v1_tag, v2_tag)

        for changeset in changesets:
            changes.extend(await self._process_changeset(changeset))

        return changes

    async def _get_changesets_between(
        self,
        v1_tag: VersionTag,
        v2_tag: VersionTag
    ) -> List[ChangeSet]:
        """Get all changesets between two versions.

        Selects changesets with status ``'applied'`` on ``v2_tag.branch``
        whose ``created_at`` is after ``v1_tag.created_at`` and no later
        than ``v2_tag.created_at``, ordered oldest first. The query is
        evaluated on the engine's thread pool.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self.executor,
            lambda: list(ChangeSet.objects.filter(
                branch=v2_tag.branch,
                created_at__gt=v1_tag.created_at,
                created_at__lte=v2_tag.created_at,
                status='applied'
            ).order_by('created_at'))
        )

    async def _process_changeset(self, changeset: ChangeSet) -> List[Dict[str, Any]]:
        """Process individual changeset for comparison.

        Returns one ``'field'`` change entry per item of the historical
        instance's ``diff_against_previous``, or an empty list when the
        changeset has no historical instance or no diff.
        """
        loop = asyncio.get_running_loop()

        def process():
            instance = changeset.historical_instance
            if not instance:
                return []

            # Get changes from historical record
            # NOTE(review): assumes a mapping of field name -> dict with
            # 'old' and 'new' keys; confirm against the model definition.
            diff = instance.diff_against_previous
            if not diff:
                return []

            return [
                {
                    'type': 'field',
                    'file': f"{instance._meta.model_name}.{field}",
                    'old_value': values['old'],
                    'new_value': values['new']
                }
                for field, values in diff.items()
            ]

        return await loop.run_in_executor(self.executor, process)

    def _get_cache_key(self, version1: str, version2: str) -> str:
        """Generate cache key for version comparison"""
        return f"version_diff:{version1}:{version2}"