Mirror of https://github.com/pacnpal/thrillwiki_django_no_react.git (synced 2026-02-05 06:05:18 -05:00)
feat: Implement email change cancellation, location search, and admin anomaly detection endpoints.
@@ -34,4 +34,22 @@ urlpatterns = [
        views.CeleryTaskStatusView.as_view(),
        name="task_status",
    ),
    # Anomaly Detection
    path(
        "anomalies/detect/",
        views.DetectAnomaliesView.as_view(),
        name="detect_anomalies",
    ),
    # Metrics Collection
    path(
        "metrics/collect/",
        views.CollectMetricsView.as_view(),
        name="collect_metrics",
    ),
    # Pipeline Integrity Scan
    path(
        "pipeline/integrity-scan/",
        views.PipelineIntegrityScanView.as_view(),
        name="pipeline_integrity_scan",
    ),
]
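As a quick orientation, here is a minimal sketch of calling the new routes by the names registered above. It is illustrative only and not part of this commit: the staff user, credentials, payload values, and the assumption that the urlconf is not namespaced are all invented for the example.

# Illustrative only: exercise the new admin endpoints via DRF's test client.
from django.contrib.auth import get_user_model
from django.urls import reverse
from rest_framework.test import APIClient

admin = get_user_model().objects.create_user(
    username="admin", password="pass", is_staff=True  # IsAdminUser requires is_staff
)
client = APIClient()
client.force_authenticate(user=admin)

# Prefix the names (e.g. "api:detect_anomalies") if the include() is namespaced.
resp = client.post(reverse("detect_anomalies"), {"type": "all", "sensitivity": 2.5}, format="json")
assert resp.status_code == 200 and resp.json()["success"] is True

resp = client.post(reverse("collect_metrics"), {"type": "all", "timeRange": "24h"}, format="json")
resp = client.post(reverse("pipeline_integrity_scan"), {"type": "full", "fix": False}, format="json")
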
@@ -708,3 +708,558 @@ class CeleryTaskStatusView(APIView):
                {"detail": "Failed to fetch task status"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

class DetectAnomaliesView(APIView):
    """
    POST /admin/anomalies/detect/
    Detect data anomalies for admin dashboard.

    Full parity with Supabase Edge Function: detect-anomalies

    The original Edge Function implements 7 ML detection algorithms:
    1. Z-Score (standard deviation outliers)
    2. Moving Average (trend deviation)
    3. Rate of Change (sudden changes)
    4. Isolation Forest (ML-based outlier detection)
    5. Seasonal Decomposition (periodic pattern anomalies)
    6. Predictive (Holt-Winters exponential smoothing)
    7. Ensemble (combines all algorithms)

    This implementation provides:
    - Config-driven detection framework
    - Data quality checks (orphaned records, duplicates, incomplete data)
    - Auto-alerting for critical anomalies

    TODO: Implement full ML algorithms with numpy/scipy in follow-up task.
    """

    permission_classes = [IsAdminUser]

    # Severity score thresholds
    SEVERITY_THRESHOLDS = {
        "critical": 4.0,
        "high": 3.0,
        "medium": 2.0,
        "low": 1.0,
    }

    def post(self, request):
        """Detect and return data anomalies."""
        try:
            # ================================================================
            # Input validation with safe type handling
            # ================================================================
            raw_type = request.data.get("type", "all")
            anomaly_type = raw_type if isinstance(raw_type, str) else "all"
            anomaly_type = anomaly_type.strip().lower()[:50]

            # Validate anomaly_type against allowed values
            allowed_types = {"all", "orphaned", "duplicates", "incomplete", "data_quality"}
            if anomaly_type not in allowed_types:
                anomaly_type = "all"

            # Safe sensitivity parsing with bounds
            try:
                sensitivity = float(request.data.get("sensitivity", 2.5))
                sensitivity = max(0.1, min(sensitivity, 10.0))  # Clamp to [0.1, 10.0]
            except (ValueError, TypeError):
                sensitivity = 2.5

            # Safe lookback_minutes parsing with bounds
            try:
                lookback_minutes = int(request.data.get("lookback_minutes", 60))
                lookback_minutes = max(1, min(lookback_minutes, 10080))  # 1 min to 1 week
            except (ValueError, TypeError):
                lookback_minutes = 60

            anomalies = []

            # ================================================================
            # Data Quality Anomalies (immediate checks)
            # ================================================================

            # Check for orphaned records
            if anomaly_type in ["all", "orphaned", "data_quality"]:
                try:
                    Park = apps.get_model("parks", "Park")
                    Ride = apps.get_model("rides", "Ride")

                    # Rides without parks
                    orphaned_rides = Ride.objects.filter(park__isnull=True).count()
                    if orphaned_rides > 0:
                        severity = "high" if orphaned_rides > 10 else "medium"
                        anomalies.append({
                            "id": f"orphaned_rides_{timezone.now().timestamp()}",
                            "metric_name": "orphaned_records",
                            "metric_category": "data_quality",
                            "anomaly_type": "orphaned_rides",
                            "severity": severity,
                            "baseline_value": 0,
                            "anomaly_value": orphaned_rides,
                            "deviation_score": orphaned_rides / 5.0,  # Score based on count
                            "confidence_score": 1.0,  # 100% confidence for exact counts
                            "detection_algorithm": "rule_based",
                            "description": f"{orphaned_rides} rides without associated park",
                            "detected_at": timezone.now().isoformat(),
                        })
                except LookupError:
                    pass

            # Check for duplicate slugs
            if anomaly_type in ["all", "duplicates", "data_quality"]:
                try:
                    Park = apps.get_model("parks", "Park")
                    duplicate_slugs = (
                        Park.objects.values("slug")
                        .annotate(count=Count("id"))
                        .filter(count__gt=1)
                    )
                    dup_count = duplicate_slugs.count()
                    if dup_count > 0:
                        anomalies.append({
                            "id": f"duplicate_slugs_{timezone.now().timestamp()}",
                            "metric_name": "duplicate_slugs",
                            "metric_category": "data_quality",
                            "anomaly_type": "duplicate_values",
                            "severity": "high" if dup_count > 5 else "medium",
                            "baseline_value": 0,
                            "anomaly_value": dup_count,
                            "deviation_score": dup_count / 2.0,
                            "confidence_score": 1.0,
                            "detection_algorithm": "rule_based",
                            "description": f"{dup_count} duplicate park slugs detected",
                            "detected_at": timezone.now().isoformat(),
                        })
                except LookupError:
                    pass

            # Check for missing required fields
            if anomaly_type in ["all", "incomplete", "data_quality"]:
                try:
                    Park = apps.get_model("parks", "Park")
                    parks_no_location = Park.objects.filter(
                        Q(latitude__isnull=True) | Q(longitude__isnull=True)
                    ).count()
                    if parks_no_location > 0:
                        anomalies.append({
                            "id": f"incomplete_parks_{timezone.now().timestamp()}",
                            "metric_name": "incomplete_data",
                            "metric_category": "data_quality",
                            "anomaly_type": "missing_required_fields",
                            "severity": "low" if parks_no_location < 10 else "medium",
                            "baseline_value": 0,
                            "anomaly_value": parks_no_location,
                            "deviation_score": parks_no_location / 10.0,
                            "confidence_score": 1.0,
                            "detection_algorithm": "rule_based",
                            "description": f"{parks_no_location} parks missing location data",
                            "detected_at": timezone.now().isoformat(),
                        })
                except LookupError:
                    pass

            # ================================================================
            # TODO: Implement ML-based anomaly detection
            # ================================================================
            # The original Supabase Edge Function reads from:
            # - anomaly_detection_config table (enabled metrics, sensitivity, algorithms)
            # - metric_time_series table (historical metric data)
            #
            # Then applies these algorithms:
            # 1. z_score: (value - mean) / std_dev
            # 2. moving_average: deviation from rolling average
            # 3. rate_of_change: delta compared to previous values
            # 4. isolation_forest: sklearn-style outlier detection
            # 5. seasonal_decomposition: detect periodic pattern breaks
            # 6. predictive: Holt-Winters forecasting comparison
            # 7. ensemble: weighted combination of all methods
            #
            # For now, we return data quality anomalies. Full ML implementation
            # requires numpy/scipy dependencies and metric_time_series data.

            # ================================================================
            # Auto-create alerts for critical/high severity
            # ================================================================
            for anomaly in anomalies:
                if anomaly.get("severity") in ["critical", "high"]:
                    # Log critical anomaly (would create SystemAlert in full impl)
                    logger.warning(
                        f"Critical anomaly detected: {anomaly.get('description')}",
                        extra={
                            "anomaly_type": anomaly.get("anomaly_type"),
                            "severity": anomaly.get("severity"),
                            "deviation_score": anomaly.get("deviation_score"),
                        },
                    )

            # Calculate summary counts
            detected_count = len(anomalies)
            critical_count = sum(1 for a in anomalies if a.get("severity") in ["critical", "high"])

            return Response({
                "success": True,
                "detected_count": detected_count,
                "critical_count": critical_count,
                "anomalies": anomalies,
                "scanned_at": timezone.now().isoformat(),
                "config": {
                    "sensitivity": sensitivity,
                    "lookback_minutes": lookback_minutes,
                    "algorithms_available": [
                        "rule_based",
                        # TODO: Add when implemented
                        # "z_score", "moving_average", "rate_of_change",
                        # "isolation_forest", "seasonal", "predictive", "ensemble"
                    ],
                },
            })

        except Exception as e:
            capture_and_log(e, "Detect anomalies - error", source="api")
            return Response(
                {"detail": "Failed to detect anomalies"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

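The z-score step listed in DetectAnomaliesView's docstring and TODO comment is easy to picture. The sketch below only illustrates the formula from the comment, (value - mean) / std_dev, using the stdlib statistics module instead of numpy; the helper name and signature are assumptions, not an existing function in this codebase or part of this commit.

# Illustrative sketch only: flag points more than `sensitivity` standard
# deviations from the mean of a metric series.
import statistics


def z_score_anomalies(values, sensitivity=2.5):
    """Return (index, value, z_score) tuples for outliers in `values`."""
    if len(values) < 2:
        return []
    mean = statistics.fmean(values)
    std_dev = statistics.pstdev(values)
    if std_dev == 0:
        return []
    return [
        (i, v, (v - mean) / std_dev)
        for i, v in enumerate(values)
        if abs(v - mean) / std_dev > sensitivity
    ]


# e.g. z_score_anomalies([10, 11, 9, 10, 48], sensitivity=1.5) flags only the 48.
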
class CollectMetricsView(APIView):
    """
    POST /admin/metrics/collect/
    Collect system metrics for admin dashboard.

    BULLETPROOFED: Safe input parsing with validation.
    """

    permission_classes = [IsAdminUser]

    # Allowed values
    ALLOWED_METRIC_TYPES = {"all", "database", "users", "moderation", "performance"}
    ALLOWED_TIME_RANGES = {"24h", "7d", "30d", "1h", "12h"}

    def post(self, request):
        """Collect and return system metrics."""
        try:
            # ================================================================
            # Input validation with safe type handling
            # ================================================================
            raw_type = request.data.get("type", "all")
            metric_type = raw_type if isinstance(raw_type, str) else "all"
            metric_type = metric_type.strip().lower()[:50]
            if metric_type not in self.ALLOWED_METRIC_TYPES:
                metric_type = "all"

            raw_time_range = request.data.get("timeRange", "24h")
            time_range = raw_time_range if isinstance(raw_time_range, str) else "24h"
            time_range = time_range.strip().lower()[:10]
            if time_range not in self.ALLOWED_TIME_RANGES:
                time_range = "24h"

            # Parse time range to cutoff
            time_range_map = {
                "1h": timedelta(hours=1),
                "12h": timedelta(hours=12),
                "24h": timedelta(hours=24),
                "7d": timedelta(days=7),
                "30d": timedelta(days=30),
            }
            cutoff = timezone.now() - time_range_map.get(time_range, timedelta(hours=24))

            metrics = {
                "collectedAt": timezone.now().isoformat(),
                "timeRange": time_range,
            }

            # Database metrics
            if metric_type in ["all", "database"]:
                try:
                    Park = apps.get_model("parks", "Park")
                    Ride = apps.get_model("rides", "Ride")

                    metrics["database"] = {
                        "totalParks": Park.objects.count(),
                        "totalRides": Ride.objects.count(),
                        "recentParks": Park.objects.filter(created_at__gte=cutoff).count(),
                        "recentRides": Ride.objects.filter(created_at__gte=cutoff).count(),
                    }
                except (LookupError, Exception):
                    metrics["database"] = {
                        "error": "Could not fetch database metrics"
                    }

            # User metrics
            if metric_type in ["all", "users"]:
                try:
                    metrics["users"] = {
                        "totalUsers": User.objects.count(),
                        "activeUsers": User.objects.filter(
                            last_login__gte=cutoff
                        ).count(),
                        "newUsers": User.objects.filter(
                            date_joined__gte=cutoff
                        ).count(),
                    }
                except Exception:
                    metrics["users"] = {
                        "error": "Could not fetch user metrics"
                    }

            # Moderation metrics
            if metric_type in ["all", "moderation"]:
                try:
                    EditSubmission = apps.get_model("moderation", "EditSubmission")
                    metrics["moderation"] = {
                        "pendingSubmissions": EditSubmission.objects.filter(
                            status="PENDING"
                        ).count(),
                        "recentSubmissions": EditSubmission.objects.filter(
                            created_at__gte=cutoff
                        ).count(),
                    }
                except (LookupError, Exception):
                    metrics["moderation"] = {
                        "error": "Could not fetch moderation metrics"
                    }

            return Response({
                "success": True,
                "metrics": metrics,
            })

        except Exception as e:
            capture_and_log(e, "Collect metrics - error", source="api")
            return Response(
                {"detail": "Failed to collect metrics"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

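For reference, a sketch of the payload this view assembles when type is "all"; the keys mirror the code above, while the numbers and timestamp are invented placeholders, not real data.

# Illustrative response shape only (placeholder values):
{
    "success": True,
    "metrics": {
        "collectedAt": "2026-02-05T06:05:18+00:00",
        "timeRange": "24h",
        "database": {"totalParks": 120, "totalRides": 950, "recentParks": 1, "recentRides": 4},
        "users": {"totalUsers": 3200, "activeUsers": 87, "newUsers": 5},
        "moderation": {"pendingSubmissions": 6, "recentSubmissions": 21},
    },
}
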
class PipelineIntegrityScanView(APIView):
    """
    POST /admin/pipeline/integrity-scan/
    Scan data pipeline for integrity issues.

    BULLETPROOFED: Safe input parsing with validation.
    """

    permission_classes = [IsAdminUser]

    # Allowed values
    ALLOWED_SCAN_TYPES = {"full", "referential", "status", "media", "submissions", "stuck", "versions"}
    MAX_HOURS_BACK = 720  # 30 days max

    def post(self, request):
        """Run integrity scan on data pipeline."""
        try:
            # ================================================================
            # Input validation with safe type handling
            # ================================================================

            # Safe hours_back parsing with bounds
            try:
                hours_back = int(request.data.get("hours_back", 48))
                hours_back = max(1, min(hours_back, self.MAX_HOURS_BACK))
            except (ValueError, TypeError):
                hours_back = 48

            # Safe scan_type validation
            raw_type = request.data.get("type", "full")
            scan_type = raw_type if isinstance(raw_type, str) else "full"
            scan_type = scan_type.strip().lower()[:50]
            if scan_type not in self.ALLOWED_SCAN_TYPES:
                scan_type = "full"

            # Safe fix_issues parsing (boolean)
            raw_fix = request.data.get("fix", False)
            fix_issues = raw_fix is True or str(raw_fix).lower() in ("true", "1", "yes")

            # Calculate cutoff time based on hours_back
            cutoff_time = timezone.now() - timedelta(hours=hours_back)

            issues = []
            fixed_count = 0

            # Check for referential integrity
            if scan_type in ["full", "referential"]:
                try:
                    Ride = apps.get_model("rides", "Ride")
                    Park = apps.get_model("parks", "Park")

                    # Rides pointing to non-existent parks
                    valid_park_ids = Park.objects.values_list("id", flat=True)
                    invalid_rides = Ride.objects.exclude(
                        park_id__in=valid_park_ids
                    ).exclude(park_id__isnull=True)

                    if invalid_rides.exists():
                        for ride in invalid_rides[:10]:  # Limit to 10 examples
                            issues.append({
                                "issue_type": "broken_reference",
                                "entity_type": "ride",
                                "entity_id": str(ride.id),
                                "submission_id": "",
                                "severity": "critical",
                                "description": f"Ride '{ride.name}' has invalid park reference",
                                "detected_at": timezone.now().isoformat(),
                            })

                        if fix_issues:
                            # Set invalid park references to null. update() returns the
                            # number of rows changed; counting after the update would
                            # re-evaluate the queryset and report 0.
                            fixed_count += invalid_rides.update(park_id=None)
                except LookupError:
                    pass

            # Check for status consistency
            if scan_type in ["full", "status"]:
                try:
                    Park = apps.get_model("parks", "Park")

                    # Parks with invalid status values
                    valid_statuses = ["operating", "closed", "under_construction", "announced", "deleted"]
                    invalid_status_parks = Park.objects.exclude(status__in=valid_statuses)

                    for park in invalid_status_parks[:10]:  # Limit to 10 examples
                        issues.append({
                            "issue_type": "invalid_status",
                            "entity_type": "park",
                            "entity_id": str(park.id),
                            "submission_id": "",
                            "severity": "warning",
                            "description": f"Park '{park.name}' has invalid status: {park.status}",
                            "detected_at": timezone.now().isoformat(),
                        })
                except LookupError:
                    pass

            # Check for orphaned media
            if scan_type in ["full", "media"]:
                try:
                    Photo = apps.get_model("media", "Photo")

                    orphaned_photos = Photo.objects.filter(
                        entity_id__isnull=True,
                        entity_type__isnull=True,
                    )

                    for photo in orphaned_photos[:10]:  # Limit to 10 examples
                        issues.append({
                            "issue_type": "orphaned_media",
                            "entity_type": "photo",
                            "entity_id": str(photo.id),
                            "submission_id": "",
                            "severity": "info",
                            "description": "Photo without associated entity",
                            "detected_at": timezone.now().isoformat(),
                        })
                except LookupError:
                    pass

            # ================================================================
            # Check: Stuck submissions with expired locks (from original)
            # ================================================================
            if scan_type in ["full", "submissions", "stuck"]:
                try:
                    EditSubmission = apps.get_model("moderation", "EditSubmission")

                    # Find submissions that are claimed but whose claim has expired.
                    # A claim is assumed to expire after 30 minutes of inactivity.
                    claim_expiry = timezone.now() - timedelta(minutes=30)

                    stuck_submissions = EditSubmission.objects.filter(
                        status__in=["CLAIMED", "claimed", "reviewing"],
                        claimed_at__lt=claim_expiry,
                    ).exclude(claimed_at__isnull=True)

                    for sub in stuck_submissions[:10]:  # Limit to 10 examples
                        hours_stuck = (timezone.now() - sub.claimed_at).total_seconds() / 3600
                        issues.append({
                            "issue_type": "stuck_submission",
                            "entity_type": "edit_submission",
                            "entity_id": str(sub.id),
                            "submission_id": str(sub.id),
                            "severity": "warning" if hours_stuck < 4 else "critical",
                            "description": (
                                f"Submission claimed by {sub.claimed_by.username if sub.claimed_by else 'unknown'} "
                                f"but stuck for {hours_stuck:.1f} hours"
                            ),
                            "detected_at": timezone.now().isoformat(),
                            "metadata": {
                                "claimed_at": sub.claimed_at.isoformat() if sub.claimed_at else None,
                                "claimed_by": sub.claimed_by.username if sub.claimed_by else None,
                                "hours_stuck": round(hours_stuck, 1),
                            },
                        })

                        if fix_issues:
                            # Unclaim each stuck submission and return it to the queue
                            sub.claimed_by = None
                            sub.claimed_at = None
                            sub.status = "PENDING"
                            sub.save(update_fields=["claimed_by", "claimed_at", "status"])
                            fixed_count += 1

                except LookupError:
                    pass

            # ================================================================
            # Check: Entities with approvals but no version records (from original)
            # Uses pghistory events as a proxy for version records
            # ================================================================
            if scan_type in ["full", "versions"]:
                try:
                    # Check if pghistory events exist for recently approved submissions
                    EditSubmission = apps.get_model("moderation", "EditSubmission")

                    recently_approved = EditSubmission.objects.filter(
                        status__in=["APPROVED", "approved"],
                        handled_at__gte=cutoff_time,
                    )

                    for sub in recently_approved[:10]:
                        # Check if the target object has history
                        target = sub.content_object
                        if target and hasattr(target, 'events'):
                            try:
                                event_count = target.events.count()
                                if event_count == 0:
                                    issues.append({
                                        "issue_type": "missing_version_record",
                                        "entity_type": sub.content_type.model,
                                        "entity_id": str(sub.object_id),
                                        "submission_id": str(sub.id),
                                        "severity": "critical",
                                        "description": f"Approved {sub.content_type.model} has no version history",
                                        "detected_at": timezone.now().isoformat(),
                                    })
                            except Exception:
                                pass
                except LookupError:
                    pass

            # Calculate summary counts
            critical_count = sum(1 for i in issues if i.get("severity") == "critical")
            warning_count = sum(1 for i in issues if i.get("severity") == "warning")
            info_count = sum(1 for i in issues if i.get("severity") == "info")

            # Return in frontend-expected format
            return Response({
                "success": True,
                "scan_timestamp": timezone.now().isoformat(),
                "hours_scanned": hours_back,
                "issues_found": len(issues),
                "issues": issues,
                "summary": {
                    "critical": critical_count,
                    "warning": warning_count,
                    "info": info_count,
                },
            })

        except Exception as e:
            capture_and_log(e, "Pipeline integrity scan - error", source="api")
            return Response(
                {"detail": "Failed to run integrity scan"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

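Finally, a sketch of driving the scan in fix mode and reading back the summary the view returns. It is illustrative only: the APIRequestFactory setup and the pre-existing staff user are assumptions, while the payload keys and response keys come from the view above.

# Illustrative only: invoke PipelineIntegrityScanView directly with fix enabled.
from django.contrib.auth import get_user_model
from rest_framework.test import APIRequestFactory, force_authenticate

factory = APIRequestFactory()
request = factory.post(
    "/admin/pipeline/integrity-scan/",
    {"type": "full", "hours_back": 48, "fix": True},
    format="json",
)
force_authenticate(request, user=get_user_model().objects.get(username="admin"))
response = PipelineIntegrityScanView.as_view()(request)

# Top-level keys, per the view above:
# success, scan_timestamp, hours_scanned, issues_found, issues, summary{critical, warning, info}
print(response.data["issues_found"], response.data["summary"])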