Revert "update"

This reverts commit 75cc618c2b.
This commit is contained in:
pacnpal
2025-09-21 20:11:00 -04:00
parent 75cc618c2b
commit 540f40e689
610 changed files with 4812 additions and 1715 deletions

View File

@@ -1 +0,0 @@
# Core utilities

View File

@@ -1,429 +0,0 @@
"""
Database query optimization utilities and helpers.
"""
# Standard library
import functools
import logging
import time
from contextlib import contextmanager
from typing import Any, Dict, List, Optional, Type

# Third-party (Django)
from django.conf import settings
from django.core.cache import cache
from django.core.exceptions import FieldDoesNotExist
from django.db import connection, models
from django.db.models import Avg, Count, Max, Prefetch, QuerySet
logger = logging.getLogger("query_optimization")
@contextmanager
def track_queries(
    operation_name: str, warn_threshold: int = 10, time_threshold: float = 1.0
):
    """
    Context manager to track database queries for specific operations.

    No-op outside DEBUG: Django only records executed SQL on
    ``connection.queries`` when ``settings.DEBUG`` is True.

    Args:
        operation_name: Name of the operation being tracked (used in logs)
        warn_threshold: Number of queries that triggers a warning
        time_threshold: Execution time in seconds that triggers a warning
    """
    if not settings.DEBUG:
        # Nothing to measure in production; pass straight through.
        yield
        return
    initial_queries = len(connection.queries)
    start_time = time.time()
    try:
        yield
    finally:
        # Metrics are recorded even when the tracked block raises.
        end_time = time.time()
        total_queries = len(connection.queries) - initial_queries
        execution_time = end_time - start_time
        # Collect query details for the queries issued inside this block only
        query_details = []
        if hasattr(connection, "queries") and total_queries > 0:
            recent_queries = connection.queries[-total_queries:]
            query_details = [
                {
                    # Truncate long SQL so log payloads stay bounded.
                    "sql": (
                        query["sql"][:500] + "..."
                        if len(query["sql"]) > 500
                        else query["sql"]
                    ),
                    "time": float(query["time"]),
                    # Times this exact statement ran in the window;
                    # values > 1 usually indicate an N+1 pattern.
                    "duplicate_count": sum(
                        1 for q in recent_queries if q["sql"] == query["sql"]
                    ),
                }
                for query in recent_queries
            ]
        performance_data = {
            "operation": operation_name,
            "query_count": total_queries,
            "execution_time": execution_time,
            "queries": query_details if settings.DEBUG else [],
            # Queries slower than 100ms
            "slow_queries": [
                q for q in query_details if q["time"] > 0.1
            ],
        }
        # Log warnings for performance issues; otherwise debug-level summary
        if total_queries > warn_threshold or execution_time > time_threshold:
            logger.warning(
                f"Performance concern in {operation_name}: "
                f"{total_queries} queries, {execution_time:.2f}s",
                extra=performance_data,
            )
        else:
            logger.debug(
                f"Query tracking for {operation_name}: "
                f"{total_queries} queries, {execution_time:.2f}s",
                extra=performance_data,
            )
class QueryOptimizer:
    """Utility class for common query optimization patterns"""

    @staticmethod
    def optimize_park_queryset(queryset: QuerySet) -> QuerySet:
        """
        Optimize Park queryset with proper select_related and prefetch_related.

        Args:
            queryset: Base Park queryset.

        Returns:
            Queryset with related rows preloaded and annotated with
            ride_count, average_rating and latest_review_date.
        """
        return (
            queryset.select_related("location", "operator", "created_by")
            .prefetch_related("areas", "rides__manufacturer", "reviews__user")
            .annotate(
                # distinct=True is required: annotating across two
                # multi-valued relations (rides AND reviews) multiplies the
                # joined rows, so a plain Count("rides") is inflated.
                # Avg/Max are unaffected by the uniform duplication.
                ride_count=Count("rides", distinct=True),
                average_rating=Avg("reviews__rating"),
                latest_review_date=Max("reviews__created_at"),
            )
        )

    @staticmethod
    def optimize_ride_queryset(queryset: QuerySet) -> QuerySet:
        """
        Optimize Ride queryset with proper relationships.

        Args:
            queryset: Base Ride queryset.

        Returns:
            Queryset with related rows preloaded and annotated with
            review_count, average_rating and latest_review_date.
        """
        return (
            queryset.select_related(
                "park", "park__location", "manufacturer", "created_by"
            )
            .prefetch_related("reviews__user", "media_items")
            .annotate(
                # distinct=True is a no-op here (only one joined relation)
                # but keeps the count correct if more annotations are added.
                review_count=Count("reviews", distinct=True),
                average_rating=Avg("reviews__rating"),
                latest_review_date=Max("reviews__created_at"),
            )
        )

    @staticmethod
    def optimize_user_queryset(queryset: QuerySet) -> QuerySet:
        """
        Optimize User queryset for profile views.

        Args:
            queryset: Base User queryset.

        Returns:
            Queryset with review/authored relations prefetched and counted.
        """
        return queryset.prefetch_related(
            Prefetch("park_reviews", to_attr="cached_park_reviews"),
            Prefetch("ride_reviews", to_attr="cached_ride_reviews"),
            "authored_parks",
            "authored_rides",
        ).annotate(
            # Four reverse relations join pairwise; without distinct=True
            # each Count would be a product of row counts, not a count.
            total_reviews=Count("park_reviews", distinct=True)
            + Count("ride_reviews", distinct=True),
            parks_authored=Count("authored_parks", distinct=True),
            rides_authored=Count("authored_rides", distinct=True),
        )

    @staticmethod
    def create_bulk_queryset(model: Type[models.Model], ids: List[int]) -> QuerySet:
        """
        Create an optimized queryset for bulk operations.

        Args:
            model: Model class whose rows to load.
            ids: Primary keys to fetch.

        Returns:
            A queryset filtered to ``ids``, run through the model-specific
            optimizer when one is registered, otherwise unmodified.
        """
        queryset = model.objects.filter(id__in=ids)
        # Dispatch table instead of an if/elif chain; unknown models fall
        # through to the plain queryset.
        optimizers = {
            "park": QueryOptimizer.optimize_park_queryset,
            "ride": QueryOptimizer.optimize_ride_queryset,
            "user": QueryOptimizer.optimize_user_queryset,
        }
        if hasattr(model, "_meta"):
            optimizer = optimizers.get(model._meta.model_name)
            if optimizer is not None:
                return optimizer(queryset)
        return queryset
class QueryCache:
    """Caching utilities for expensive queries"""

    @staticmethod
    def cache_queryset_result(
        cache_key: str, queryset_func, timeout: int = 3600, **kwargs
    ):
        """
        Return a cached queryset result, computing and storing it on a miss.

        Args:
            cache_key: Unique key for caching
            queryset_func: Function that returns the queryset result
            timeout: Cache timeout in seconds
            **kwargs: Arguments to pass to queryset_func
        """
        hit = cache.get(cache_key)
        if hit is not None:
            logger.debug(f"Cache hit for queryset: {cache_key}")
            return hit
        # Miss: run the expensive operation under query tracking, then store.
        with track_queries(f"cache_miss_{cache_key}"):
            computed = queryset_func(**kwargs)
        cache.set(cache_key, computed, timeout)
        logger.debug(f"Cached queryset result: {cache_key}")
        return computed

    @staticmethod
    def invalidate_model_cache(model_name: str, instance_id: Optional[int] = None):
        """
        Drop cache keys that reference a specific model (pattern-based).

        Args:
            model_name: Name of the model (e.g., 'park', 'ride')
            instance_id: Specific instance ID, if applicable
        """
        # Narrow to one instance when an id is given, else the whole model.
        pattern = (
            f"*{model_name}_{instance_id}*" if instance_id else f"*{model_name}*"
        )
        try:
            # Pattern deletion is a Redis-backend extension, not part of the
            # core Django cache API.
            delete_pattern = getattr(cache, "delete_pattern", None)
            if delete_pattern is None:
                logger.warning(
                    f"Cache backend does not support pattern deletion: {pattern}"
                )
            else:
                deleted_count = delete_pattern(pattern)
                logger.info(
                    f"Invalidated {deleted_count} cache keys for pattern: {pattern}"
                )
        except Exception as e:
            logger.error(f"Error invalidating cache pattern {pattern}: {e}")
class IndexAnalyzer:
    """Analyze and suggest database indexes"""

    @staticmethod
    def analyze_slow_queries(min_time: float = 0.1) -> List[Dict[str, Any]]:
        """
        Analyze slow queries from the current request.

        Args:
            min_time: Minimum query time in seconds to consider "slow"

        Returns:
            One dict per slow query: its SQL, elapsed time, and a heuristic
            analysis of possible optimizations.
        """
        if not hasattr(connection, "queries"):
            return []
        slow_queries = []
        for query in connection.queries:
            query_time = float(query.get("time", 0))
            if query_time >= min_time:
                slow_queries.append(
                    {
                        "sql": query["sql"],
                        "time": query_time,
                        "analysis": IndexAnalyzer._analyze_query_sql(query["sql"]),
                    }
                )
        return slow_queries

    @staticmethod
    def _analyze_query_sql(sql: str) -> Dict[str, Any]:
        """
        Analyze a SQL string and suggest potential optimizations.

        Purely textual heuristics; no database access.
        """
        sql_upper = sql.upper()
        analysis = {
            "has_where_clause": "WHERE" in sql_upper,
            # "JOIN" is a substring of every join variant, so one check
            # suffices (INNER/LEFT/RIGHT JOIN all contain it).
            "has_join": "JOIN" in sql_upper,
            "has_order_by": "ORDER BY" in sql_upper,
            "has_group_by": "GROUP BY" in sql_upper,
            "has_like": "LIKE" in sql_upper,
            "table_scans": [],
            "suggestions": [],
        }
        # Detect potential table scans
        if "WHERE" not in sql_upper and "SELECT COUNT(*) FROM" not in sql_upper:
            analysis["table_scans"].append("Query may be doing a full table scan")
        # Suggest indexes based on patterns
        if analysis["has_where_clause"] and not analysis["has_join"]:
            analysis["suggestions"].append(
                "Consider adding indexes on WHERE clause columns"
            )
        if analysis["has_order_by"]:
            analysis["suggestions"].append(
                "Consider adding indexes on ORDER BY columns"
            )
        if analysis["has_like"]:
            # Fixed: the previous check was inverted (it warned when NO
            # wildcard followed LIKE) and searched the original-case SQL,
            # so lowercase "like" was never found. A leading wildcard means
            # the pattern literal starts with '%' right after LIKE.
            like_pos = sql_upper.find("LIKE")
            if "'%" in sql[like_pos : like_pos + 10]:
                analysis["suggestions"].append(
                    "LIKE queries with leading wildcards cannot use indexes efficiently"
                )
        return analysis

    @staticmethod
    def suggest_model_indexes(model: "Type[models.Model]") -> List[str]:
        """
        Suggest database indexes for a Django model based on its fields.

        Args:
            model: Model class to inspect (annotation is a string so this
                module can be imported without evaluating Django names).

        Returns:
            Human-readable index suggestions.
        """
        suggestions = []
        opts = model._meta
        # Foreign key fields should have indexes (Django adds these
        # automatically)
        for field in opts.fields:
            if isinstance(field, models.ForeignKey):
                suggestions.append(
                    f"Index on {field.name} (automatically created by Django)"
                )
        # Suggest composite indexes for common query patterns:
        # (status-like flag, date) pairs for filtered date queries.
        date_fields = [
            f.name
            for f in opts.fields
            if isinstance(f, (models.DateField, models.DateTimeField))
        ]
        status_fields = [
            f.name
            for f in opts.fields
            if f.name in ["status", "is_active", "is_published"]
        ]
        for date_field in date_fields:
            for status_field in status_fields:
                suggestions.append(
                    f"Composite index on ({status_field}, {date_field}) for filtered date queries"
                )
        # Suggest indexes for fields commonly used in WHERE clauses
        common_filter_fields = ["slug", "name", "created_at", "updated_at"]
        for field in opts.fields:
            if field.name in common_filter_fields and not field.db_index:
                suggestions.append(f"Consider adding db_index=True to {field.name}")
        return suggestions
def log_query_performance():
    """Decorator factory that logs query performance for a function.

    Usage::

        @log_query_performance()
        def my_view(...): ...

    Returns:
        A decorator wrapping the function in ``track_queries``, using
        "<module>.<qualname>" style "<module>.<name>" as the operation name.
    """

    def decorator(func):
        # functools.wraps preserves the wrapped function's metadata
        # (__name__, __doc__, signature introspection); the bare wrapper
        # previously masked all of it as "wrapper".
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            operation_name = f"{func.__module__}.{func.__name__}"
            with track_queries(operation_name):
                return func(*args, **kwargs)

        return wrapper

    return decorator
def optimize_queryset_for_serialization(
    queryset: QuerySet, fields: List[str]
) -> QuerySet:
    """
    Optimize a queryset for API serialization by preloading related objects.

    Args:
        queryset: The queryset to optimize
        fields: List of field names that will be serialized

    Returns:
        The queryset with select_related applied for forward foreign keys
        and prefetch_related for many-to-many relations named in ``fields``.
    """
    model = queryset.model
    opts = model._meta
    select_related_fields = []
    prefetch_related_fields = []
    for field_name in fields:
        try:
            field = opts.get_field(field_name)
        except FieldDoesNotExist:
            # Fixed: catch django.core.exceptions.FieldDoesNotExist — the
            # old models.FieldDoesNotExist alias is gone in modern Django.
            # Field might be a property or method, skip optimization.
            continue
        if isinstance(field, models.ForeignKey):
            # Forward FK (OneToOneField is a subclass): single row, join it.
            select_related_fields.append(field_name)
        elif isinstance(field, (models.ManyToManyField, models.ManyToManyRel)):
            # Fixed: the original referenced models.reverse.ManyToManyRel,
            # which does not exist and raised AttributeError at runtime;
            # ManyToManyRel is exported directly from django.db.models.
            prefetch_related_fields.append(field_name)
    # Apply optimizations
    if select_related_fields:
        queryset = queryset.select_related(*select_related_fields)
    if prefetch_related_fields:
        queryset = queryset.prefetch_related(*prefetch_related_fields)
    return queryset
# Query performance monitoring context manager
@contextmanager
def monitor_db_performance(operation_name: str):
    """
    Context manager that monitors database performance for an operation.

    Unlike track_queries, this runs regardless of settings.DEBUG, though
    query data is only available when connection.queries is populated.

    Args:
        operation_name: Label used in the emitted log records.
    """
    initial_queries = len(connection.queries) if hasattr(connection, "queries") else 0
    start_time = time.time()
    try:
        yield
    finally:
        # Metrics are emitted even when the monitored block raises.
        end_time = time.time()
        duration = end_time - start_time
        if hasattr(connection, "queries"):
            total_queries = len(connection.queries) - initial_queries
            # Analyze queries for performance issues (50ms threshold)
            slow_queries = IndexAnalyzer.analyze_slow_queries(0.05)
            performance_data = {
                "operation": operation_name,
                "duration": duration,
                "query_count": total_queries,
                "slow_query_count": len(slow_queries),
                # Limit to top 5 slow queries
                "slow_queries": slow_queries[:5],
            }
            # Log performance data
            if duration > 1.0 or total_queries > 15 or slow_queries:
                # Fixed: the original f-string put a newline inside a
                # replacement field, which is only legal on Python 3.12+
                # (PEP 701) and a SyntaxError on earlier interpreters.
                logger.warning(
                    f"Performance issue in {operation_name}: "
                    f"{duration:.3f}s, {total_queries} queries, "
                    f"{len(slow_queries)} slow",
                    extra=performance_data,
                )
            else:
                logger.debug(
                    f"DB performance for {operation_name}: "
                    f"{duration:.3f}s, {total_queries} queries",
                    extra=performance_data,
                )