Refactor test utilities and enhance ASGI settings

- Cleaned up and standardized assertions in ApiTestMixin for API response validation.
- Updated ASGI settings to use os.environ to set the DJANGO_SETTINGS_MODULE environment variable (a minimal sketch follows this list).
- Removed unused imports and improved formatting in settings.py.
- Refactored URL patterns in urls.py for better readability and organization.
- Enhanced view functions in views.py for consistency and clarity.
- Added .flake8 configuration for linting and style enforcement.
- Introduced type stubs for django-environ to improve type checking with Pylance.
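
The ASGI change follows the standard Django bootstrap pattern. A minimal sketch, assuming a settings module named "config.settings" (the actual module path in this repo may differ):

    # asgi.py -- illustrative only; the settings module path is an assumption
    import os

    from django.core.asgi import get_asgi_application

    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
    application = get_asgi_application()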
commit 66ed4347a9
parent 69c07d1381
Author: pacnpal
Date:   2025-08-20 19:51:59 -04:00
230 changed files with 15094 additions and 11578 deletions


@@ -7,18 +7,20 @@ import logging
from contextlib import contextmanager
from typing import Optional, Dict, Any, List, Type
from django.db import connection, models
from django.db.models import QuerySet, Prefetch, Count, Avg, Max, Min
from django.db.models import QuerySet, Prefetch, Count, Avg, Max
from django.conf import settings
from django.core.cache import cache
logger = logging.getLogger('query_optimization')
logger = logging.getLogger("query_optimization")
@contextmanager
def track_queries(operation_name: str, warn_threshold: int = 10, time_threshold: float = 1.0):
def track_queries(
operation_name: str, warn_threshold: int = 10, time_threshold: float = 1.0
):
"""
Context manager to track database queries for specific operations
Args:
operation_name: Name of the operation being tracked
warn_threshold: Number of queries that triggers a warning
@@ -27,136 +29,140 @@ def track_queries(operation_name: str, warn_threshold: int = 10, time_threshold:
if not settings.DEBUG:
yield
return
initial_queries = len(connection.queries)
start_time = time.time()
try:
yield
finally:
end_time = time.time()
total_queries = len(connection.queries) - initial_queries
execution_time = end_time - start_time
# Collect query details
query_details = []
if hasattr(connection, 'queries') and total_queries > 0:
if hasattr(connection, "queries") and total_queries > 0:
recent_queries = connection.queries[-total_queries:]
query_details = [
{
'sql': query['sql'][:500] + '...' if len(query['sql']) > 500 else query['sql'],
'time': float(query['time']),
'duplicate_count': sum(1 for q in recent_queries if q['sql'] == query['sql'])
"sql": (
query["sql"][:500] + "..."
if len(query["sql"]) > 500
else query["sql"]
),
"time": float(query["time"]),
"duplicate_count": sum(
1 for q in recent_queries if q["sql"] == query["sql"]
),
}
for query in recent_queries
]
performance_data = {
'operation': operation_name,
'query_count': total_queries,
'execution_time': execution_time,
'queries': query_details if settings.DEBUG else [],
'slow_queries': [q for q in query_details if q['time'] > 0.1], # Queries slower than 100ms
"operation": operation_name,
"query_count": total_queries,
"execution_time": execution_time,
"queries": query_details if settings.DEBUG else [],
"slow_queries": [
q for q in query_details if q["time"] > 0.1
], # Queries slower than 100ms
}
# Log warnings for performance issues
if total_queries > warn_threshold or execution_time > time_threshold:
logger.warning(
f"Performance concern in {operation_name}: "
f"{total_queries} queries, {execution_time:.2f}s",
extra=performance_data
extra=performance_data,
)
else:
logger.debug(
f"Query tracking for {operation_name}: "
f"{total_queries} queries, {execution_time:.2f}s",
extra=performance_data
extra=performance_data,
)
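# Illustrative usage of the track_queries context manager above (not part of this
# diff). The Park model and its import path are assumptions taken from the
# docstrings below, not from this repository.
def _example_track_queries_usage():
    from parks.models import Park  # assumed import path, for illustration only

    with track_queries("park_list_view", warn_threshold=5):
        # Force evaluation inside the tracked block so the queries are counted.
        return list(Park.objects.all()[:50])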
class QueryOptimizer:
"""Utility class for common query optimization patterns"""
@staticmethod
def optimize_park_queryset(queryset: QuerySet) -> QuerySet:
"""
Optimize Park queryset with proper select_related and prefetch_related
"""
return queryset.select_related(
'location',
'operator',
'created_by'
).prefetch_related(
'areas',
'rides__manufacturer',
'reviews__user'
).annotate(
ride_count=Count('rides'),
average_rating=Avg('reviews__rating'),
latest_review_date=Max('reviews__created_at')
return (
queryset.select_related("location", "operator", "created_by")
.prefetch_related("areas", "rides__manufacturer", "reviews__user")
.annotate(
ride_count=Count("rides"),
average_rating=Avg("reviews__rating"),
latest_review_date=Max("reviews__created_at"),
)
)
@staticmethod
def optimize_ride_queryset(queryset: QuerySet) -> QuerySet:
"""
Optimize Ride queryset with proper relationships
"""
return queryset.select_related(
'park',
'park__location',
'manufacturer',
'created_by'
).prefetch_related(
'reviews__user',
'media_items'
).annotate(
review_count=Count('reviews'),
average_rating=Avg('reviews__rating'),
latest_review_date=Max('reviews__created_at')
return (
queryset.select_related(
"park", "park__location", "manufacturer", "created_by"
)
.prefetch_related("reviews__user", "media_items")
.annotate(
review_count=Count("reviews"),
average_rating=Avg("reviews__rating"),
latest_review_date=Max("reviews__created_at"),
)
)
@staticmethod
def optimize_user_queryset(queryset: QuerySet) -> QuerySet:
"""
Optimize User queryset for profile views
"""
return queryset.prefetch_related(
Prefetch('park_reviews', to_attr='cached_park_reviews'),
Prefetch('ride_reviews', to_attr='cached_ride_reviews'),
'authored_parks',
'authored_rides'
Prefetch("park_reviews", to_attr="cached_park_reviews"),
Prefetch("ride_reviews", to_attr="cached_ride_reviews"),
"authored_parks",
"authored_rides",
).annotate(
total_reviews=Count('park_reviews') + Count('ride_reviews'),
parks_authored=Count('authored_parks'),
rides_authored=Count('authored_rides')
total_reviews=Count("park_reviews") + Count("ride_reviews"),
parks_authored=Count("authored_parks"),
rides_authored=Count("authored_rides"),
)
@staticmethod
def create_bulk_queryset(model: Type[models.Model], ids: List[int]) -> QuerySet:
"""
Create an optimized queryset for bulk operations
"""
queryset = model.objects.filter(id__in=ids)
# Apply model-specific optimizations
if hasattr(model, '_meta') and model._meta.model_name == 'park':
if hasattr(model, "_meta") and model._meta.model_name == "park":
return QueryOptimizer.optimize_park_queryset(queryset)
elif hasattr(model, '_meta') and model._meta.model_name == 'ride':
elif hasattr(model, "_meta") and model._meta.model_name == "ride":
return QueryOptimizer.optimize_ride_queryset(queryset)
elif hasattr(model, '_meta') and model._meta.model_name == 'user':
elif hasattr(model, "_meta") and model._meta.model_name == "user":
return QueryOptimizer.optimize_user_queryset(queryset)
return queryset
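# Sketch of applying QueryOptimizer before rendering or serialization (not part of
# this diff). The Park model import path is an assumption used for illustration.
def _example_optimized_park_list():
    from parks.models import Park  # assumed import path

    qs = QueryOptimizer.optimize_park_queryset(Park.objects.all())
    # ride_count, average_rating and latest_review_date come from the annotations above.
    return [(park.name, park.ride_count, park.average_rating) for park in qs[:25]]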
class QueryCache:
"""Caching utilities for expensive queries"""
@staticmethod
def cache_queryset_result(cache_key: str, queryset_func, timeout: int = 3600, **kwargs):
def cache_queryset_result(
cache_key: str, queryset_func, timeout: int = 3600, **kwargs
):
"""
Cache the result of an expensive queryset operation
Args:
cache_key: Unique key for caching
queryset_func: Function that returns the queryset result
@@ -168,22 +174,22 @@ class QueryCache:
if cached_result is not None:
logger.debug(f"Cache hit for queryset: {cache_key}")
return cached_result
# Execute the expensive operation
with track_queries(f"cache_miss_{cache_key}"):
result = queryset_func(**kwargs)
# Cache the result
cache.set(cache_key, result, timeout)
logger.debug(f"Cached queryset result: {cache_key}")
return result
@staticmethod
def invalidate_model_cache(model_name: str, instance_id: Optional[int] = None):
"""
Invalidate cache keys related to a specific model
Args:
model_name: Name of the model (e.g., 'park', 'ride')
instance_id: Specific instance ID, if applicable
@@ -193,44 +199,50 @@ class QueryCache:
pattern = f"*{model_name}_{instance_id}*"
else:
pattern = f"*{model_name}*"
try:
# For Redis cache backends that support pattern deletion
if hasattr(cache, 'delete_pattern'):
if hasattr(cache, "delete_pattern"):
deleted_count = cache.delete_pattern(pattern)
logger.info(f"Invalidated {deleted_count} cache keys for pattern: {pattern}")
logger.info(
f"Invalidated {deleted_count} cache keys for pattern: {pattern}"
)
else:
logger.warning(f"Cache backend does not support pattern deletion: {pattern}")
logger.warning(
f"Cache backend does not support pattern deletion: {pattern}"
)
except Exception as e:
logger.error(f"Error invalidating cache pattern {pattern}: {e}")
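# Hedged sketch of the QueryCache helpers (not part of this diff); the Park model
# path and the "top_parks_v1" cache key are illustrative assumptions.
def _example_query_cache_usage(limit=10):
    from parks.models import Park  # assumed import path

    def _top_parks(limit):
        return list(QueryOptimizer.optimize_park_queryset(Park.objects.all())[:limit])

    # Extra keyword arguments are forwarded to the queryset function on a cache miss.
    return QueryCache.cache_queryset_result(
        "top_parks_v1", _top_parks, timeout=600, limit=limit
    )


# After a Park write, related cached entries could be dropped like this (sketch):
#     QueryCache.invalidate_model_cache("park", instance_id=park.pk)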
class IndexAnalyzer:
"""Analyze and suggest database indexes"""
@staticmethod
def analyze_slow_queries(min_time: float = 0.1) -> List[Dict[str, Any]]:
"""
Analyze slow queries from the current request
Args:
min_time: Minimum query time in seconds to consider "slow"
"""
if not hasattr(connection, 'queries'):
if not hasattr(connection, "queries"):
return []
slow_queries = []
for query in connection.queries:
query_time = float(query.get('time', 0))
query_time = float(query.get("time", 0))
if query_time >= min_time:
slow_queries.append({
'sql': query['sql'],
'time': query_time,
'analysis': IndexAnalyzer._analyze_query_sql(query['sql'])
})
slow_queries.append(
{
"sql": query["sql"],
"time": query_time,
"analysis": IndexAnalyzer._analyze_query_sql(query["sql"]),
}
)
return slow_queries
@staticmethod
def _analyze_query_sql(sql: str) -> Dict[str, Any]:
"""
@@ -238,31 +250,40 @@ class IndexAnalyzer:
"""
sql_upper = sql.upper()
analysis = {
'has_where_clause': 'WHERE' in sql_upper,
'has_join': any(join in sql_upper for join in ['JOIN', 'INNER JOIN', 'LEFT JOIN', 'RIGHT JOIN']),
'has_order_by': 'ORDER BY' in sql_upper,
'has_group_by': 'GROUP BY' in sql_upper,
'has_like': 'LIKE' in sql_upper,
'table_scans': [],
'suggestions': []
"has_where_clause": "WHERE" in sql_upper,
"has_join": any(
join in sql_upper
for join in ["JOIN", "INNER JOIN", "LEFT JOIN", "RIGHT JOIN"]
),
"has_order_by": "ORDER BY" in sql_upper,
"has_group_by": "GROUP BY" in sql_upper,
"has_like": "LIKE" in sql_upper,
"table_scans": [],
"suggestions": [],
}
# Detect potential table scans
if 'WHERE' not in sql_upper and 'SELECT COUNT(*) FROM' not in sql_upper:
analysis['table_scans'].append("Query may be doing a full table scan")
if "WHERE" not in sql_upper and "SELECT COUNT(*) FROM" not in sql_upper:
analysis["table_scans"].append("Query may be doing a full table scan")
# Suggest indexes based on patterns
if analysis['has_where_clause'] and not analysis['has_join']:
analysis['suggestions'].append("Consider adding indexes on WHERE clause columns")
if analysis['has_order_by']:
analysis['suggestions'].append("Consider adding indexes on ORDER BY columns")
if analysis['has_like'] and '%' not in sql[:sql.find('LIKE') + 10]:
analysis['suggestions'].append("LIKE queries with leading wildcards cannot use indexes efficiently")
if analysis["has_where_clause"] and not analysis["has_join"]:
analysis["suggestions"].append(
"Consider adding indexes on WHERE clause columns"
)
if analysis["has_order_by"]:
analysis["suggestions"].append(
"Consider adding indexes on ORDER BY columns"
)
if analysis["has_like"] and "%" not in sql[: sql.find("LIKE") + 10]:
analysis["suggestions"].append(
"LIKE queries with leading wildcards cannot use indexes efficiently"
)
return analysis
@staticmethod
def suggest_model_indexes(model: Type[models.Model]) -> List[str]:
"""
@@ -270,45 +291,66 @@ class IndexAnalyzer:
"""
suggestions = []
opts = model._meta
# Foreign key fields should have indexes (Django adds these automatically)
# Foreign key fields should have indexes (Django adds these
# automatically)
for field in opts.fields:
if isinstance(field, models.ForeignKey):
suggestions.append(f"Index on {field.name} (automatically created by Django)")
suggestions.append(
f"Index on {field.name} (automatically created by Django)"
)
# Suggest composite indexes for common query patterns
date_fields = [f.name for f in opts.fields if isinstance(f, (models.DateField, models.DateTimeField))]
status_fields = [f.name for f in opts.fields if f.name in ['status', 'is_active', 'is_published']]
date_fields = [
f.name
for f in opts.fields
if isinstance(f, (models.DateField, models.DateTimeField))
]
status_fields = [
f.name
for f in opts.fields
if f.name in ["status", "is_active", "is_published"]
]
if date_fields and status_fields:
for date_field in date_fields:
for status_field in status_fields:
suggestions.append(f"Composite index on ({status_field}, {date_field}) for filtered date queries")
suggestions.append(
f"Composite index on ({status_field}, {date_field}) for filtered date queries"
)
# Suggest indexes for fields commonly used in WHERE clauses
common_filter_fields = ['slug', 'name', 'created_at', 'updated_at']
common_filter_fields = ["slug", "name", "created_at", "updated_at"]
for field in opts.fields:
if field.name in common_filter_fields and not field.db_index:
suggestions.append(f"Consider adding db_index=True to {field.name}")
suggestions.append(
f"Consider adding db_index=True to {
field.name}"
)
return suggestions
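# Illustrative use of IndexAnalyzer (not part of this diff). The Park model import
# is an assumption; analyze_slow_queries only sees queries when DEBUG is enabled.
def _example_index_analysis():
    from parks.models import Park  # assumed import path

    for suggestion in IndexAnalyzer.suggest_model_indexes(Park):
        logger.info("Index suggestion for Park: %s", suggestion)
    # Queries slower than 50ms from the current request, with heuristic analysis.
    return IndexAnalyzer.analyze_slow_queries(min_time=0.05)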
def log_query_performance():
"""Decorator to log query performance for a function"""
def decorator(func):
def wrapper(*args, **kwargs):
operation_name = f"{func.__module__}.{func.__name__}"
with track_queries(operation_name):
return func(*args, **kwargs)
return wrapper
return decorator
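# Sketch of decorating a data-access helper with log_query_performance (not part of
# this diff); the Ride model import path is assumed for illustration.
@log_query_performance()
def _example_featured_rides():
    from rides.models import Ride  # assumed import path

    return list(QueryOptimizer.optimize_ride_queryset(Ride.objects.all())[:10])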
def optimize_queryset_for_serialization(queryset: QuerySet, fields: List[str]) -> QuerySet:
def optimize_queryset_for_serialization(
queryset: QuerySet, fields: List[str]
) -> QuerySet:
"""
Optimize a queryset for API serialization by only selecting needed fields
Args:
queryset: The queryset to optimize
fields: List of field names that will be serialized
@@ -316,28 +358,30 @@ def optimize_queryset_for_serialization(queryset: QuerySet, fields: List[str]) -
# Extract foreign key fields that need select_related
model = queryset.model
opts = model._meta
select_related_fields = []
prefetch_related_fields = []
for field_name in fields:
try:
field = opts.get_field(field_name)
if isinstance(field, models.ForeignKey):
select_related_fields.append(field_name)
elif isinstance(field, (models.ManyToManyField, models.reverse.ManyToManyRel)):
elif isinstance(
field, (models.ManyToManyField, models.reverse.ManyToManyRel)
):
prefetch_related_fields.append(field_name)
except models.FieldDoesNotExist:
# Field might be a property or method, skip optimization
continue
# Apply optimizations
if select_related_fields:
queryset = queryset.select_related(*select_related_fields)
if prefetch_related_fields:
queryset = queryset.prefetch_related(*prefetch_related_fields)
return queryset
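# Hedged example of optimize_queryset_for_serialization (not part of this diff).
# "park" and "manufacturer" are the ForeignKey fields used elsewhere in this module;
# the Ride import path is an assumption.
def _example_serialization_queryset():
    from rides.models import Ride  # assumed import path

    fields = ["name", "park", "manufacturer"]
    return optimize_queryset_for_serialization(Ride.objects.all(), fields)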
@@ -347,39 +391,42 @@ def monitor_db_performance(operation_name: str):
"""
Context manager that monitors database performance for an operation
"""
initial_queries = len(connection.queries) if hasattr(connection, 'queries') else 0
initial_queries = len(connection.queries) if hasattr(connection, "queries") else 0
start_time = time.time()
try:
yield
finally:
end_time = time.time()
duration = end_time - start_time
if hasattr(connection, 'queries'):
if hasattr(connection, "queries"):
total_queries = len(connection.queries) - initial_queries
# Analyze queries for performance issues
slow_queries = IndexAnalyzer.analyze_slow_queries(0.05) # 50ms threshold
performance_data = {
'operation': operation_name,
'duration': duration,
'query_count': total_queries,
'slow_query_count': len(slow_queries),
'slow_queries': slow_queries[:5] # Limit to top 5 slow queries
"operation": operation_name,
"duration": duration,
"query_count": total_queries,
"slow_query_count": len(slow_queries),
# Limit to top 5 slow queries
"slow_queries": slow_queries[:5],
}
# Log performance data
if duration > 1.0 or total_queries > 15 or slow_queries:
logger.warning(
f"Performance issue in {operation_name}: "
f"{duration:.3f}s, {total_queries} queries, {len(slow_queries)} slow",
extra=performance_data
f"{
duration:.3f}s, {total_queries} queries, {
len(slow_queries)} slow",
extra=performance_data,
)
else:
logger.debug(
f"DB performance for {operation_name}: "
f"{duration:.3f}s, {total_queries} queries",
extra=performance_data
extra=performance_data,
)
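# Sketch of wrapping a bulk write in monitor_db_performance (not part of this diff);
# the Park model path and the is_published field are illustrative assumptions.
def _example_monitored_bulk_publish(park_ids):
    from parks.models import Park  # assumed import path

    with monitor_db_performance("bulk_publish_parks"):
        return Park.objects.filter(id__in=park_ids).update(is_published=True)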