Files
thrillwiki_django_no_react/backend/apps/parks/services/hybrid_loader.py
pacnpal 35f8d0ef8f Implement hybrid filtering strategy for parks and rides
- Added comprehensive documentation for hybrid filtering implementation, including architecture, API endpoints, performance characteristics, and usage examples.
- Developed a hybrid pagination and client-side filtering recommendation, detailing server-side responsibilities and client-side logic.
- Created a test script for hybrid filtering endpoints, covering various test cases including basic filtering, search functionality, pagination, and edge cases.
2025-09-14 21:07:17 -04:00

426 lines
15 KiB
Python

"""
Smart Park Loader for Hybrid Filtering Strategy
This module provides intelligent data loading capabilities for the hybrid filtering approach,
optimizing database queries and implementing progressive loading strategies.
"""
from typing import Dict, List, Optional, Any, Tuple
from django.db import models
from django.core.cache import cache
from django.conf import settings
from apps.parks.models import Park
class SmartParkLoader:
"""
Intelligent park data loader that optimizes queries based on filtering requirements.
Implements progressive loading and smart caching strategies.
"""
# Cache configuration
CACHE_TIMEOUT = getattr(settings, 'HYBRID_FILTER_CACHE_TIMEOUT', 300) # 5 minutes
CACHE_KEY_PREFIX = 'hybrid_parks'
# Progressive loading thresholds
INITIAL_LOAD_SIZE = 50
PROGRESSIVE_LOAD_SIZE = 25
MAX_CLIENT_SIDE_RECORDS = 200
def __init__(self):
self.base_queryset = self._get_optimized_queryset()
def _get_optimized_queryset(self) -> models.QuerySet:
"""Get optimized base queryset with all necessary prefetches."""
return Park.objects.select_related(
'operator',
'property_owner',
'banner_image',
'card_image',
).prefetch_related(
'location', # ParkLocation relationship
).filter(
# Only include operating and temporarily closed parks by default
status__in=['OPERATING', 'CLOSED_TEMP']
).order_by('name')
def get_initial_load(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Get initial park data load with smart filtering decisions.
Args:
filters: Optional filters to apply
Returns:
Dictionary containing parks data and metadata
"""
cache_key = self._generate_cache_key('initial', filters)
cached_result = cache.get(cache_key)
if cached_result:
return cached_result
# Apply filters if provided
queryset = self.base_queryset
if filters:
queryset = self._apply_filters(queryset, filters)
# Get total count for pagination decisions
total_count = queryset.count()
# Determine loading strategy
if total_count <= self.MAX_CLIENT_SIDE_RECORDS:
# Load all data for client-side filtering
parks = list(queryset.all())
strategy = 'client_side'
has_more = False
else:
# Load initial batch for server-side pagination
parks = list(queryset[:self.INITIAL_LOAD_SIZE])
strategy = 'server_side'
has_more = total_count > self.INITIAL_LOAD_SIZE
result = {
'parks': parks,
'total_count': total_count,
'strategy': strategy,
'has_more': has_more,
'next_offset': len(parks) if has_more else None,
'filter_metadata': self._get_filter_metadata(queryset),
}
# Cache the result
cache.set(cache_key, result, self.CACHE_TIMEOUT)
return result
def get_progressive_load(
self,
offset: int,
filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Get next batch of parks for progressive loading.
Args:
offset: Starting offset for the batch
filters: Optional filters to apply
Returns:
Dictionary containing parks data and metadata
"""
cache_key = self._generate_cache_key(f'progressive_{offset}', filters)
cached_result = cache.get(cache_key)
if cached_result:
return cached_result
# Apply filters if provided
queryset = self.base_queryset
if filters:
queryset = self._apply_filters(queryset, filters)
# Get the batch
end_offset = offset + self.PROGRESSIVE_LOAD_SIZE
parks = list(queryset[offset:end_offset])
# Check if there are more records
total_count = queryset.count()
has_more = end_offset < total_count
result = {
'parks': parks,
'total_count': total_count,
'has_more': has_more,
'next_offset': end_offset if has_more else None,
}
# Cache the result
cache.set(cache_key, result, self.CACHE_TIMEOUT)
return result
def get_filter_metadata(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Get metadata about available filter options.
Args:
filters: Current filters to scope the metadata
Returns:
Dictionary containing filter metadata
"""
cache_key = self._generate_cache_key('metadata', filters)
cached_result = cache.get(cache_key)
if cached_result:
return cached_result
# Apply filters if provided
queryset = self.base_queryset
if filters:
queryset = self._apply_filters(queryset, filters)
result = self._get_filter_metadata(queryset)
# Cache the result
cache.set(cache_key, result, self.CACHE_TIMEOUT)
return result
def _apply_filters(self, queryset: models.QuerySet, filters: Dict[str, Any]) -> models.QuerySet:
"""Apply filters to the queryset."""
# Status filter
if 'status' in filters and filters['status']:
if isinstance(filters['status'], list):
queryset = queryset.filter(status__in=filters['status'])
else:
queryset = queryset.filter(status=filters['status'])
# Park type filter
if 'park_type' in filters and filters['park_type']:
if isinstance(filters['park_type'], list):
queryset = queryset.filter(park_type__in=filters['park_type'])
else:
queryset = queryset.filter(park_type=filters['park_type'])
# Country filter
if 'country' in filters and filters['country']:
queryset = queryset.filter(location__country__in=filters['country'])
# State filter
if 'state' in filters and filters['state']:
queryset = queryset.filter(location__state__in=filters['state'])
# Opening year range
if 'opening_year_min' in filters and filters['opening_year_min']:
queryset = queryset.filter(opening_year__gte=filters['opening_year_min'])
if 'opening_year_max' in filters and filters['opening_year_max']:
queryset = queryset.filter(opening_year__lte=filters['opening_year_max'])
# Size range
if 'size_min' in filters and filters['size_min']:
queryset = queryset.filter(size_acres__gte=filters['size_min'])
if 'size_max' in filters and filters['size_max']:
queryset = queryset.filter(size_acres__lte=filters['size_max'])
# Rating range
if 'rating_min' in filters and filters['rating_min']:
queryset = queryset.filter(average_rating__gte=filters['rating_min'])
if 'rating_max' in filters and filters['rating_max']:
queryset = queryset.filter(average_rating__lte=filters['rating_max'])
# Ride count range
if 'ride_count_min' in filters and filters['ride_count_min']:
queryset = queryset.filter(ride_count__gte=filters['ride_count_min'])
if 'ride_count_max' in filters and filters['ride_count_max']:
queryset = queryset.filter(ride_count__lte=filters['ride_count_max'])
# Coaster count range
if 'coaster_count_min' in filters and filters['coaster_count_min']:
queryset = queryset.filter(coaster_count__gte=filters['coaster_count_min'])
if 'coaster_count_max' in filters and filters['coaster_count_max']:
queryset = queryset.filter(coaster_count__lte=filters['coaster_count_max'])
# Operator filter
if 'operator' in filters and filters['operator']:
if isinstance(filters['operator'], list):
queryset = queryset.filter(operator__slug__in=filters['operator'])
else:
queryset = queryset.filter(operator__slug=filters['operator'])
# Search query
if 'search' in filters and filters['search']:
search_term = filters['search'].lower()
queryset = queryset.filter(search_text__icontains=search_term)
return queryset
def _get_filter_metadata(self, queryset: models.QuerySet) -> Dict[str, Any]:
"""Generate filter metadata from the current queryset."""
# Get distinct values for categorical filters with counts
countries_data = list(
queryset.values('location__country')
.exclude(location__country__isnull=True)
.annotate(count=models.Count('id'))
.order_by('location__country')
)
states_data = list(
queryset.values('location__state')
.exclude(location__state__isnull=True)
.annotate(count=models.Count('id'))
.order_by('location__state')
)
park_types_data = list(
queryset.values('park_type')
.exclude(park_type__isnull=True)
.annotate(count=models.Count('id'))
.order_by('park_type')
)
statuses_data = list(
queryset.values('status')
.annotate(count=models.Count('id'))
.order_by('status')
)
operators_data = list(
queryset.select_related('operator')
.values('operator__id', 'operator__name', 'operator__slug')
.exclude(operator__isnull=True)
.annotate(count=models.Count('id'))
.order_by('operator__name')
)
# Convert to frontend-expected format with value/label/count
countries = [
{
'value': item['location__country'],
'label': item['location__country'],
'count': item['count']
}
for item in countries_data
]
states = [
{
'value': item['location__state'],
'label': item['location__state'],
'count': item['count']
}
for item in states_data
]
park_types = [
{
'value': item['park_type'],
'label': item['park_type'],
'count': item['count']
}
for item in park_types_data
]
statuses = [
{
'value': item['status'],
'label': self._get_status_label(item['status']),
'count': item['count']
}
for item in statuses_data
]
operators = [
{
'value': item['operator__slug'],
'label': item['operator__name'],
'count': item['count']
}
for item in operators_data
]
# Get ranges for numerical filters
aggregates = queryset.aggregate(
opening_year_min=models.Min('opening_year'),
opening_year_max=models.Max('opening_year'),
size_min=models.Min('size_acres'),
size_max=models.Max('size_acres'),
rating_min=models.Min('average_rating'),
rating_max=models.Max('average_rating'),
ride_count_min=models.Min('ride_count'),
ride_count_max=models.Max('ride_count'),
coaster_count_min=models.Min('coaster_count'),
coaster_count_max=models.Max('coaster_count'),
)
return {
'categorical': {
'countries': countries,
'states': states,
'park_types': park_types,
'statuses': statuses,
'operators': operators,
},
'ranges': {
'opening_year': {
'min': aggregates['opening_year_min'],
'max': aggregates['opening_year_max'],
'step': 1,
'unit': 'year'
},
'size_acres': {
'min': float(aggregates['size_min']) if aggregates['size_min'] else None,
'max': float(aggregates['size_max']) if aggregates['size_max'] else None,
'step': 1.0,
'unit': 'acres'
},
'average_rating': {
'min': float(aggregates['rating_min']) if aggregates['rating_min'] else None,
'max': float(aggregates['rating_max']) if aggregates['rating_max'] else None,
'step': 0.1,
'unit': 'stars'
},
'ride_count': {
'min': aggregates['ride_count_min'],
'max': aggregates['ride_count_max'],
'step': 1,
'unit': 'rides'
},
'coaster_count': {
'min': aggregates['coaster_count_min'],
'max': aggregates['coaster_count_max'],
'step': 1,
'unit': 'coasters'
},
},
'total_count': queryset.count(),
}
def _get_status_label(self, status: str) -> str:
"""Convert status code to human-readable label."""
status_labels = {
'OPERATING': 'Operating',
'CLOSED_TEMP': 'Temporarily Closed',
'CLOSED_PERM': 'Permanently Closed',
'UNDER_CONSTRUCTION': 'Under Construction',
}
return status_labels.get(status, status)
def _generate_cache_key(self, operation: str, filters: Optional[Dict[str, Any]] = None) -> str:
"""Generate cache key for the given operation and filters."""
key_parts = [self.CACHE_KEY_PREFIX, operation]
if filters:
# Create a consistent string representation of filters
filter_str = '_'.join(f"{k}:{v}" for k, v in sorted(filters.items()) if v)
key_parts.append(filter_str)
return '_'.join(key_parts)
def invalidate_cache(self, filters: Optional[Dict[str, Any]] = None) -> None:
"""Invalidate cached data for the given filters."""
# This is a simplified implementation
# In production, you might want to use cache versioning or tags
cache_keys = [
self._generate_cache_key('initial', filters),
self._generate_cache_key('metadata', filters),
]
# Also invalidate progressive load caches
for offset in range(0, 1000, self.PROGRESSIVE_LOAD_SIZE):
cache_keys.append(self._generate_cache_key(f'progressive_{offset}', filters))
cache.delete_many(cache_keys)
# Singleton instance
smart_park_loader = SmartParkLoader()