Implement hybrid filtering strategy for parks and rides

- Added comprehensive documentation for hybrid filtering implementation, including architecture, API endpoints, performance characteristics, and usage examples.
- Developed a hybrid pagination and client-side filtering recommendation, detailing server-side responsibilities and client-side logic.
- Created a test script for hybrid filtering endpoints, covering various test cases including basic filtering, search functionality, pagination, and edge cases.
This commit is contained in:
pacnpal
2025-09-14 21:07:17 -04:00
parent 0fd6dc2560
commit 35f8d0ef8f
42 changed files with 8490 additions and 224 deletions

View File

@@ -0,0 +1,88 @@
# Generated by Django 5.2.5 on 2025-09-14 19:01
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the hybrid-filtering fields ``opening_year`` and ``search_text``.

    ``Park`` gets indexed copies of both fields; the pghistory event model
    ``ParkEvent`` gets un-indexed mirrors. The pghistory triggers are dropped
    first and recreated afterwards so their generated INSERT statements
    include the new columns.
    """

    dependencies = [
        ("parks", "0013_remove_park_insert_insert_remove_park_update_update_and_more"),
    ]

    operations = [
        # Drop the existing pghistory triggers: their generated SQL lists the
        # old column set and would not capture the new fields.
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="insert_insert",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="update_update",
        ),
        # opening_year is denormalized from opening_date; indexed because it
        # is a common filter dimension for hybrid filtering.
        migrations.AddField(
            model_name="park",
            name="opening_year",
            field=models.IntegerField(
                blank=True,
                db_index=True,
                help_text="Year the park opened (computed from opening_date)",
                null=True,
            ),
        ),
        # search_text is a denormalized lowercase blob used for client-side
        # and icontains searching.
        migrations.AddField(
            model_name="park",
            name="search_text",
            field=models.TextField(
                blank=True,
                db_index=True,
                help_text="Searchable text combining name, description, location, and operator",
            ),
        ),
        # Mirror fields on the pghistory event table (no indexes needed on
        # the audit table).
        migrations.AddField(
            model_name="parkevent",
            name="opening_year",
            field=models.IntegerField(
                blank=True,
                help_text="Year the park opened (computed from opening_date)",
                null=True,
            ),
        ),
        migrations.AddField(
            model_name="parkevent",
            name="search_text",
            field=models.TextField(
                blank=True,
                help_text="Searchable text combining name, description, location, and operator",
            ),
        ),
        # Recreate the pghistory triggers with generated SQL that includes
        # the new columns.
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="insert_insert",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="39ac89dc193467b8b41f06ff15903f0a3e22f6b0",
                    operation="INSERT",
                    pgid="pgtrigger_insert_insert_66883",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="update_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    # Only record an event when a column actually changed.
                    condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="af7925b4ef24b42c66b7795b9e0c6c8f510e597c",
                    operation="UPDATE",
                    pgid="pgtrigger_update_update_19f56",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,64 @@
# Generated by Django 5.2.5 on 2025-09-14 19:01
from django.db import migrations
def populate_computed_fields(apps, schema_editor):
    """Back-fill ``opening_year`` and ``search_text`` for existing parks.

    Runs raw SQL with the table's *user* triggers disabled so the pghistory
    triggers do not record one audit event per back-filled row.

    Args:
        apps: Historical app registry (unused; raw SQL is used instead).
        schema_editor: Active schema editor used to execute the SQL.
    """
    # BUG FIX: use DISABLE TRIGGER USER instead of DISABLE TRIGGER ALL.
    # ``ALL`` also disables the system triggers that enforce foreign-key
    # constraints (and requires superuser privileges); we only need to
    # silence the user-defined pghistory triggers.
    schema_editor.execute("ALTER TABLE parks_park DISABLE TRIGGER USER;")
    try:
        # opening_year is derived from opening_date.
        schema_editor.execute("""
            UPDATE parks_park
            SET opening_year = EXTRACT(YEAR FROM opening_date)
            WHERE opening_date IS NOT NULL;
        """)
        # Base search_text: name + description. This covers parks without an
        # operator, which the join-based UPDATE below would skip.
        schema_editor.execute("""
            UPDATE parks_park
            SET search_text = LOWER(
                COALESCE(name, '') || ' ' ||
                COALESCE(description, '')
            );
        """)
        # Parks that have an operator additionally get the operator name
        # appended (overwrites the base value for those rows).
        schema_editor.execute("""
            UPDATE parks_park
            SET search_text = LOWER(
                COALESCE(parks_park.name, '') || ' ' ||
                COALESCE(parks_park.description, '') || ' ' ||
                COALESCE(parks_company.name, '')
            )
            FROM parks_company
            WHERE parks_park.operator_id = parks_company.id;
        """)
    finally:
        # Always re-enable the triggers, even if an UPDATE above fails.
        schema_editor.execute("ALTER TABLE parks_park ENABLE TRIGGER USER;")
def reverse_populate_computed_fields(apps, schema_editor):
    """Reverse operation: blank out the computed hybrid-filtering fields."""
    park_model = apps.get_model('parks', 'Park')
    park_model.objects.update(opening_year=None, search_text='')
class Migration(migrations.Migration):
    """Data migration: back-fill the hybrid-filtering fields added in 0014."""

    dependencies = [
        ("parks", "0014_add_hybrid_filtering_fields"),
    ]

    operations = [
        # Forward: populate opening_year/search_text via raw SQL.
        # Reverse: clear both fields through the ORM.
        migrations.RunPython(
            populate_computed_fields,
            reverse_populate_computed_fields,
        ),
    ]

View File

@@ -0,0 +1,85 @@
# Generated by Django 5.2.5 on 2025-09-14 19:12
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the supporting indexes for the hybrid filtering strategy.

    Every statement uses ``IF [NOT] EXISTS`` so the migration is idempotent.
    NOTE(review): plain ``CREATE INDEX`` takes a lock that blocks writes for
    the duration of the build; on a large production table
    ``CREATE INDEX CONCURRENTLY`` (with ``atomic = False``) would avoid that.
    """

    dependencies = [
        ("parks", "0015_populate_hybrid_filtering_fields"),
    ]

    operations = [
        # Composite indexes for common filter combinations
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_status_park_type_idx ON parks_park (status, park_type);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_status_park_type_idx;"
        ),
        # Partial index: rows without an opening_year can never match a
        # year-range filter, so exclude them to keep the index small.
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_opening_year_status_idx ON parks_park (opening_year, status) WHERE opening_year IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_opening_year_status_idx;"
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_size_rating_idx ON parks_park (size_acres, average_rating) WHERE size_acres IS NOT NULL AND average_rating IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_size_rating_idx;"
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_ride_coaster_count_idx ON parks_park (ride_count, coaster_count) WHERE ride_count IS NOT NULL AND coaster_count IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_ride_coaster_count_idx;"
        ),
        # Full-text search index for search_text field
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_search_text_gin_idx ON parks_park USING gin(to_tsvector('english', search_text));",
            reverse_sql="DROP INDEX IF EXISTS parks_park_search_text_gin_idx;"
        ),
        # Trigram index for fuzzy search on search_text (requires pg_trgm).
        migrations.RunSQL(
            "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
            reverse_sql="-- Cannot drop extension as it might be used elsewhere"
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_search_text_trgm_idx ON parks_park USING gin(search_text gin_trgm_ops);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_search_text_trgm_idx;"
        ),
        # Indexes for location-based filtering (assuming location relationship exists)
        migrations.RunSQL(
            """
            CREATE INDEX IF NOT EXISTS parks_parklocation_country_state_idx
            ON parks_parklocation (country, state)
            WHERE country IS NOT NULL AND state IS NOT NULL;
            """,
            reverse_sql="DROP INDEX IF EXISTS parks_parklocation_country_state_idx;"
        ),
        # Index for operator-based filtering
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_operator_status_idx ON parks_park (operator_id, status);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_operator_status_idx;"
        ),
        # Partial indexes for common status filters
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_operating_parks_idx ON parks_park (name, opening_year) WHERE status IN ('OPERATING', 'CLOSED_TEMP');",
            reverse_sql="DROP INDEX IF EXISTS parks_park_operating_parks_idx;"
        ),
        # Expression index so case-insensitive ordering/lookup on name can
        # use an index scan.
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_name_lower_idx ON parks_park (LOWER(name));",
            reverse_sql="DROP INDEX IF EXISTS parks_park_name_lower_idx;"
        ),
        # Covering index (INCLUDE payload columns) for the default
        # operating/temporarily-closed listing query.
        migrations.RunSQL(
            """
            CREATE INDEX IF NOT EXISTS parks_park_hybrid_covering_idx
            ON parks_park (status, park_type, opening_year)
            INCLUDE (name, slug, size_acres, average_rating, ride_count, coaster_count, operator_id)
            WHERE status IN ('OPERATING', 'CLOSED_TEMP');
            """,
            reverse_sql="DROP INDEX IF EXISTS parks_park_hybrid_covering_idx;"
        ),
    ]

View File

@@ -0,0 +1,73 @@
# Generated by Django 5.2.5 on 2025-09-15 00:50
import django.utils.timezone
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a ``timezone`` CharField to Park and its pghistory event model.

    The pghistory triggers are dropped and recreated so the generated
    INSERT statements include the new column.
    """

    dependencies = [
        ("parks", "0016_add_hybrid_filtering_indexes"),
    ]

    operations = [
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="insert_insert",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="update_update",
        ),
        migrations.AddField(
            model_name="park",
            name="timezone",
            # BUG FIX: the auto-generated default was
            # ``django.utils.timezone.now`` (a datetime factory), which would
            # back-fill existing rows with a stringified timestamp instead of
            # an IANA timezone identifier. Use a valid identifier instead.
            # preserve_default=False means this default is used only for the
            # back-fill and is not kept on the field.
            field=models.CharField(
                default="UTC",
                help_text="Timezone identifier for park operations (e.g., 'America/New_York')",
                max_length=50,
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="parkevent",
            name="timezone",
            field=models.CharField(
                default="UTC",
                help_text="Timezone identifier for park operations (e.g., 'America/New_York')",
                max_length=50,
            ),
            preserve_default=False,
        ),
        # Recreate the pghistory triggers with the timezone column included.
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="insert_insert",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "timezone", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."timezone", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="9da686bd8a1881fe7a3fdfebc14411680fe47527",
                    operation="INSERT",
                    pgid="pgtrigger_insert_insert_66883",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="update_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    # Only record an event when a column actually changed.
                    condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "timezone", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."timezone", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="787e3176b96b506020f056ee1122d90d25e4cb0d",
                    operation="UPDATE",
                    pgid="pgtrigger_update_update_19f56",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,12 @@
# Generated by Django 5.2.5 on 2025-09-15 01:03
from django.db import migrations
class Migration(migrations.Migration):
    """Auto-generated migration with no schema operations.

    Kept as a placeholder node in the migration graph; later manual
    migrations depend on it.
    """

    dependencies = [
        ("parks", "0017_add_timezone_to_pghistory_triggers"),
    ]

    operations = []

View File

@@ -0,0 +1,52 @@
# Generated manually to fix pghistory timezone issue
from django.db import migrations
class Migration(migrations.Migration):
    """Manually recreate the pghistory INSERT trigger to include ``timezone``.

    NOTE(review): migration 0017 already recreated this trigger with the
    timezone column via pgtrigger; this migration force-replaces the function
    with hand-written SQL. The function here ends with ``RETURN NEW`` rather
    than ``RETURN NULL`` — equivalent in effect, since PostgreSQL ignores the
    return value of an AFTER row-level trigger.
    """

    dependencies = [
        ("parks", "0018_auto_20250914_2103"),
    ]

    operations = [
        # DROP ... CASCADE removes the trigger attached to the function as
        # well, so the CREATE TRIGGER at the end re-attaches it.
        migrations.RunSQL(
            sql="""
            -- Drop the existing trigger function
            DROP FUNCTION IF EXISTS pgtrigger_insert_insert_66883() CASCADE;

            -- Recreate the trigger function with timezone field
            CREATE OR REPLACE FUNCTION pgtrigger_insert_insert_66883()
            RETURNS TRIGGER AS $$
            BEGIN
                INSERT INTO "parks_parkevent" (
                    "average_rating", "banner_image_id", "card_image_id", "closing_date",
                    "coaster_count", "created_at", "description", "id", "name", "opening_date",
                    "opening_year", "operating_season", "operator_id", "park_type",
                    "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id",
                    "property_owner_id", "ride_count", "search_text", "size_acres",
                    "slug", "status", "updated_at", "url", "website", "timezone"
                ) VALUES (
                    NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date",
                    NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date",
                    NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type",
                    _pgh_attach_context(), NOW(), 'insert', NEW."id",
                    NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres",
                    NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website", NEW."timezone"
                );
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            -- Recreate the trigger
            CREATE TRIGGER pgtrigger_insert_insert_66883
            AFTER INSERT ON parks_park
            FOR EACH ROW
            EXECUTE FUNCTION pgtrigger_insert_insert_66883();
            """,
            reverse_sql="""
            -- This is irreversible, but we can drop and recreate without timezone
            DROP FUNCTION IF EXISTS pgtrigger_insert_insert_66883() CASCADE;
            """
        ),
    ]

View File

@@ -0,0 +1,52 @@
# Generated manually to fix pghistory UPDATE timezone issue
from django.db import migrations
class Migration(migrations.Migration):
    """Manually recreate the pghistory UPDATE trigger to include ``timezone``.

    Companion to 0019, which did the same for the INSERT trigger.
    NOTE(review): unlike the pgtrigger-generated version, this CREATE TRIGGER
    has no ``WHEN (OLD.* IS DISTINCT FROM NEW.*)`` condition, so an event row
    is recorded on every UPDATE, changed or not — confirm this is intended.
    """

    dependencies = [
        ("parks", "0019_fix_pghistory_timezone"),
    ]

    operations = [
        # DROP ... CASCADE also removes the attached trigger; the final
        # CREATE TRIGGER re-attaches it.
        migrations.RunSQL(
            sql="""
            -- Drop the existing UPDATE trigger function
            DROP FUNCTION IF EXISTS pgtrigger_update_update_19f56() CASCADE;

            -- Recreate the UPDATE trigger function with timezone field
            CREATE OR REPLACE FUNCTION pgtrigger_update_update_19f56()
            RETURNS TRIGGER AS $$
            BEGIN
                INSERT INTO "parks_parkevent" (
                    "average_rating", "banner_image_id", "card_image_id", "closing_date",
                    "coaster_count", "created_at", "description", "id", "name", "opening_date",
                    "opening_year", "operating_season", "operator_id", "park_type",
                    "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id",
                    "property_owner_id", "ride_count", "search_text", "size_acres",
                    "slug", "status", "updated_at", "url", "website", "timezone"
                ) VALUES (
                    NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date",
                    NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date",
                    NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type",
                    _pgh_attach_context(), NOW(), 'update', NEW."id",
                    NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres",
                    NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website", NEW."timezone"
                );
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            -- Recreate the UPDATE trigger
            CREATE TRIGGER pgtrigger_update_update_19f56
            AFTER UPDATE ON parks_park
            FOR EACH ROW
            EXECUTE FUNCTION pgtrigger_update_update_19f56();
            """,
            reverse_sql="""
            -- This is irreversible, but we can drop and recreate without timezone
            DROP FUNCTION IF EXISTS pgtrigger_update_update_19f56() CASCADE;
            """
        ),
    ]

View File

@@ -124,6 +124,25 @@ class Park(TrackedModel):
# Frontend URL
url = models.URLField(blank=True, help_text="Frontend URL for this park")
# Computed fields for hybrid filtering
opening_year = models.IntegerField(
null=True,
blank=True,
db_index=True,
help_text="Year the park opened (computed from opening_date)"
)
search_text = models.TextField(
blank=True,
db_index=True,
help_text="Searchable text combining name, description, location, and operator"
)
# Timezone for park operations
timezone = models.CharField(
max_length=50,
help_text="Timezone identifier for park operations (e.g., 'America/New_York')"
)
class Meta:
ordering = ["name"]
constraints = [
@@ -198,6 +217,9 @@ class Park(TrackedModel):
frontend_domain = getattr(settings, "FRONTEND_DOMAIN", "https://thrillwiki.com")
self.url = f"{frontend_domain}/parks/{self.slug}/"
# Populate computed fields for hybrid filtering
self._populate_computed_fields()
# Save the model
super().save(*args, **kwargs)
@@ -209,6 +231,44 @@ class Park(TrackedModel):
slug=old_slug,
)
def _populate_computed_fields(self) -> None:
"""Populate computed fields for hybrid filtering"""
# Populate opening_year from opening_date
if self.opening_date:
self.opening_year = self.opening_date.year
else:
self.opening_year = None
# Populate search_text for client-side filtering
search_parts = [self.name]
if self.description:
search_parts.append(self.description)
# Add location information if available
try:
if hasattr(self, 'location') and self.location:
if self.location.city:
search_parts.append(self.location.city)
if self.location.state:
search_parts.append(self.location.state)
if self.location.country:
search_parts.append(self.location.country)
except Exception:
# Handle case where location relationship doesn't exist yet
pass
# Add operator information
if self.operator:
search_parts.append(self.operator.name)
# Add property owner information if different
if self.property_owner and self.property_owner != self.operator:
search_parts.append(self.property_owner.name)
# Combine all parts into searchable text
self.search_text = ' '.join(filter(None, search_parts)).lower()
def clean(self):
super().clean()
if self.operator and "OPERATOR" not in self.operator.roles:

View File

@@ -0,0 +1,425 @@
"""
Smart Park Loader for Hybrid Filtering Strategy
This module provides intelligent data loading capabilities for the hybrid filtering approach,
optimizing database queries and implementing progressive loading strategies.
"""
from typing import Dict, List, Optional, Any, Tuple
from django.db import models
from django.core.cache import cache
from django.conf import settings
from apps.parks.models import Park
class SmartParkLoader:
    """
    Intelligent park data loader that optimizes queries based on filtering
    requirements. Implements progressive loading and smart caching strategies.

    * Result sets of at most ``MAX_CLIENT_SIDE_RECORDS`` rows are returned in
      full so the client can filter locally ("client_side" strategy).
    * Larger result sets are paginated server-side: ``INITIAL_LOAD_SIZE`` rows
      up front, then ``PROGRESSIVE_LOAD_SIZE`` per batch ("server_side").
    * Responses are cached for ``CACHE_TIMEOUT`` seconds, keyed by operation
      and a hash of the filter set.
    """

    # Cache configuration
    CACHE_TIMEOUT = getattr(settings, 'HYBRID_FILTER_CACHE_TIMEOUT', 300)  # 5 minutes
    CACHE_KEY_PREFIX = 'hybrid_parks'

    # Progressive loading thresholds
    INITIAL_LOAD_SIZE = 50
    PROGRESSIVE_LOAD_SIZE = 25
    MAX_CLIENT_SIDE_RECORDS = 200

    # (filter key, ORM lookup) pairs for the numeric range filters, applied
    # uniformly in _apply_filters.
    _RANGE_FILTERS: Tuple[Tuple[str, str], ...] = (
        ('opening_year_min', 'opening_year__gte'),
        ('opening_year_max', 'opening_year__lte'),
        ('size_min', 'size_acres__gte'),
        ('size_max', 'size_acres__lte'),
        ('rating_min', 'average_rating__gte'),
        ('rating_max', 'average_rating__lte'),
        ('ride_count_min', 'ride_count__gte'),
        ('ride_count_max', 'ride_count__lte'),
        ('coaster_count_min', 'coaster_count__gte'),
        ('coaster_count_max', 'coaster_count__lte'),
    )

    def __init__(self):
        # QuerySets are lazy, so building this does not hit the database.
        self.base_queryset = self._get_optimized_queryset()

    def _get_optimized_queryset(self) -> models.QuerySet:
        """Get optimized base queryset with all necessary prefetches."""
        return Park.objects.select_related(
            'operator',
            'property_owner',
            'banner_image',
            'card_image',
        ).prefetch_related(
            'location',  # ParkLocation relationship
        ).filter(
            # Only include operating and temporarily closed parks by default
            status__in=['OPERATING', 'CLOSED_TEMP']
        ).order_by('name')

    def get_initial_load(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Get initial park data load with smart filtering decisions.

        Args:
            filters: Optional filters to apply.

        Returns:
            Dictionary with keys ``parks``, ``total_count``, ``strategy``
            ('client_side' or 'server_side'), ``has_more``, ``next_offset``,
            and ``filter_metadata``.
        """
        cache_key = self._generate_cache_key('initial', filters)
        cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        queryset = self.base_queryset
        if filters:
            queryset = self._apply_filters(queryset, filters)

        # Total count decides between client-side and server-side strategies.
        total_count = queryset.count()
        if total_count <= self.MAX_CLIENT_SIDE_RECORDS:
            # Small enough: ship everything and let the client filter.
            parks = list(queryset.all())
            strategy = 'client_side'
            has_more = False
        else:
            # Too large: ship the first page and paginate server-side.
            parks = list(queryset[:self.INITIAL_LOAD_SIZE])
            strategy = 'server_side'
            has_more = total_count > self.INITIAL_LOAD_SIZE

        result = {
            'parks': parks,
            'total_count': total_count,
            'strategy': strategy,
            'has_more': has_more,
            'next_offset': len(parks) if has_more else None,
            'filter_metadata': self._get_filter_metadata(queryset),
        }
        cache.set(cache_key, result, self.CACHE_TIMEOUT)
        return result

    def get_progressive_load(
        self,
        offset: int,
        filters: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Get next batch of parks for progressive loading.

        Args:
            offset: Starting offset for the batch.
            filters: Optional filters to apply.

        Returns:
            Dictionary with keys ``parks``, ``total_count``, ``has_more``,
            and ``next_offset``.
        """
        cache_key = self._generate_cache_key(f'progressive_{offset}', filters)
        cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        queryset = self.base_queryset
        if filters:
            queryset = self._apply_filters(queryset, filters)

        end_offset = offset + self.PROGRESSIVE_LOAD_SIZE
        parks = list(queryset[offset:end_offset])

        total_count = queryset.count()
        has_more = end_offset < total_count

        result = {
            'parks': parks,
            'total_count': total_count,
            'has_more': has_more,
            'next_offset': end_offset if has_more else None,
        }
        cache.set(cache_key, result, self.CACHE_TIMEOUT)
        return result

    def get_filter_metadata(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Get metadata about available filter options.

        Args:
            filters: Current filters used to scope the metadata.

        Returns:
            Dictionary with ``categorical`` options, numeric ``ranges``,
            and ``total_count``.
        """
        cache_key = self._generate_cache_key('metadata', filters)
        cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        queryset = self.base_queryset
        if filters:
            queryset = self._apply_filters(queryset, filters)

        result = self._get_filter_metadata(queryset)
        cache.set(cache_key, result, self.CACHE_TIMEOUT)
        return result

    def _apply_filters(self, queryset: models.QuerySet, filters: Dict[str, Any]) -> models.QuerySet:
        """Apply the supported filters to *queryset* and return the result.

        Categorical filters accept a scalar or a list (scalars are wrapped —
        previously a scalar ``country``/``state`` hit ``__in`` on a string
        and matched per-character). Numeric range filters compare against
        ``None`` explicitly so falsy-but-valid bounds such as ``0`` are
        honoured (previously e.g. ``rating_min=0`` was silently ignored).
        """
        def as_list(value: Any) -> List[Any]:
            # Normalize a scalar categorical value into a one-element list.
            return list(value) if isinstance(value, (list, tuple, set)) else [value]

        status = filters.get('status')
        if status:
            queryset = queryset.filter(status__in=as_list(status))

        park_type = filters.get('park_type')
        if park_type:
            queryset = queryset.filter(park_type__in=as_list(park_type))

        country = filters.get('country')
        if country:
            queryset = queryset.filter(location__country__in=as_list(country))

        state = filters.get('state')
        if state:
            queryset = queryset.filter(location__state__in=as_list(state))

        # Numeric range filters, table-driven; '' still means "not set".
        for key, lookup in self._RANGE_FILTERS:
            value = filters.get(key)
            if value is not None and value != '':
                queryset = queryset.filter(**{lookup: value})

        operator = filters.get('operator')
        if operator:
            queryset = queryset.filter(operator__slug__in=as_list(operator))

        search = filters.get('search')
        if search:
            # search_text is stored lowercase; icontains handles case anyway.
            queryset = queryset.filter(search_text__icontains=search.lower())

        return queryset

    def _get_filter_metadata(self, queryset: models.QuerySet) -> Dict[str, Any]:
        """Generate filter metadata (categorical counts + numeric ranges)."""
        # Distinct values with row counts for each categorical facet.
        countries_data = list(
            queryset.values('location__country')
            .exclude(location__country__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('location__country')
        )
        states_data = list(
            queryset.values('location__state')
            .exclude(location__state__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('location__state')
        )
        park_types_data = list(
            queryset.values('park_type')
            .exclude(park_type__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('park_type')
        )
        statuses_data = list(
            queryset.values('status')
            .annotate(count=models.Count('id'))
            .order_by('status')
        )
        # NOTE: select_related() has no effect before values(), so it was
        # dropped here; the values() join fetches the operator columns.
        operators_data = list(
            queryset.values('operator__id', 'operator__name', 'operator__slug')
            .exclude(operator__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('operator__name')
        )

        # Convert to the frontend-expected {value, label, count} shape.
        countries = [
            {
                'value': item['location__country'],
                'label': item['location__country'],
                'count': item['count']
            }
            for item in countries_data
        ]
        states = [
            {
                'value': item['location__state'],
                'label': item['location__state'],
                'count': item['count']
            }
            for item in states_data
        ]
        park_types = [
            {
                'value': item['park_type'],
                'label': item['park_type'],
                'count': item['count']
            }
            for item in park_types_data
        ]
        statuses = [
            {
                'value': item['status'],
                'label': self._get_status_label(item['status']),
                'count': item['count']
            }
            for item in statuses_data
        ]
        operators = [
            {
                'value': item['operator__slug'],
                'label': item['operator__name'],
                'count': item['count']
            }
            for item in operators_data
        ]

        # Min/max bounds for the numeric range sliders.
        aggregates = queryset.aggregate(
            opening_year_min=models.Min('opening_year'),
            opening_year_max=models.Max('opening_year'),
            size_min=models.Min('size_acres'),
            size_max=models.Max('size_acres'),
            rating_min=models.Min('average_rating'),
            rating_max=models.Max('average_rating'),
            ride_count_min=models.Min('ride_count'),
            ride_count_max=models.Max('ride_count'),
            coaster_count_min=models.Min('coaster_count'),
            coaster_count_max=models.Max('coaster_count'),
        )

        def as_float(value: Any) -> Optional[float]:
            # Explicit None check so a legitimate aggregate of 0 is kept
            # (previously 0 was coerced to None by a truthiness test).
            return float(value) if value is not None else None

        return {
            'categorical': {
                'countries': countries,
                'states': states,
                'park_types': park_types,
                'statuses': statuses,
                'operators': operators,
            },
            'ranges': {
                'opening_year': {
                    'min': aggregates['opening_year_min'],
                    'max': aggregates['opening_year_max'],
                    'step': 1,
                    'unit': 'year'
                },
                'size_acres': {
                    'min': as_float(aggregates['size_min']),
                    'max': as_float(aggregates['size_max']),
                    'step': 1.0,
                    'unit': 'acres'
                },
                'average_rating': {
                    'min': as_float(aggregates['rating_min']),
                    'max': as_float(aggregates['rating_max']),
                    'step': 0.1,
                    'unit': 'stars'
                },
                'ride_count': {
                    'min': aggregates['ride_count_min'],
                    'max': aggregates['ride_count_max'],
                    'step': 1,
                    'unit': 'rides'
                },
                'coaster_count': {
                    'min': aggregates['coaster_count_min'],
                    'max': aggregates['coaster_count_max'],
                    'step': 1,
                    'unit': 'coasters'
                },
            },
            'total_count': queryset.count(),
        }

    def _get_status_label(self, status: str) -> str:
        """Convert status code to human-readable label."""
        status_labels = {
            'OPERATING': 'Operating',
            'CLOSED_TEMP': 'Temporarily Closed',
            'CLOSED_PERM': 'Permanently Closed',
            'UNDER_CONSTRUCTION': 'Under Construction',
        }
        return status_labels.get(status, status)

    def _generate_cache_key(self, operation: str, filters: Optional[Dict[str, Any]] = None) -> str:
        """Generate a cache-backend-safe key for *operation* + *filters*.

        Filter values may contain spaces or other characters that are invalid
        in memcached keys (and can make keys arbitrarily long), so the
        canonical filter string is hashed instead of embedded verbatim.
        """
        import hashlib  # local import keeps module-level deps unchanged

        key_parts = [self.CACHE_KEY_PREFIX, operation]
        if filters:
            # Sorted items give a deterministic representation; keep entries
            # whose value is set (None means "filter absent").
            filter_str = '_'.join(
                f"{k}:{v}" for k, v in sorted(filters.items()) if v is not None
            )
            key_parts.append(hashlib.md5(filter_str.encode('utf-8')).hexdigest())
        return '_'.join(key_parts)

    def invalidate_cache(self, filters: Optional[Dict[str, Any]] = None) -> None:
        """Invalidate cached data for the given filters."""
        # Simplified implementation: delete the known key shapes. In
        # production, cache versioning or tags would be more robust.
        cache_keys = [
            self._generate_cache_key('initial', filters),
            self._generate_cache_key('metadata', filters),
        ]
        # Also invalidate progressive-load batches up to a generous bound.
        for offset in range(0, 1000, self.PROGRESSIVE_LOAD_SIZE):
            cache_keys.append(self._generate_cache_key(f'progressive_{offset}', filters))
        cache.delete_many(cache_keys)
# Module-level singleton created at import time. The constructor only builds
# a lazy queryset, so importing this module performs no database access.
smart_park_loader = SmartParkLoader()