Implement hybrid filtering strategy for parks and rides

- Added comprehensive documentation for hybrid filtering implementation, including architecture, API endpoints, performance characteristics, and usage examples.
- Developed a hybrid pagination and client-side filtering recommendation, detailing server-side responsibilities and client-side logic.
- Created a test script for hybrid filtering endpoints, covering various test cases including basic filtering, search functionality, pagination, and edge cases.
This commit is contained in:
pacnpal
2025-09-14 21:07:17 -04:00
parent 0fd6dc2560
commit 35f8d0ef8f
42 changed files with 8490 additions and 224 deletions

View File

@@ -0,0 +1,88 @@
# Generated by Django 5.2.5 on 2025-09-14 19:01
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the hybrid-filtering fields ``opening_year`` and ``search_text``.

    ``Park`` gets indexed copies of both fields; the pghistory event model
    ``ParkEvent`` gets un-indexed mirrors. The pghistory triggers are dropped
    first and recreated afterwards so their generated INSERT statements
    include the new columns.
    """

    dependencies = [
        ("parks", "0013_remove_park_insert_insert_remove_park_update_update_and_more"),
    ]

    operations = [
        # Drop the existing pghistory triggers: their generated SQL lists the
        # old column set and would not capture the new fields.
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="insert_insert",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="update_update",
        ),
        # opening_year is denormalized from opening_date; indexed because it
        # is a common filter dimension for hybrid filtering.
        migrations.AddField(
            model_name="park",
            name="opening_year",
            field=models.IntegerField(
                blank=True,
                db_index=True,
                help_text="Year the park opened (computed from opening_date)",
                null=True,
            ),
        ),
        # search_text is a denormalized lowercase blob used for client-side
        # and icontains searching.
        migrations.AddField(
            model_name="park",
            name="search_text",
            field=models.TextField(
                blank=True,
                db_index=True,
                help_text="Searchable text combining name, description, location, and operator",
            ),
        ),
        # Mirror fields on the pghistory event table (no indexes needed on
        # the audit table).
        migrations.AddField(
            model_name="parkevent",
            name="opening_year",
            field=models.IntegerField(
                blank=True,
                help_text="Year the park opened (computed from opening_date)",
                null=True,
            ),
        ),
        migrations.AddField(
            model_name="parkevent",
            name="search_text",
            field=models.TextField(
                blank=True,
                help_text="Searchable text combining name, description, location, and operator",
            ),
        ),
        # Recreate the pghistory triggers with generated SQL that includes
        # the new columns.
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="insert_insert",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="39ac89dc193467b8b41f06ff15903f0a3e22f6b0",
                    operation="INSERT",
                    pgid="pgtrigger_insert_insert_66883",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="update_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    # Only record an event when a column actually changed.
                    condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="af7925b4ef24b42c66b7795b9e0c6c8f510e597c",
                    operation="UPDATE",
                    pgid="pgtrigger_update_update_19f56",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,64 @@
# Generated by Django 5.2.5 on 2025-09-14 19:01
from django.db import migrations
def populate_computed_fields(apps, schema_editor):
    """Back-fill ``opening_year`` and ``search_text`` for existing parks.

    Runs raw SQL with the table's *user* triggers disabled so the pghistory
    triggers do not record one audit event per back-filled row.

    Args:
        apps: Historical app registry (unused; raw SQL is used instead).
        schema_editor: Active schema editor used to execute the SQL.
    """
    # BUG FIX: use DISABLE TRIGGER USER instead of DISABLE TRIGGER ALL.
    # ``ALL`` also disables the system triggers that enforce foreign-key
    # constraints (and requires superuser privileges); we only need to
    # silence the user-defined pghistory triggers.
    schema_editor.execute("ALTER TABLE parks_park DISABLE TRIGGER USER;")
    try:
        # opening_year is derived from opening_date.
        schema_editor.execute("""
            UPDATE parks_park
            SET opening_year = EXTRACT(YEAR FROM opening_date)
            WHERE opening_date IS NOT NULL;
        """)
        # Base search_text: name + description. This covers parks without an
        # operator, which the join-based UPDATE below would skip.
        schema_editor.execute("""
            UPDATE parks_park
            SET search_text = LOWER(
                COALESCE(name, '') || ' ' ||
                COALESCE(description, '')
            );
        """)
        # Parks that have an operator additionally get the operator name
        # appended (overwrites the base value for those rows).
        schema_editor.execute("""
            UPDATE parks_park
            SET search_text = LOWER(
                COALESCE(parks_park.name, '') || ' ' ||
                COALESCE(parks_park.description, '') || ' ' ||
                COALESCE(parks_company.name, '')
            )
            FROM parks_company
            WHERE parks_park.operator_id = parks_company.id;
        """)
    finally:
        # Always re-enable the triggers, even if an UPDATE above fails.
        schema_editor.execute("ALTER TABLE parks_park ENABLE TRIGGER USER;")
def reverse_populate_computed_fields(apps, schema_editor):
    """Reverse operation: blank out the computed hybrid-filtering fields."""
    park_model = apps.get_model('parks', 'Park')
    park_model.objects.update(opening_year=None, search_text='')
class Migration(migrations.Migration):
    """Data migration: back-fill the hybrid-filtering fields added in 0014."""

    dependencies = [
        ("parks", "0014_add_hybrid_filtering_fields"),
    ]

    operations = [
        # Forward: populate opening_year/search_text via raw SQL.
        # Reverse: clear both fields through the ORM.
        migrations.RunPython(
            populate_computed_fields,
            reverse_populate_computed_fields,
        ),
    ]

View File

@@ -0,0 +1,85 @@
# Generated by Django 5.2.5 on 2025-09-14 19:12
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the supporting indexes for the hybrid filtering strategy.

    Every statement uses ``IF [NOT] EXISTS`` so the migration is idempotent.
    NOTE(review): plain ``CREATE INDEX`` takes a lock that blocks writes for
    the duration of the build; on a large production table
    ``CREATE INDEX CONCURRENTLY`` (with ``atomic = False``) would avoid that.
    """

    dependencies = [
        ("parks", "0015_populate_hybrid_filtering_fields"),
    ]

    operations = [
        # Composite indexes for common filter combinations
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_status_park_type_idx ON parks_park (status, park_type);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_status_park_type_idx;"
        ),
        # Partial index: rows without an opening_year can never match a
        # year-range filter, so exclude them to keep the index small.
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_opening_year_status_idx ON parks_park (opening_year, status) WHERE opening_year IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_opening_year_status_idx;"
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_size_rating_idx ON parks_park (size_acres, average_rating) WHERE size_acres IS NOT NULL AND average_rating IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_size_rating_idx;"
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_ride_coaster_count_idx ON parks_park (ride_count, coaster_count) WHERE ride_count IS NOT NULL AND coaster_count IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_ride_coaster_count_idx;"
        ),
        # Full-text search index for search_text field
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_search_text_gin_idx ON parks_park USING gin(to_tsvector('english', search_text));",
            reverse_sql="DROP INDEX IF EXISTS parks_park_search_text_gin_idx;"
        ),
        # Trigram index for fuzzy search on search_text (requires pg_trgm).
        migrations.RunSQL(
            "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
            reverse_sql="-- Cannot drop extension as it might be used elsewhere"
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_search_text_trgm_idx ON parks_park USING gin(search_text gin_trgm_ops);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_search_text_trgm_idx;"
        ),
        # Indexes for location-based filtering (assuming location relationship exists)
        migrations.RunSQL(
            """
            CREATE INDEX IF NOT EXISTS parks_parklocation_country_state_idx
            ON parks_parklocation (country, state)
            WHERE country IS NOT NULL AND state IS NOT NULL;
            """,
            reverse_sql="DROP INDEX IF EXISTS parks_parklocation_country_state_idx;"
        ),
        # Index for operator-based filtering
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_operator_status_idx ON parks_park (operator_id, status);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_operator_status_idx;"
        ),
        # Partial indexes for common status filters
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_operating_parks_idx ON parks_park (name, opening_year) WHERE status IN ('OPERATING', 'CLOSED_TEMP');",
            reverse_sql="DROP INDEX IF EXISTS parks_park_operating_parks_idx;"
        ),
        # Expression index so case-insensitive ordering/lookup on name can
        # use an index scan.
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_name_lower_idx ON parks_park (LOWER(name));",
            reverse_sql="DROP INDEX IF EXISTS parks_park_name_lower_idx;"
        ),
        # Covering index (INCLUDE payload columns) for the default
        # operating/temporarily-closed listing query.
        migrations.RunSQL(
            """
            CREATE INDEX IF NOT EXISTS parks_park_hybrid_covering_idx
            ON parks_park (status, park_type, opening_year)
            INCLUDE (name, slug, size_acres, average_rating, ride_count, coaster_count, operator_id)
            WHERE status IN ('OPERATING', 'CLOSED_TEMP');
            """,
            reverse_sql="DROP INDEX IF EXISTS parks_park_hybrid_covering_idx;"
        ),
    ]

View File

@@ -0,0 +1,73 @@
# Generated by Django 5.2.5 on 2025-09-15 00:50
import django.utils.timezone
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a ``timezone`` CharField to Park and its pghistory event model.

    The pghistory triggers are dropped and recreated so the generated
    INSERT statements include the new column.
    """

    dependencies = [
        ("parks", "0016_add_hybrid_filtering_indexes"),
    ]

    operations = [
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="insert_insert",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="update_update",
        ),
        migrations.AddField(
            model_name="park",
            name="timezone",
            # BUG FIX: the auto-generated default was
            # ``django.utils.timezone.now`` (a datetime factory), which would
            # back-fill existing rows with a stringified timestamp instead of
            # an IANA timezone identifier. Use a valid identifier instead.
            # preserve_default=False means this default is used only for the
            # back-fill and is not kept on the field.
            field=models.CharField(
                default="UTC",
                help_text="Timezone identifier for park operations (e.g., 'America/New_York')",
                max_length=50,
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="parkevent",
            name="timezone",
            field=models.CharField(
                default="UTC",
                help_text="Timezone identifier for park operations (e.g., 'America/New_York')",
                max_length=50,
            ),
            preserve_default=False,
        ),
        # Recreate the pghistory triggers with the timezone column included.
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="insert_insert",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "timezone", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."timezone", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="9da686bd8a1881fe7a3fdfebc14411680fe47527",
                    operation="INSERT",
                    pgid="pgtrigger_insert_insert_66883",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="update_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    # Only record an event when a column actually changed.
                    condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "timezone", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."timezone", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="787e3176b96b506020f056ee1122d90d25e4cb0d",
                    operation="UPDATE",
                    pgid="pgtrigger_update_update_19f56",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,12 @@
# Generated by Django 5.2.5 on 2025-09-15 01:03
from django.db import migrations
class Migration(migrations.Migration):
    """Auto-generated migration with no schema operations.

    Kept as a placeholder node in the migration graph; later manual
    migrations depend on it.
    """

    dependencies = [
        ("parks", "0017_add_timezone_to_pghistory_triggers"),
    ]

    operations = []

View File

@@ -0,0 +1,52 @@
# Generated manually to fix pghistory timezone issue
from django.db import migrations
class Migration(migrations.Migration):
    """Manually recreate the pghistory INSERT trigger to include ``timezone``.

    NOTE(review): migration 0017 already recreated this trigger with the
    timezone column via pgtrigger; this migration force-replaces the function
    with hand-written SQL. The function here ends with ``RETURN NEW`` rather
    than ``RETURN NULL`` — equivalent in effect, since PostgreSQL ignores the
    return value of an AFTER row-level trigger.
    """

    dependencies = [
        ("parks", "0018_auto_20250914_2103"),
    ]

    operations = [
        # DROP ... CASCADE removes the trigger attached to the function as
        # well, so the CREATE TRIGGER at the end re-attaches it.
        migrations.RunSQL(
            sql="""
            -- Drop the existing trigger function
            DROP FUNCTION IF EXISTS pgtrigger_insert_insert_66883() CASCADE;

            -- Recreate the trigger function with timezone field
            CREATE OR REPLACE FUNCTION pgtrigger_insert_insert_66883()
            RETURNS TRIGGER AS $$
            BEGIN
                INSERT INTO "parks_parkevent" (
                    "average_rating", "banner_image_id", "card_image_id", "closing_date",
                    "coaster_count", "created_at", "description", "id", "name", "opening_date",
                    "opening_year", "operating_season", "operator_id", "park_type",
                    "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id",
                    "property_owner_id", "ride_count", "search_text", "size_acres",
                    "slug", "status", "updated_at", "url", "website", "timezone"
                ) VALUES (
                    NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date",
                    NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date",
                    NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type",
                    _pgh_attach_context(), NOW(), 'insert', NEW."id",
                    NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres",
                    NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website", NEW."timezone"
                );
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            -- Recreate the trigger
            CREATE TRIGGER pgtrigger_insert_insert_66883
            AFTER INSERT ON parks_park
            FOR EACH ROW
            EXECUTE FUNCTION pgtrigger_insert_insert_66883();
            """,
            reverse_sql="""
            -- This is irreversible, but we can drop and recreate without timezone
            DROP FUNCTION IF EXISTS pgtrigger_insert_insert_66883() CASCADE;
            """
        ),
    ]

View File

@@ -0,0 +1,52 @@
# Generated manually to fix pghistory UPDATE timezone issue
from django.db import migrations
class Migration(migrations.Migration):
    """Manually recreate the pghistory UPDATE trigger to include ``timezone``.

    Companion to 0019, which did the same for the INSERT trigger.
    NOTE(review): unlike the pgtrigger-generated version, this CREATE TRIGGER
    has no ``WHEN (OLD.* IS DISTINCT FROM NEW.*)`` condition, so an event row
    is recorded on every UPDATE, changed or not — confirm this is intended.
    """

    dependencies = [
        ("parks", "0019_fix_pghistory_timezone"),
    ]

    operations = [
        # DROP ... CASCADE also removes the attached trigger; the final
        # CREATE TRIGGER re-attaches it.
        migrations.RunSQL(
            sql="""
            -- Drop the existing UPDATE trigger function
            DROP FUNCTION IF EXISTS pgtrigger_update_update_19f56() CASCADE;

            -- Recreate the UPDATE trigger function with timezone field
            CREATE OR REPLACE FUNCTION pgtrigger_update_update_19f56()
            RETURNS TRIGGER AS $$
            BEGIN
                INSERT INTO "parks_parkevent" (
                    "average_rating", "banner_image_id", "card_image_id", "closing_date",
                    "coaster_count", "created_at", "description", "id", "name", "opening_date",
                    "opening_year", "operating_season", "operator_id", "park_type",
                    "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id",
                    "property_owner_id", "ride_count", "search_text", "size_acres",
                    "slug", "status", "updated_at", "url", "website", "timezone"
                ) VALUES (
                    NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date",
                    NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date",
                    NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type",
                    _pgh_attach_context(), NOW(), 'update', NEW."id",
                    NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres",
                    NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website", NEW."timezone"
                );
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            -- Recreate the UPDATE trigger
            CREATE TRIGGER pgtrigger_update_update_19f56
            AFTER UPDATE ON parks_park
            FOR EACH ROW
            EXECUTE FUNCTION pgtrigger_update_update_19f56();
            """,
            reverse_sql="""
            -- This is irreversible, but we can drop and recreate without timezone
            DROP FUNCTION IF EXISTS pgtrigger_update_update_19f56() CASCADE;
            """
        ),
    ]

View File

@@ -124,6 +124,25 @@ class Park(TrackedModel):
# Frontend URL
url = models.URLField(blank=True, help_text="Frontend URL for this park")
# Computed fields for hybrid filtering
opening_year = models.IntegerField(
null=True,
blank=True,
db_index=True,
help_text="Year the park opened (computed from opening_date)"
)
search_text = models.TextField(
blank=True,
db_index=True,
help_text="Searchable text combining name, description, location, and operator"
)
# Timezone for park operations
timezone = models.CharField(
max_length=50,
help_text="Timezone identifier for park operations (e.g., 'America/New_York')"
)
class Meta:
ordering = ["name"]
constraints = [
@@ -198,6 +217,9 @@ class Park(TrackedModel):
frontend_domain = getattr(settings, "FRONTEND_DOMAIN", "https://thrillwiki.com")
self.url = f"{frontend_domain}/parks/{self.slug}/"
# Populate computed fields for hybrid filtering
self._populate_computed_fields()
# Save the model
super().save(*args, **kwargs)
@@ -209,6 +231,44 @@ class Park(TrackedModel):
slug=old_slug,
)
def _populate_computed_fields(self) -> None:
"""Populate computed fields for hybrid filtering"""
# Populate opening_year from opening_date
if self.opening_date:
self.opening_year = self.opening_date.year
else:
self.opening_year = None
# Populate search_text for client-side filtering
search_parts = [self.name]
if self.description:
search_parts.append(self.description)
# Add location information if available
try:
if hasattr(self, 'location') and self.location:
if self.location.city:
search_parts.append(self.location.city)
if self.location.state:
search_parts.append(self.location.state)
if self.location.country:
search_parts.append(self.location.country)
except Exception:
# Handle case where location relationship doesn't exist yet
pass
# Add operator information
if self.operator:
search_parts.append(self.operator.name)
# Add property owner information if different
if self.property_owner and self.property_owner != self.operator:
search_parts.append(self.property_owner.name)
# Combine all parts into searchable text
self.search_text = ' '.join(filter(None, search_parts)).lower()
def clean(self):
super().clean()
if self.operator and "OPERATOR" not in self.operator.roles:

View File

@@ -0,0 +1,425 @@
"""
Smart Park Loader for Hybrid Filtering Strategy
This module provides intelligent data loading capabilities for the hybrid filtering approach,
optimizing database queries and implementing progressive loading strategies.
"""
from typing import Dict, List, Optional, Any, Tuple
from django.db import models
from django.core.cache import cache
from django.conf import settings
from apps.parks.models import Park
class SmartParkLoader:
    """
    Intelligent park data loader that optimizes queries based on filtering
    requirements. Implements progressive loading and smart caching strategies.

    * Result sets of at most ``MAX_CLIENT_SIDE_RECORDS`` rows are returned in
      full so the client can filter locally ("client_side" strategy).
    * Larger result sets are paginated server-side: ``INITIAL_LOAD_SIZE`` rows
      up front, then ``PROGRESSIVE_LOAD_SIZE`` per batch ("server_side").
    * Responses are cached for ``CACHE_TIMEOUT`` seconds, keyed by operation
      and a hash of the filter set.
    """

    # Cache configuration
    CACHE_TIMEOUT = getattr(settings, 'HYBRID_FILTER_CACHE_TIMEOUT', 300)  # 5 minutes
    CACHE_KEY_PREFIX = 'hybrid_parks'

    # Progressive loading thresholds
    INITIAL_LOAD_SIZE = 50
    PROGRESSIVE_LOAD_SIZE = 25
    MAX_CLIENT_SIDE_RECORDS = 200

    # (filter key, ORM lookup) pairs for the numeric range filters, applied
    # uniformly in _apply_filters.
    _RANGE_FILTERS: Tuple[Tuple[str, str], ...] = (
        ('opening_year_min', 'opening_year__gte'),
        ('opening_year_max', 'opening_year__lte'),
        ('size_min', 'size_acres__gte'),
        ('size_max', 'size_acres__lte'),
        ('rating_min', 'average_rating__gte'),
        ('rating_max', 'average_rating__lte'),
        ('ride_count_min', 'ride_count__gte'),
        ('ride_count_max', 'ride_count__lte'),
        ('coaster_count_min', 'coaster_count__gte'),
        ('coaster_count_max', 'coaster_count__lte'),
    )

    def __init__(self):
        # QuerySets are lazy, so building this does not hit the database.
        self.base_queryset = self._get_optimized_queryset()

    def _get_optimized_queryset(self) -> models.QuerySet:
        """Get optimized base queryset with all necessary prefetches."""
        return Park.objects.select_related(
            'operator',
            'property_owner',
            'banner_image',
            'card_image',
        ).prefetch_related(
            'location',  # ParkLocation relationship
        ).filter(
            # Only include operating and temporarily closed parks by default
            status__in=['OPERATING', 'CLOSED_TEMP']
        ).order_by('name')

    def get_initial_load(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Get initial park data load with smart filtering decisions.

        Args:
            filters: Optional filters to apply.

        Returns:
            Dictionary with keys ``parks``, ``total_count``, ``strategy``
            ('client_side' or 'server_side'), ``has_more``, ``next_offset``,
            and ``filter_metadata``.
        """
        cache_key = self._generate_cache_key('initial', filters)
        cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        queryset = self.base_queryset
        if filters:
            queryset = self._apply_filters(queryset, filters)

        # Total count decides between client-side and server-side strategies.
        total_count = queryset.count()
        if total_count <= self.MAX_CLIENT_SIDE_RECORDS:
            # Small enough: ship everything and let the client filter.
            parks = list(queryset.all())
            strategy = 'client_side'
            has_more = False
        else:
            # Too large: ship the first page and paginate server-side.
            parks = list(queryset[:self.INITIAL_LOAD_SIZE])
            strategy = 'server_side'
            has_more = total_count > self.INITIAL_LOAD_SIZE

        result = {
            'parks': parks,
            'total_count': total_count,
            'strategy': strategy,
            'has_more': has_more,
            'next_offset': len(parks) if has_more else None,
            'filter_metadata': self._get_filter_metadata(queryset),
        }
        cache.set(cache_key, result, self.CACHE_TIMEOUT)
        return result

    def get_progressive_load(
        self,
        offset: int,
        filters: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Get next batch of parks for progressive loading.

        Args:
            offset: Starting offset for the batch.
            filters: Optional filters to apply.

        Returns:
            Dictionary with keys ``parks``, ``total_count``, ``has_more``,
            and ``next_offset``.
        """
        cache_key = self._generate_cache_key(f'progressive_{offset}', filters)
        cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        queryset = self.base_queryset
        if filters:
            queryset = self._apply_filters(queryset, filters)

        end_offset = offset + self.PROGRESSIVE_LOAD_SIZE
        parks = list(queryset[offset:end_offset])

        total_count = queryset.count()
        has_more = end_offset < total_count

        result = {
            'parks': parks,
            'total_count': total_count,
            'has_more': has_more,
            'next_offset': end_offset if has_more else None,
        }
        cache.set(cache_key, result, self.CACHE_TIMEOUT)
        return result

    def get_filter_metadata(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Get metadata about available filter options.

        Args:
            filters: Current filters used to scope the metadata.

        Returns:
            Dictionary with ``categorical`` options, numeric ``ranges``,
            and ``total_count``.
        """
        cache_key = self._generate_cache_key('metadata', filters)
        cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        queryset = self.base_queryset
        if filters:
            queryset = self._apply_filters(queryset, filters)

        result = self._get_filter_metadata(queryset)
        cache.set(cache_key, result, self.CACHE_TIMEOUT)
        return result

    def _apply_filters(self, queryset: models.QuerySet, filters: Dict[str, Any]) -> models.QuerySet:
        """Apply the supported filters to *queryset* and return the result.

        Categorical filters accept a scalar or a list (scalars are wrapped —
        previously a scalar ``country``/``state`` hit ``__in`` on a string
        and matched per-character). Numeric range filters compare against
        ``None`` explicitly so falsy-but-valid bounds such as ``0`` are
        honoured (previously e.g. ``rating_min=0`` was silently ignored).
        """
        def as_list(value: Any) -> List[Any]:
            # Normalize a scalar categorical value into a one-element list.
            return list(value) if isinstance(value, (list, tuple, set)) else [value]

        status = filters.get('status')
        if status:
            queryset = queryset.filter(status__in=as_list(status))

        park_type = filters.get('park_type')
        if park_type:
            queryset = queryset.filter(park_type__in=as_list(park_type))

        country = filters.get('country')
        if country:
            queryset = queryset.filter(location__country__in=as_list(country))

        state = filters.get('state')
        if state:
            queryset = queryset.filter(location__state__in=as_list(state))

        # Numeric range filters, table-driven; '' still means "not set".
        for key, lookup in self._RANGE_FILTERS:
            value = filters.get(key)
            if value is not None and value != '':
                queryset = queryset.filter(**{lookup: value})

        operator = filters.get('operator')
        if operator:
            queryset = queryset.filter(operator__slug__in=as_list(operator))

        search = filters.get('search')
        if search:
            # search_text is stored lowercase; icontains handles case anyway.
            queryset = queryset.filter(search_text__icontains=search.lower())

        return queryset

    def _get_filter_metadata(self, queryset: models.QuerySet) -> Dict[str, Any]:
        """Generate filter metadata (categorical counts + numeric ranges)."""
        # Distinct values with row counts for each categorical facet.
        countries_data = list(
            queryset.values('location__country')
            .exclude(location__country__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('location__country')
        )
        states_data = list(
            queryset.values('location__state')
            .exclude(location__state__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('location__state')
        )
        park_types_data = list(
            queryset.values('park_type')
            .exclude(park_type__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('park_type')
        )
        statuses_data = list(
            queryset.values('status')
            .annotate(count=models.Count('id'))
            .order_by('status')
        )
        # NOTE: select_related() has no effect before values(), so it was
        # dropped here; the values() join fetches the operator columns.
        operators_data = list(
            queryset.values('operator__id', 'operator__name', 'operator__slug')
            .exclude(operator__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('operator__name')
        )

        # Convert to the frontend-expected {value, label, count} shape.
        countries = [
            {
                'value': item['location__country'],
                'label': item['location__country'],
                'count': item['count']
            }
            for item in countries_data
        ]
        states = [
            {
                'value': item['location__state'],
                'label': item['location__state'],
                'count': item['count']
            }
            for item in states_data
        ]
        park_types = [
            {
                'value': item['park_type'],
                'label': item['park_type'],
                'count': item['count']
            }
            for item in park_types_data
        ]
        statuses = [
            {
                'value': item['status'],
                'label': self._get_status_label(item['status']),
                'count': item['count']
            }
            for item in statuses_data
        ]
        operators = [
            {
                'value': item['operator__slug'],
                'label': item['operator__name'],
                'count': item['count']
            }
            for item in operators_data
        ]

        # Min/max bounds for the numeric range sliders.
        aggregates = queryset.aggregate(
            opening_year_min=models.Min('opening_year'),
            opening_year_max=models.Max('opening_year'),
            size_min=models.Min('size_acres'),
            size_max=models.Max('size_acres'),
            rating_min=models.Min('average_rating'),
            rating_max=models.Max('average_rating'),
            ride_count_min=models.Min('ride_count'),
            ride_count_max=models.Max('ride_count'),
            coaster_count_min=models.Min('coaster_count'),
            coaster_count_max=models.Max('coaster_count'),
        )

        def as_float(value: Any) -> Optional[float]:
            # Explicit None check so a legitimate aggregate of 0 is kept
            # (previously 0 was coerced to None by a truthiness test).
            return float(value) if value is not None else None

        return {
            'categorical': {
                'countries': countries,
                'states': states,
                'park_types': park_types,
                'statuses': statuses,
                'operators': operators,
            },
            'ranges': {
                'opening_year': {
                    'min': aggregates['opening_year_min'],
                    'max': aggregates['opening_year_max'],
                    'step': 1,
                    'unit': 'year'
                },
                'size_acres': {
                    'min': as_float(aggregates['size_min']),
                    'max': as_float(aggregates['size_max']),
                    'step': 1.0,
                    'unit': 'acres'
                },
                'average_rating': {
                    'min': as_float(aggregates['rating_min']),
                    'max': as_float(aggregates['rating_max']),
                    'step': 0.1,
                    'unit': 'stars'
                },
                'ride_count': {
                    'min': aggregates['ride_count_min'],
                    'max': aggregates['ride_count_max'],
                    'step': 1,
                    'unit': 'rides'
                },
                'coaster_count': {
                    'min': aggregates['coaster_count_min'],
                    'max': aggregates['coaster_count_max'],
                    'step': 1,
                    'unit': 'coasters'
                },
            },
            'total_count': queryset.count(),
        }

    def _get_status_label(self, status: str) -> str:
        """Convert status code to human-readable label."""
        status_labels = {
            'OPERATING': 'Operating',
            'CLOSED_TEMP': 'Temporarily Closed',
            'CLOSED_PERM': 'Permanently Closed',
            'UNDER_CONSTRUCTION': 'Under Construction',
        }
        return status_labels.get(status, status)

    def _generate_cache_key(self, operation: str, filters: Optional[Dict[str, Any]] = None) -> str:
        """Generate a cache-backend-safe key for *operation* + *filters*.

        Filter values may contain spaces or other characters that are invalid
        in memcached keys (and can make keys arbitrarily long), so the
        canonical filter string is hashed instead of embedded verbatim.
        """
        import hashlib  # local import keeps module-level deps unchanged

        key_parts = [self.CACHE_KEY_PREFIX, operation]
        if filters:
            # Sorted items give a deterministic representation; keep entries
            # whose value is set (None means "filter absent").
            filter_str = '_'.join(
                f"{k}:{v}" for k, v in sorted(filters.items()) if v is not None
            )
            key_parts.append(hashlib.md5(filter_str.encode('utf-8')).hexdigest())
        return '_'.join(key_parts)

    def invalidate_cache(self, filters: Optional[Dict[str, Any]] = None) -> None:
        """Invalidate cached data for the given filters."""
        # Simplified implementation: delete the known key shapes. In
        # production, cache versioning or tags would be more robust.
        cache_keys = [
            self._generate_cache_key('initial', filters),
            self._generate_cache_key('metadata', filters),
        ]
        # Also invalidate progressive-load batches up to a generous bound.
        for offset in range(0, 1000, self.PROGRESSIVE_LOAD_SIZE):
            cache_keys.append(self._generate_cache_key(f'progressive_{offset}', filters))
        cache.delete_many(cache_keys)
# Module-level singleton created at import time. The constructor only builds
# a lazy queryset, so importing this module performs no database access.
smart_park_loader = SmartParkLoader()