Implement hybrid filtering strategy for parks and rides

- Added comprehensive documentation for hybrid filtering implementation, including architecture, API endpoints, performance characteristics, and usage examples.
- Developed a hybrid pagination and client-side filtering recommendation, detailing server-side responsibilities and client-side logic.
- Created a test script for hybrid filtering endpoints, covering various test cases including basic filtering, search functionality, pagination, and edge cases.
This commit is contained in:
pacnpal
2025-09-14 21:07:17 -04:00
parent 0fd6dc2560
commit 35f8d0ef8f
42 changed files with 8490 additions and 224 deletions

View File

@@ -0,0 +1,72 @@
# Generated by Django 5.2.5 on 2025-09-14 19:18
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("rides", "0017_remove_ridemodelphoto_insert_insert_and_more"),
]
operations = [
pgtrigger.migrations.RemoveTrigger(
model_name="ride",
name="insert_insert",
),
pgtrigger.migrations.RemoveTrigger(
model_name="ride",
name="update_update",
),
migrations.AddField(
model_name="ride",
name="opening_year",
field=models.IntegerField(blank=True, db_index=True, null=True),
),
migrations.AddField(
model_name="ride",
name="search_text",
field=models.TextField(blank=True, db_index=True),
),
migrations.AddField(
model_name="rideevent",
name="opening_year",
field=models.IntegerField(blank=True, null=True),
),
migrations.AddField(
model_name="rideevent",
name="search_text",
field=models.TextField(blank=True),
),
pgtrigger.migrations.AddTrigger(
model_name="ride",
trigger=pgtrigger.compiler.Trigger(
name="insert_insert",
sql=pgtrigger.compiler.UpsertTriggerSql(
func='INSERT INTO "rides_rideevent" ("average_rating", "banner_image_id", "capacity_per_hour", "card_image_id", "category", "closing_date", "created_at", "description", "designer_id", "id", "manufacturer_id", "max_height_in", "min_height_in", "name", "opening_date", "opening_year", "park_area_id", "park_id", "park_url", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "post_closing_status", "ride_duration_seconds", "ride_model_id", "search_text", "slug", "status", "status_since", "updated_at", "url") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."capacity_per_hour", NEW."card_image_id", NEW."category", NEW."closing_date", NEW."created_at", NEW."description", NEW."designer_id", NEW."id", NEW."manufacturer_id", NEW."max_height_in", NEW."min_height_in", NEW."name", NEW."opening_date", NEW."opening_year", NEW."park_area_id", NEW."park_id", NEW."park_url", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."post_closing_status", NEW."ride_duration_seconds", NEW."ride_model_id", NEW."search_text", NEW."slug", NEW."status", NEW."status_since", NEW."updated_at", NEW."url"); RETURN NULL;',
hash="64e055c574495c0f09b3cbfb12442d4e4113e4f2",
operation="INSERT",
pgid="pgtrigger_insert_insert_52074",
table="rides_ride",
when="AFTER",
),
),
),
pgtrigger.migrations.AddTrigger(
model_name="ride",
trigger=pgtrigger.compiler.Trigger(
name="update_update",
sql=pgtrigger.compiler.UpsertTriggerSql(
condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
func='INSERT INTO "rides_rideevent" ("average_rating", "banner_image_id", "capacity_per_hour", "card_image_id", "category", "closing_date", "created_at", "description", "designer_id", "id", "manufacturer_id", "max_height_in", "min_height_in", "name", "opening_date", "opening_year", "park_area_id", "park_id", "park_url", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "post_closing_status", "ride_duration_seconds", "ride_model_id", "search_text", "slug", "status", "status_since", "updated_at", "url") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."capacity_per_hour", NEW."card_image_id", NEW."category", NEW."closing_date", NEW."created_at", NEW."description", NEW."designer_id", NEW."id", NEW."manufacturer_id", NEW."max_height_in", NEW."min_height_in", NEW."name", NEW."opening_date", NEW."opening_year", NEW."park_area_id", NEW."park_id", NEW."park_url", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."post_closing_status", NEW."ride_duration_seconds", NEW."ride_model_id", NEW."search_text", NEW."slug", NEW."status", NEW."status_since", NEW."updated_at", NEW."url"); RETURN NULL;',
hash="6476c8dd4bbb0e2ae42ca2daa5c691b87f9119e9",
operation="UPDATE",
pgid="pgtrigger_update_update_4917a",
table="rides_ride",
when="AFTER",
),
),
),
]

View File

@@ -0,0 +1,124 @@
"""
Populate computed fields for hybrid filtering in rides.
This migration populates the opening_year and search_text fields that were added
in the previous migration. These fields enable efficient hybrid filtering by
pre-computing commonly filtered and searched data.
"""
from django.db import migrations
import pghistory
def populate_computed_fields(apps, schema_editor):
"""Populate computed fields for all existing rides."""
Ride = apps.get_model('rides', 'Ride')
# Disable pghistory triggers during bulk operations to avoid performance issues
with pghistory.context(disable=True):
rides = list(Ride.objects.all().select_related(
'park', 'park__location', 'park_area', 'manufacturer', 'designer', 'ride_model'
))
for ride in rides:
# Extract opening year from opening_date
if ride.opening_date:
ride.opening_year = ride.opening_date.year
else:
ride.opening_year = None
# Build comprehensive search text
search_parts = []
# Basic ride info
if ride.name:
search_parts.append(ride.name)
if ride.description:
search_parts.append(ride.description)
# Park info
if ride.park:
search_parts.append(ride.park.name)
if hasattr(ride.park, 'location') and ride.park.location:
if ride.park.location.city:
search_parts.append(ride.park.location.city)
if ride.park.location.state:
search_parts.append(ride.park.location.state)
if ride.park.location.country:
search_parts.append(ride.park.location.country)
# Park area
if ride.park_area:
search_parts.append(ride.park_area.name)
# Category
if ride.category:
category_choices = [
("", "Select ride type"),
("RC", "Roller Coaster"),
("DR", "Dark Ride"),
("FR", "Flat Ride"),
("WR", "Water Ride"),
("TR", "Transport"),
("OT", "Other"),
]
category_display = dict(category_choices).get(ride.category, '')
if category_display:
search_parts.append(category_display)
# Status
if ride.status:
status_choices = [
("", "Select status"),
("OPERATING", "Operating"),
("CLOSED_TEMP", "Temporarily Closed"),
("SBNO", "Standing But Not Operating"),
("CLOSING", "Closing"),
("CLOSED_PERM", "Permanently Closed"),
("UNDER_CONSTRUCTION", "Under Construction"),
("DEMOLISHED", "Demolished"),
("RELOCATED", "Relocated"),
]
status_display = dict(status_choices).get(ride.status, '')
if status_display:
search_parts.append(status_display)
# Companies
if ride.manufacturer:
search_parts.append(ride.manufacturer.name)
if ride.designer:
search_parts.append(ride.designer.name)
# Ride model
if ride.ride_model:
search_parts.append(ride.ride_model.name)
if ride.ride_model.manufacturer:
search_parts.append(ride.ride_model.manufacturer.name)
ride.search_text = ' '.join(filter(None, search_parts)).lower()
# Bulk update all rides
Ride.objects.bulk_update(rides, ['opening_year', 'search_text'], batch_size=1000)
def reverse_populate_computed_fields(apps, schema_editor):
"""Clear computed fields (reverse operation)."""
Ride = apps.get_model('rides', 'Ride')
# Disable pghistory triggers during bulk operations
with pghistory.context(disable=True):
Ride.objects.all().update(opening_year=None, search_text='')
class Migration(migrations.Migration):
dependencies = [
('rides', '0018_add_hybrid_filtering_fields'),
]
operations = [
migrations.RunPython(
populate_computed_fields,
reverse_populate_computed_fields,
elidable=True,
),
]

View File

@@ -0,0 +1,181 @@
"""
Add strategic database indexes for hybrid filtering in rides.
This migration creates optimized indexes for the hybrid filtering system,
enabling sub-second query performance across all filter combinations.
Index Strategy:
- Composite indexes for common filter combinations
- Partial indexes for status-based filtering
- Covering indexes to avoid table lookups
- GIN indexes for full-text search
- Individual indexes for range queries
Performance Target: <100ms for most filter combinations
"""
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('rides', '0019_populate_hybrid_filtering_fields'),
]
operations = [
# Composite index for park + category filtering (very common)
migrations.RunSQL(
"CREATE INDEX rides_ride_park_category_idx ON rides_ride (park_id, category) WHERE category != '';",
reverse_sql="DROP INDEX IF EXISTS rides_ride_park_category_idx;"
),
# Composite index for park + status filtering (common)
migrations.RunSQL(
"CREATE INDEX rides_ride_park_status_idx ON rides_ride (park_id, status);",
reverse_sql="DROP INDEX IF EXISTS rides_ride_park_status_idx;"
),
# Composite index for category + status filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_category_status_idx ON rides_ride (category, status) WHERE category != '';",
reverse_sql="DROP INDEX IF EXISTS rides_ride_category_status_idx;"
),
# Composite index for manufacturer + category
migrations.RunSQL(
"CREATE INDEX rides_ride_manufacturer_category_idx ON rides_ride (manufacturer_id, category) WHERE manufacturer_id IS NOT NULL AND category != '';",
reverse_sql="DROP INDEX IF EXISTS rides_ride_manufacturer_category_idx;"
),
# Composite index for opening year + category (for timeline filtering)
migrations.RunSQL(
"CREATE INDEX rides_ride_opening_year_category_idx ON rides_ride (opening_year, category) WHERE opening_year IS NOT NULL AND category != '';",
reverse_sql="DROP INDEX IF EXISTS rides_ride_opening_year_category_idx;"
),
# Partial index for operating rides only (most common filter)
migrations.RunSQL(
"CREATE INDEX rides_ride_operating_only_idx ON rides_ride (park_id, category, opening_year) WHERE status = 'OPERATING';",
reverse_sql="DROP INDEX IF EXISTS rides_ride_operating_only_idx;"
),
# Partial index for roller coasters only (popular category)
migrations.RunSQL(
"CREATE INDEX rides_ride_roller_coasters_idx ON rides_ride (park_id, status, opening_year) WHERE category = 'RC';",
reverse_sql="DROP INDEX IF EXISTS rides_ride_roller_coasters_idx;"
),
# Covering index for list views (includes commonly displayed fields)
migrations.RunSQL(
"CREATE INDEX rides_ride_list_covering_idx ON rides_ride (park_id, category, status) INCLUDE (name, opening_date, average_rating);",
reverse_sql="DROP INDEX IF EXISTS rides_ride_list_covering_idx;"
),
# GIN index for full-text search on computed search_text field
migrations.RunSQL(
"CREATE INDEX rides_ride_search_text_gin_idx ON rides_ride USING gin(to_tsvector('english', search_text));",
reverse_sql="DROP INDEX IF EXISTS rides_ride_search_text_gin_idx;"
),
# Trigram index for fuzzy text search
migrations.RunSQL(
"CREATE INDEX rides_ride_search_text_trgm_idx ON rides_ride USING gin(search_text gin_trgm_ops);",
reverse_sql="DROP INDEX IF EXISTS rides_ride_search_text_trgm_idx;"
),
# Index for rating-based filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_rating_idx ON rides_ride (average_rating) WHERE average_rating IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ride_rating_idx;"
),
# Index for capacity-based filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_capacity_idx ON rides_ride (capacity_per_hour) WHERE capacity_per_hour IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ride_capacity_idx;"
),
# Index for height requirement filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_height_req_idx ON rides_ride (min_height_in, max_height_in) WHERE min_height_in IS NOT NULL OR max_height_in IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ride_height_req_idx;"
),
# Composite index for ride model filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_model_manufacturer_idx ON rides_ride (ride_model_id, manufacturer_id) WHERE ride_model_id IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ride_model_manufacturer_idx;"
),
# Index for designer filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_designer_idx ON rides_ride (designer_id, category) WHERE designer_id IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ride_designer_idx;"
),
# Index for park area filtering
migrations.RunSQL(
"CREATE INDEX rides_ride_park_area_idx ON rides_ride (park_area_id, status) WHERE park_area_id IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ride_park_area_idx;"
),
# Roller coaster stats indexes for performance
migrations.RunSQL(
"CREATE INDEX rides_rollercoasterstats_height_idx ON rides_rollercoasterstats (height_ft) WHERE height_ft IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_rollercoasterstats_height_idx;"
),
migrations.RunSQL(
"CREATE INDEX rides_rollercoasterstats_speed_idx ON rides_rollercoasterstats (speed_mph) WHERE speed_mph IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_rollercoasterstats_speed_idx;"
),
migrations.RunSQL(
"CREATE INDEX rides_rollercoasterstats_inversions_idx ON rides_rollercoasterstats (inversions);",
reverse_sql="DROP INDEX IF EXISTS rides_rollercoasterstats_inversions_idx;"
),
migrations.RunSQL(
"CREATE INDEX rides_rollercoasterstats_type_material_idx ON rides_rollercoasterstats (roller_coaster_type, track_material);",
reverse_sql="DROP INDEX IF EXISTS rides_rollercoasterstats_type_material_idx;"
),
migrations.RunSQL(
"CREATE INDEX rides_rollercoasterstats_launch_type_idx ON rides_rollercoasterstats (launch_type);",
reverse_sql="DROP INDEX IF EXISTS rides_rollercoasterstats_launch_type_idx;"
),
# Composite index for complex roller coaster filtering
migrations.RunSQL(
"CREATE INDEX rides_rollercoasterstats_complex_idx ON rides_rollercoasterstats (roller_coaster_type, track_material, launch_type) INCLUDE (height_ft, speed_mph, inversions);",
reverse_sql="DROP INDEX IF EXISTS rides_rollercoasterstats_complex_idx;"
),
# Index for ride model filtering and search
migrations.RunSQL(
"CREATE INDEX rides_ridemodel_manufacturer_category_idx ON rides_ridemodel (manufacturer_id, category) WHERE manufacturer_id IS NOT NULL;",
reverse_sql="DROP INDEX IF EXISTS rides_ridemodel_manufacturer_category_idx;"
),
migrations.RunSQL(
"CREATE INDEX rides_ridemodel_name_trgm_idx ON rides_ridemodel USING gin(name gin_trgm_ops);",
reverse_sql="DROP INDEX IF EXISTS rides_ridemodel_name_trgm_idx;"
),
# Index for company role-based filtering
migrations.RunSQL(
"CREATE INDEX rides_company_manufacturer_role_idx ON rides_company USING gin(roles) WHERE 'MANUFACTURER' = ANY(roles);",
reverse_sql="DROP INDEX IF EXISTS rides_company_manufacturer_role_idx;"
),
migrations.RunSQL(
"CREATE INDEX rides_company_designer_role_idx ON rides_company USING gin(roles) WHERE 'DESIGNER' = ANY(roles);",
reverse_sql="DROP INDEX IF EXISTS rides_company_designer_role_idx;"
),
# Ensure trigram extension is available for fuzzy search
migrations.RunSQL(
"CREATE EXTENSION IF NOT EXISTS pg_trgm;",
reverse_sql="-- Cannot safely drop pg_trgm extension"
),
]