Implement hybrid filtering strategy for parks and rides

- Added comprehensive documentation for hybrid filtering implementation, including architecture, API endpoints, performance characteristics, and usage examples.
- Developed a hybrid pagination and client-side filtering recommendation, detailing server-side responsibilities and client-side logic.
- Created a test script for hybrid filtering endpoints, covering various test cases including basic filtering, search functionality, pagination, and edge cases.
This commit is contained in:
pacnpal
2025-09-14 21:07:17 -04:00
parent 0fd6dc2560
commit 35f8d0ef8f
42 changed files with 8490 additions and 224 deletions

View File

@@ -0,0 +1,88 @@
# Generated by Django 5.2.5 on 2025-09-14 19:01
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``opening_year`` and ``search_text`` columns used for filtering.

    The columns are added to both ``park`` and its event model ``parkevent``.
    The insert/update triggers on ``parks_park`` are removed first and
    re-created last so that the column lists they copy into
    ``parks_parkevent`` include the two new columns.
    """

    dependencies = [
        ("parks", "0013_remove_park_insert_insert_remove_park_update_update_and_more"),
    ]

    operations = [
        # 1. Remove the current triggers; their column lists are about to be stale.
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="insert_insert",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="update_update",
        ),
        # 2. New indexed columns on the live table.
        migrations.AddField(
            model_name="park",
            name="opening_year",
            field=models.IntegerField(
                blank=True,
                db_index=True,
                help_text="Year the park opened (computed from opening_date)",
                null=True,
            ),
        ),
        # NOTE(review): a plain index on an unbounded TextField can reject very
        # long values on PostgreSQL (btree row-size limit) -- confirm this is
        # acceptable, or change it together with the model definition.
        migrations.AddField(
            model_name="park",
            name="search_text",
            field=models.TextField(
                blank=True,
                db_index=True,
                help_text="Searchable text combining name, description, location, and operator",
            ),
        ),
        # 3. Mirror columns on the event table (declared without db_index).
        migrations.AddField(
            model_name="parkevent",
            name="opening_year",
            field=models.IntegerField(
                blank=True,
                help_text="Year the park opened (computed from opening_date)",
                null=True,
            ),
        ),
        migrations.AddField(
            model_name="parkevent",
            name="search_text",
            field=models.TextField(
                blank=True,
                help_text="Searchable text combining name, description, location, and operator",
            ),
        ),
        # 4. Re-create the triggers with the extended column lists.
        #    The SQL and hashes below are generated values; do not hand-edit.
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="insert_insert",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="39ac89dc193467b8b41f06ff15903f0a3e22f6b0",
                    operation="INSERT",
                    pgid="pgtrigger_insert_insert_66883",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="update_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    # Only fire when the row actually changed.
                    condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="af7925b4ef24b42c66b7795b9e0c6c8f510e597c",
                    operation="UPDATE",
                    pgid="pgtrigger_update_update_19f56",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,64 @@
# Generated by Django 5.2.5 on 2025-09-14 19:01
from django.db import migrations
def populate_computed_fields(apps, schema_editor):
    """Backfill ``opening_year`` and ``search_text`` for existing parks.

    The work is done with raw SQL while the user-defined triggers on
    ``parks_park`` are disabled, so the bulk update does not fire the
    history triggers for every row.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor whose ``execute`` runs the statements.
    """
    # USER rather than ALL: ALL also toggles internally generated constraint
    # triggers, which PostgreSQL only permits for superusers. Only the
    # user-defined (history) triggers need to be silenced here.
    schema_editor.execute("ALTER TABLE parks_park DISABLE TRIGGER USER;")
    try:
        # Derive the year from the stored opening date.
        schema_editor.execute(
            """
            UPDATE parks_park
            SET opening_year = EXTRACT(YEAR FROM opening_date)
            WHERE opening_date IS NOT NULL;
            """
        )

        # Baseline for every park: lower-cased name + description.
        schema_editor.execute(
            """
            UPDATE parks_park
            SET search_text = LOWER(
                COALESCE(name, '') || ' ' ||
                COALESCE(description, '')
            );
            """
        )

        # Parks that have an operator additionally get the operator's name
        # appended; parks without one keep the baseline value from above.
        schema_editor.execute(
            """
            UPDATE parks_park
            SET search_text = LOWER(
                COALESCE(parks_park.name, '') || ' ' ||
                COALESCE(parks_park.description, '') || ' ' ||
                COALESCE(parks_company.name, '')
            )
            FROM parks_company
            WHERE parks_park.operator_id = parks_company.id;
            """
        )
    finally:
        # Always restore the triggers, even if one of the updates failed.
        schema_editor.execute("ALTER TABLE parks_park ENABLE TRIGGER USER;")
def reverse_populate_computed_fields(apps, schema_editor):
    """Undo the backfill by resetting both computed columns on every park."""
    park_model = apps.get_model("parks", "Park")
    # Values the columns are reset to: no year, empty search text.
    cleared_values = {"opening_year": None, "search_text": ""}
    park_model.objects.update(**cleared_values)
class Migration(migrations.Migration):
    """Data migration that fills the computed filtering columns on parks."""

    dependencies = [
        ("parks", "0014_add_hybrid_filtering_fields"),
    ]

    operations = [
        # Forward: backfill the columns. Backward: reset them.
        migrations.RunPython(
            code=populate_computed_fields,
            reverse_code=reverse_populate_computed_fields,
        ),
    ]

View File

@@ -0,0 +1,85 @@
# Generated by Django 5.2.5 on 2025-09-14 19:12
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the raw-SQL indexes that back park filtering and search.

    Every index is created with ``IF NOT EXISTS`` and dropped with
    ``IF EXISTS`` on reverse, so the migration can be re-run safely in
    either direction.
    """

    dependencies = [
        ("parks", "0015_populate_hybrid_filtering_fields"),
    ]

    operations = [
        # Composite indexes for common filter combinations
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_status_park_type_idx ON parks_park (status, park_type);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_status_park_type_idx;",
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_opening_year_status_idx ON parks_park (opening_year, status) WHERE opening_year IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_opening_year_status_idx;",
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_size_rating_idx ON parks_park (size_acres, average_rating) WHERE size_acres IS NOT NULL AND average_rating IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_size_rating_idx;",
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_ride_coaster_count_idx ON parks_park (ride_count, coaster_count) WHERE ride_count IS NOT NULL AND coaster_count IS NOT NULL;",
            reverse_sql="DROP INDEX IF EXISTS parks_park_ride_coaster_count_idx;",
        ),
        # Full-text search index for search_text field
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_search_text_gin_idx ON parks_park USING gin(to_tsvector('english', search_text));",
            reverse_sql="DROP INDEX IF EXISTS parks_park_search_text_gin_idx;",
        ),
        # Trigram index for fuzzy search on search_text.
        # The extension is deliberately left installed on reverse because it
        # might be used elsewhere. RunSQL.noop expresses "do nothing"; a
        # comment-only SQL string is not an executable statement.
        migrations.RunSQL(
            "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
            reverse_sql=migrations.RunSQL.noop,
        ),
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_search_text_trgm_idx ON parks_park USING gin(search_text gin_trgm_ops);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_search_text_trgm_idx;",
        ),
        # Indexes for location-based filtering (assuming location relationship exists)
        migrations.RunSQL(
            """
            CREATE INDEX IF NOT EXISTS parks_parklocation_country_state_idx
            ON parks_parklocation (country, state)
            WHERE country IS NOT NULL AND state IS NOT NULL;
            """,
            reverse_sql="DROP INDEX IF EXISTS parks_parklocation_country_state_idx;",
        ),
        # Index for operator-based filtering
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_operator_status_idx ON parks_park (operator_id, status);",
            reverse_sql="DROP INDEX IF EXISTS parks_park_operator_status_idx;",
        ),
        # Partial indexes for common status filters
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_operating_parks_idx ON parks_park (name, opening_year) WHERE status IN ('OPERATING', 'CLOSED_TEMP');",
            reverse_sql="DROP INDEX IF EXISTS parks_park_operating_parks_idx;",
        ),
        # Expression index on the lower-cased park name
        migrations.RunSQL(
            "CREATE INDEX IF NOT EXISTS parks_park_name_lower_idx ON parks_park (LOWER(name));",
            reverse_sql="DROP INDEX IF EXISTS parks_park_name_lower_idx;",
        ),
        # Covering index for common query patterns
        migrations.RunSQL(
            """
            CREATE INDEX IF NOT EXISTS parks_park_hybrid_covering_idx
            ON parks_park (status, park_type, opening_year)
            INCLUDE (name, slug, size_acres, average_rating, ride_count, coaster_count, operator_id)
            WHERE status IN ('OPERATING', 'CLOSED_TEMP');
            """,
            reverse_sql="DROP INDEX IF EXISTS parks_park_hybrid_covering_idx;",
        ),
    ]

View File

@@ -0,0 +1,73 @@
# Generated by Django 5.2.5 on 2025-09-15 00:50
import django.utils.timezone
import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a ``timezone`` column to parks and to the park event table.

    The insert/update triggers on ``parks_park`` are removed first and
    re-created last so that the column lists they copy into
    ``parks_parkevent`` include ``timezone``.
    """

    dependencies = [
        ("parks", "0016_add_hybrid_filtering_indexes"),
    ]

    operations = [
        # Remove the current triggers; their column lists are about to be stale.
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="insert_insert",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="park",
            name="update_update",
        ),
        # The default below is a one-off value used only to backfill existing
        # rows (preserve_default=False keeps it out of the final field state).
        # It must therefore be a timezone identifier: the previous
        # django.utils.timezone.now callable would have stored a stringified
        # timestamp in every existing row.
        migrations.AddField(
            model_name="park",
            name="timezone",
            field=models.CharField(
                default="UTC",
                help_text="Timezone identifier for park operations (e.g., 'America/New_York')",
                max_length=50,
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="parkevent",
            name="timezone",
            field=models.CharField(
                default="UTC",
                help_text="Timezone identifier for park operations (e.g., 'America/New_York')",
                max_length=50,
            ),
            preserve_default=False,
        ),
        # Re-create the triggers with "timezone" in their column lists.
        # The SQL and hashes below are generated values; do not hand-edit.
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="insert_insert",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "timezone", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'insert\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."timezone", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="9da686bd8a1881fe7a3fdfebc14411680fe47527",
                    operation="INSERT",
                    pgid="pgtrigger_insert_insert_66883",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="park",
            trigger=pgtrigger.compiler.Trigger(
                name="update_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    # Only fire when the row actually changed.
                    condition="WHEN (OLD.* IS DISTINCT FROM NEW.*)",
                    func='INSERT INTO "parks_parkevent" ("average_rating", "banner_image_id", "card_image_id", "closing_date", "coaster_count", "created_at", "description", "id", "name", "opening_date", "opening_year", "operating_season", "operator_id", "park_type", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "property_owner_id", "ride_count", "search_text", "size_acres", "slug", "status", "timezone", "updated_at", "url", "website") VALUES (NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date", NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date", NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type", _pgh_attach_context(), NOW(), \'update\', NEW."id", NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres", NEW."slug", NEW."status", NEW."timezone", NEW."updated_at", NEW."url", NEW."website"); RETURN NULL;',
                    hash="787e3176b96b506020f056ee1122d90d25e4cb0d",
                    operation="UPDATE",
                    pgid="pgtrigger_update_update_19f56",
                    table="parks_park",
                    when="AFTER",
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,12 @@
# Generated by Django 5.2.5 on 2025-09-15 01:03
from django.db import migrations
class Migration(migrations.Migration):
    """Migration with no operations; it makes no schema or data changes."""

    dependencies = [
        ("parks", "0017_add_timezone_to_pghistory_triggers"),
    ]

    # Nothing to apply or reverse.
    operations = []

View File

@@ -0,0 +1,52 @@
# Generated manually to fix pghistory timezone issue
from django.db import migrations
class Migration(migrations.Migration):
    """Rebuild the INSERT history trigger on ``parks_park`` by hand.

    Forward: drops the trigger function (CASCADE also removes the trigger
    bound to it), then re-creates function and trigger with a column list
    that includes ``timezone``.

    Reverse: no-op. The preceding migrations already expect an insert
    trigger that copies ``timezone``; dropping the function on reverse would
    leave ``parks_park`` with no insert trigger at all.
    """

    dependencies = [
        ("parks", "0018_auto_20250914_2103"),
    ]

    operations = [
        migrations.RunSQL(
            sql="""
            -- Drop the existing trigger function (CASCADE removes its trigger)
            DROP FUNCTION IF EXISTS pgtrigger_insert_insert_66883() CASCADE;

            -- Guard against a trigger of the same name that survived the drop
            DROP TRIGGER IF EXISTS pgtrigger_insert_insert_66883 ON parks_park;

            -- Recreate the trigger function with timezone field
            CREATE OR REPLACE FUNCTION pgtrigger_insert_insert_66883()
            RETURNS TRIGGER AS $$
            BEGIN
                INSERT INTO "parks_parkevent" (
                    "average_rating", "banner_image_id", "card_image_id", "closing_date",
                    "coaster_count", "created_at", "description", "id", "name", "opening_date",
                    "opening_year", "operating_season", "operator_id", "park_type",
                    "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id",
                    "property_owner_id", "ride_count", "search_text", "size_acres",
                    "slug", "status", "updated_at", "url", "website", "timezone"
                ) VALUES (
                    NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date",
                    NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date",
                    NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type",
                    _pgh_attach_context(), NOW(), 'insert', NEW."id",
                    NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres",
                    NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website", NEW."timezone"
                );
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            -- Recreate the trigger
            CREATE TRIGGER pgtrigger_insert_insert_66883
                AFTER INSERT ON parks_park
                FOR EACH ROW
                EXECUTE FUNCTION pgtrigger_insert_insert_66883();
            """,
            # Keep the trigger in place when unapplying; see class docstring.
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]

View File

@@ -0,0 +1,52 @@
# Generated manually to fix pghistory UPDATE timezone issue
from django.db import migrations
class Migration(migrations.Migration):
    """Rebuild the UPDATE history trigger on ``parks_park`` by hand.

    Forward: drops the trigger function (CASCADE also removes the trigger
    bound to it), then re-creates function and trigger with a column list
    that includes ``timezone``. The trigger keeps the
    ``WHEN (OLD.* IS DISTINCT FROM NEW.*)`` condition declared for
    ``update_update`` in the earlier migrations, so updates that change
    nothing do not write an event row.

    Reverse: no-op. The preceding migrations already expect an update
    trigger that copies ``timezone``; dropping the function on reverse would
    leave ``parks_park`` with no update trigger at all.
    """

    dependencies = [
        ("parks", "0019_fix_pghistory_timezone"),
    ]

    operations = [
        migrations.RunSQL(
            sql="""
            -- Drop the existing UPDATE trigger function (CASCADE removes its trigger)
            DROP FUNCTION IF EXISTS pgtrigger_update_update_19f56() CASCADE;

            -- Guard against a trigger of the same name that survived the drop
            DROP TRIGGER IF EXISTS pgtrigger_update_update_19f56 ON parks_park;

            -- Recreate the UPDATE trigger function with timezone field
            CREATE OR REPLACE FUNCTION pgtrigger_update_update_19f56()
            RETURNS TRIGGER AS $$
            BEGIN
                INSERT INTO "parks_parkevent" (
                    "average_rating", "banner_image_id", "card_image_id", "closing_date",
                    "coaster_count", "created_at", "description", "id", "name", "opening_date",
                    "opening_year", "operating_season", "operator_id", "park_type",
                    "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id",
                    "property_owner_id", "ride_count", "search_text", "size_acres",
                    "slug", "status", "updated_at", "url", "website", "timezone"
                ) VALUES (
                    NEW."average_rating", NEW."banner_image_id", NEW."card_image_id", NEW."closing_date",
                    NEW."coaster_count", NEW."created_at", NEW."description", NEW."id", NEW."name", NEW."opening_date",
                    NEW."opening_year", NEW."operating_season", NEW."operator_id", NEW."park_type",
                    _pgh_attach_context(), NOW(), 'update', NEW."id",
                    NEW."property_owner_id", NEW."ride_count", NEW."search_text", NEW."size_acres",
                    NEW."slug", NEW."status", NEW."updated_at", NEW."url", NEW."website", NEW."timezone"
                );
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            -- Recreate the UPDATE trigger; only fire when the row actually changed
            CREATE TRIGGER pgtrigger_update_update_19f56
                AFTER UPDATE ON parks_park
                FOR EACH ROW
                WHEN (OLD.* IS DISTINCT FROM NEW.*)
                EXECUTE FUNCTION pgtrigger_update_update_19f56();
            """,
            # Keep the trigger in place when unapplying; see class docstring.
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]