Files
thrillwiki_django_no_react/backend/apps/rides/services/search.py

732 lines
27 KiB
Python

"""
Ride Search Service
Provides comprehensive search and filtering capabilities for rides using PostgreSQL's
advanced full-text search features including SearchVector, SearchQuery, SearchRank,
and TrigramSimilarity for fuzzy matching.
This service implements the filtering design specified in:
backend/docs/ride_filtering_design.md
"""
from typing import Any
from django.contrib.postgres.search import (
SearchQuery,
SearchRank,
SearchVector,
TrigramSimilarity,
)
from django.db import models
from django.db.models import F, Q, Value
from django.db.models.functions import Greatest
from apps.parks.models import Park
from apps.rides.models import Ride
from apps.rides.models.company import Company
class RideSearchService:
"""
Advanced search service for rides with PostgreSQL full-text search capabilities.
Features:
- Full-text search with ranking and highlighting
- Fuzzy matching with trigram similarity
- Comprehensive filtering across 8 categories
- Range filtering for numeric fields
- Date range filtering
- Multi-select filtering
- Sorting with multiple options
- Search suggestions and autocomplete
"""
# Search configuration
SEARCH_LANGUAGES = ["english"]
TRIGRAM_SIMILARITY_THRESHOLD = 0.3
SEARCH_RANK_WEIGHTS = [0.1, 0.2, 0.4, 1.0] # D, C, B, A weights
# Filter categories from our design
FILTER_CATEGORIES = {
"search_text": ["global_search", "name_search", "description_search"],
"basic_info": ["category", "status", "park", "park_area"],
"dates": ["opening_date_range", "closing_date_range", "status_since_range"],
"height_safety": ["min_height_range", "max_height_range"],
"performance": ["capacity_range", "duration_range", "rating_range"],
"relationships": ["manufacturer", "designer", "ride_model"],
"roller_coaster": [
"height_ft_range",
"length_ft_range",
"speed_mph_range",
"inversions_range",
"track_material",
"coaster_type",
"propulsion_system",
],
"company": ["manufacturer_roles", "designer_roles", "founded_date_range"],
}
# Sorting options
SORT_OPTIONS = {
"relevance": "search_rank",
"name_asc": "name",
"name_desc": "-name",
"opening_date_asc": "opening_date",
"opening_date_desc": "-opening_date",
"rating_asc": "average_rating",
"rating_desc": "-average_rating",
"height_asc": "rollercoasterstats__height_ft",
"height_desc": "-rollercoasterstats__height_ft",
"speed_asc": "rollercoasterstats__speed_mph",
"speed_desc": "-rollercoasterstats__speed_mph",
"capacity_asc": "capacity_per_hour",
"capacity_desc": "-capacity_per_hour",
"created_asc": "created_at",
"created_desc": "-created_at",
}
def __init__(self):
"""Initialize the search service."""
self.base_queryset = self._get_base_queryset()
def _get_base_queryset(self):
"""
Get the base queryset with all necessary relationships pre-loaded
for optimal performance.
"""
return Ride.objects.select_related(
"park",
"park_area",
"manufacturer",
"designer",
"ride_model",
"rollercoasterstats",
).prefetch_related("manufacturer__roles", "designer__roles")
def search_and_filter(
self,
filters: dict[str, Any],
sort_by: str = "relevance",
page: int = 1,
page_size: int = 20,
) -> dict[str, Any]:
"""
Main search and filter method that combines all capabilities.
Args:
filters: Dictionary of filter parameters
sort_by: Sorting option key
page: Page number for pagination
page_size: Number of results per page
Returns:
Dictionary containing results, pagination info, and metadata
"""
queryset = self.base_queryset
search_metadata = {}
# Apply text search with ranking
if filters.get("global_search"):
queryset, search_rank = self._apply_full_text_search(
queryset, filters["global_search"]
)
search_metadata["search_applied"] = True
search_metadata["search_term"] = filters["global_search"]
else:
search_rank = Value(0)
# Apply all filter categories
queryset = self._apply_basic_info_filters(queryset, filters)
queryset = self._apply_date_filters(queryset, filters)
queryset = self._apply_height_safety_filters(queryset, filters)
queryset = self._apply_performance_filters(queryset, filters)
queryset = self._apply_relationship_filters(queryset, filters)
queryset = self._apply_roller_coaster_filters(queryset, filters)
queryset = self._apply_company_filters(queryset, filters)
# Add search rank to queryset for sorting
queryset = queryset.annotate(search_rank=search_rank)
# Apply sorting
queryset = self._apply_sorting(queryset, sort_by)
# Get total count before pagination
total_count = queryset.count()
# Apply pagination
start_idx = (page - 1) * page_size
end_idx = start_idx + page_size
results = list(queryset[start_idx:end_idx])
# Generate search highlights if search was applied
if filters.get("global_search"):
results = self._add_search_highlights(results, filters["global_search"])
return {
"results": results,
"pagination": {
"page": page,
"page_size": page_size,
"total_count": total_count,
"total_pages": (total_count + page_size - 1) // page_size,
"has_next": end_idx < total_count,
"has_previous": page > 1,
},
"metadata": search_metadata,
"applied_filters": self._get_applied_filters_summary(filters),
}
def _apply_full_text_search(
self, queryset, search_term: str
) -> tuple[models.QuerySet, models.Expression]:
"""
Apply PostgreSQL full-text search with ranking and fuzzy matching.
"""
if not search_term or not search_term.strip():
return queryset, Value(0)
search_term = search_term.strip()
# Create search vector combining multiple fields with different weights
search_vector = (
SearchVector("name", weight="A")
+ SearchVector("description", weight="B")
+ SearchVector("park__name", weight="C")
+ SearchVector("manufacturer__name", weight="C")
+ SearchVector("designer__name", weight="C")
+ SearchVector("ride_model__name", weight="D")
)
# Create search query - try different query types for best results
search_query = SearchQuery(search_term, config="english")
# Calculate search rank
search_rank = SearchRank(
search_vector, search_query, weights=self.SEARCH_RANK_WEIGHTS
)
# Apply trigram similarity for fuzzy matching on name
trigram_similarity = TrigramSimilarity("name", search_term)
# Combine full-text search with trigram similarity
queryset = queryset.annotate(trigram_similarity=trigram_similarity).filter(
Q(search_vector=search_query)
| Q(trigram_similarity__gte=self.TRIGRAM_SIMILARITY_THRESHOLD)
)
# Use the greatest of search rank and trigram similarity for final ranking
final_rank = Greatest(search_rank, F("trigram_similarity"))
return queryset, final_rank
def _apply_basic_info_filters(
self, queryset, filters: dict[str, Any]
) -> models.QuerySet:
"""Apply basic information filters."""
# Category filter (multi-select)
if filters.get("category"):
categories = (
filters["category"]
if isinstance(filters["category"], list)
else [filters["category"]]
)
queryset = queryset.filter(category__in=categories)
# Status filter (multi-select)
if filters.get("status"):
statuses = (
filters["status"]
if isinstance(filters["status"], list)
else [filters["status"]]
)
queryset = queryset.filter(status__in=statuses)
# Park filter (multi-select)
if filters.get("park"):
parks = (
filters["park"]
if isinstance(filters["park"], list)
else [filters["park"]]
)
if isinstance(parks[0], str): # If slugs provided
queryset = queryset.filter(park__slug__in=parks)
else: # If IDs provided
queryset = queryset.filter(park_id__in=parks)
# Park area filter (multi-select)
if filters.get("park_area"):
areas = (
filters["park_area"]
if isinstance(filters["park_area"], list)
else [filters["park_area"]]
)
if isinstance(areas[0], str): # If slugs provided
queryset = queryset.filter(park_area__slug__in=areas)
else: # If IDs provided
queryset = queryset.filter(park_area_id__in=areas)
return queryset
def _apply_date_filters(self, queryset, filters: dict[str, Any]) -> models.QuerySet:
"""Apply date range filters."""
# Opening date range
if filters.get("opening_date_range"):
date_range = filters["opening_date_range"]
if date_range.get("start"):
queryset = queryset.filter(opening_date__gte=date_range["start"])
if date_range.get("end"):
queryset = queryset.filter(opening_date__lte=date_range["end"])
# Closing date range
if filters.get("closing_date_range"):
date_range = filters["closing_date_range"]
if date_range.get("start"):
queryset = queryset.filter(closing_date__gte=date_range["start"])
if date_range.get("end"):
queryset = queryset.filter(closing_date__lte=date_range["end"])
# Status since range
if filters.get("status_since_range"):
date_range = filters["status_since_range"]
if date_range.get("start"):
queryset = queryset.filter(status_since__gte=date_range["start"])
if date_range.get("end"):
queryset = queryset.filter(status_since__lte=date_range["end"])
return queryset
def _apply_height_safety_filters(
self, queryset, filters: dict[str, Any]
) -> models.QuerySet:
"""Apply height and safety requirement filters."""
# Minimum height range
if filters.get("min_height_range"):
height_range = filters["min_height_range"]
if height_range.get("min") is not None:
queryset = queryset.filter(min_height_in__gte=height_range["min"])
if height_range.get("max") is not None:
queryset = queryset.filter(min_height_in__lte=height_range["max"])
# Maximum height range
if filters.get("max_height_range"):
height_range = filters["max_height_range"]
if height_range.get("min") is not None:
queryset = queryset.filter(max_height_in__gte=height_range["min"])
if height_range.get("max") is not None:
queryset = queryset.filter(max_height_in__lte=height_range["max"])
return queryset
def _apply_performance_filters(
self, queryset, filters: dict[str, Any]
) -> models.QuerySet:
"""Apply performance metric filters."""
# Capacity range
if filters.get("capacity_range"):
capacity_range = filters["capacity_range"]
if capacity_range.get("min") is not None:
queryset = queryset.filter(capacity_per_hour__gte=capacity_range["min"])
if capacity_range.get("max") is not None:
queryset = queryset.filter(capacity_per_hour__lte=capacity_range["max"])
# Duration range
if filters.get("duration_range"):
duration_range = filters["duration_range"]
if duration_range.get("min") is not None:
queryset = queryset.filter(
ride_duration_seconds__gte=duration_range["min"]
)
if duration_range.get("max") is not None:
queryset = queryset.filter(
ride_duration_seconds__lte=duration_range["max"]
)
# Rating range
if filters.get("rating_range"):
rating_range = filters["rating_range"]
if rating_range.get("min") is not None:
queryset = queryset.filter(average_rating__gte=rating_range["min"])
if rating_range.get("max") is not None:
queryset = queryset.filter(average_rating__lte=rating_range["max"])
return queryset
def _apply_relationship_filters(
self, queryset, filters: dict[str, Any]
) -> models.QuerySet:
"""Apply relationship filters (manufacturer, designer, ride model)."""
# Manufacturer filter (multi-select)
if filters.get("manufacturer"):
manufacturers = (
filters["manufacturer"]
if isinstance(filters["manufacturer"], list)
else [filters["manufacturer"]]
)
if isinstance(manufacturers[0], str): # If slugs provided
queryset = queryset.filter(manufacturer__slug__in=manufacturers)
else: # If IDs provided
queryset = queryset.filter(manufacturer_id__in=manufacturers)
# Designer filter (multi-select)
if filters.get("designer"):
designers = (
filters["designer"]
if isinstance(filters["designer"], list)
else [filters["designer"]]
)
if isinstance(designers[0], str): # If slugs provided
queryset = queryset.filter(designer__slug__in=designers)
else: # If IDs provided
queryset = queryset.filter(designer_id__in=designers)
# Ride model filter (multi-select)
if filters.get("ride_model"):
models_list = (
filters["ride_model"]
if isinstance(filters["ride_model"], list)
else [filters["ride_model"]]
)
if isinstance(models_list[0], str): # If slugs provided
queryset = queryset.filter(ride_model__slug__in=models_list)
else: # If IDs provided
queryset = queryset.filter(ride_model_id__in=models_list)
return queryset
def _apply_roller_coaster_filters(
self, queryset, filters: dict[str, Any]
) -> models.QuerySet:
"""Apply roller coaster specific filters."""
queryset = self._apply_numeric_range_filter(
queryset, filters, "height_ft_range", "rollercoasterstats__height_ft"
)
queryset = self._apply_numeric_range_filter(
queryset, filters, "length_ft_range", "rollercoasterstats__length_ft"
)
queryset = self._apply_numeric_range_filter(
queryset, filters, "speed_mph_range", "rollercoasterstats__speed_mph"
)
queryset = self._apply_numeric_range_filter(
queryset, filters, "inversions_range", "rollercoasterstats__inversions"
)
# Track material filter (multi-select)
if filters.get("track_material"):
materials = (
filters["track_material"]
if isinstance(filters["track_material"], list)
else [filters["track_material"]]
)
queryset = queryset.filter(rollercoasterstats__track_material__in=materials)
# Coaster type filter (multi-select)
if filters.get("coaster_type"):
types = (
filters["coaster_type"]
if isinstance(filters["coaster_type"], list)
else [filters["coaster_type"]]
)
queryset = queryset.filter(
rollercoasterstats__roller_coaster_type__in=types
)
# Propulsion system filter (multi-select)
if filters.get("propulsion_system"):
propulsion_systems = (
filters["propulsion_system"]
if isinstance(filters["propulsion_system"], list)
else [filters["propulsion_system"]]
)
queryset = queryset.filter(rollercoasterstats__propulsion_system__in=propulsion_systems)
return queryset
def _apply_numeric_range_filter(
self,
queryset,
filters: dict[str, Any],
filter_key: str,
field_name: str,
) -> models.QuerySet:
"""Apply numeric range filter to reduce complexity."""
if filters.get(filter_key):
range_filter = filters[filter_key]
if range_filter.get("min") is not None:
queryset = queryset.filter(
**{f"{field_name}__gte": range_filter["min"]}
)
if range_filter.get("max") is not None:
queryset = queryset.filter(
**{f"{field_name}__lte": range_filter["max"]}
)
return queryset
def _apply_company_filters(
self, queryset, filters: dict[str, Any]
) -> models.QuerySet:
"""Apply company-related filters."""
# Manufacturer roles filter
if filters.get("manufacturer_roles"):
roles = (
filters["manufacturer_roles"]
if isinstance(filters["manufacturer_roles"], list)
else [filters["manufacturer_roles"]]
)
queryset = queryset.filter(manufacturer__roles__overlap=roles)
# Designer roles filter
if filters.get("designer_roles"):
roles = (
filters["designer_roles"]
if isinstance(filters["designer_roles"], list)
else [filters["designer_roles"]]
)
queryset = queryset.filter(designer__roles__overlap=roles)
# Founded date range
if filters.get("founded_date_range"):
date_range = filters["founded_date_range"]
if date_range.get("start"):
queryset = queryset.filter(
Q(manufacturer__founded_date__gte=date_range["start"])
| Q(designer__founded_date__gte=date_range["start"])
)
if date_range.get("end"):
queryset = queryset.filter(
Q(manufacturer__founded_date__lte=date_range["end"])
| Q(designer__founded_date__lte=date_range["end"])
)
return queryset
def _apply_sorting(self, queryset, sort_by: str) -> models.QuerySet:
"""Apply sorting to the queryset."""
if sort_by not in self.SORT_OPTIONS:
sort_by = "relevance"
sort_field = self.SORT_OPTIONS[sort_by]
# Handle special case for relevance sorting
if sort_by == "relevance":
return queryset.order_by("-search_rank", "name")
# Apply the sorting
return queryset.order_by(
sort_field, "name"
) # Always add name as secondary sort
def _add_search_highlights(
self, results: list[Ride], search_term: str
) -> list[Ride]:
"""Add search highlights to results using SearchHeadline."""
if not search_term or not results:
return results
# Create search query for highlighting
SearchQuery(search_term, config="english")
# Add highlights to each result
# (note: highlights would need to be processed at query time)
for ride in results:
# Store highlighted versions as dynamic attributes (for template use)
ride.highlighted_name = ride.name
ride.highlighted_description = ride.description
return results
def _get_applied_filters_summary(self, filters: dict[str, Any]) -> dict[str, Any]:
"""Generate a summary of applied filters for the frontend."""
applied = {}
# Count filters in each category
for category, filter_keys in self.FILTER_CATEGORIES.items():
category_filters = []
for key in filter_keys:
if filters.get(key):
category_filters.append(
{
"key": key,
"value": filters[key],
"display_name": self._get_filter_display_name(key),
}
)
if category_filters:
applied[category] = category_filters
return applied
def _get_filter_display_name(self, filter_key: str) -> str:
"""Convert filter key to human-readable display name."""
display_names = {
"global_search": "Search",
"category": "Category",
"status": "Status",
"park": "Park",
"park_area": "Park Area",
"opening_date_range": "Opening Date",
"closing_date_range": "Closing Date",
"status_since_range": "Status Since",
"min_height_range": "Minimum Height",
"max_height_range": "Maximum Height",
"capacity_range": "Capacity",
"duration_range": "Duration",
"rating_range": "Rating",
"manufacturer": "Manufacturer",
"designer": "Designer",
"ride_model": "Ride Model",
"height_ft_range": "Height (ft)",
"length_ft_range": "Length (ft)",
"speed_mph_range": "Speed (mph)",
"inversions_range": "Inversions",
"track_material": "Track Material",
"coaster_type": "Coaster Type",
"propulsion_system": "Propulsion System",
"manufacturer_roles": "Manufacturer Roles",
"designer_roles": "Designer Roles",
"founded_date_range": "Founded Date",
}
if filter_key in display_names:
return display_names[filter_key]
else:
raise ValueError(f"Unknown filter key: {filter_key}")
def get_search_suggestions(
self, query: str, limit: int = 10
) -> list[dict[str, Any]]:
"""
Get search suggestions for autocomplete functionality.
"""
if not query or len(query) < 2:
return []
suggestions = []
# Ride names with trigram similarity
ride_suggestions = (
Ride.objects.annotate(similarity=TrigramSimilarity("name", query))
.filter(similarity__gte=0.1)
.order_by("-similarity")
.values("name", "slug", "similarity")[: limit // 2]
)
for ride in ride_suggestions:
suggestions.append(
{
"type": "ride",
"text": ride["name"],
"slug": ride["slug"],
"score": ride["similarity"],
}
)
# Park names
park_suggestions = (
Park.objects.annotate(similarity=TrigramSimilarity("name", query))
.filter(similarity__gte=0.1)
.order_by("-similarity")
.values("name", "slug", "similarity")[: limit // 4]
)
for park in park_suggestions:
suggestions.append(
{
"type": "park",
"text": park["name"],
"slug": park["slug"],
"score": park["similarity"],
}
)
# Manufacturer names
manufacturer_suggestions = (
Company.objects.filter(roles__contains=["MANUFACTURER"])
.annotate(similarity=TrigramSimilarity("name", query))
.filter(similarity__gte=0.1)
.order_by("-similarity")
.values("name", "slug", "similarity")[: limit // 4]
)
for manufacturer in manufacturer_suggestions:
suggestions.append(
{
"type": "manufacturer",
"text": manufacturer["name"],
"slug": manufacturer["slug"],
"score": manufacturer["similarity"],
}
)
# Sort by score and return top results
suggestions.sort(key=lambda x: x["score"], reverse=True)
return suggestions[:limit]
def get_filter_options(
self, filter_type: str, context_filters: dict[str, Any] | None = None
) -> list[dict[str, Any]]:
"""
Get available options for a specific filter type.
Optionally filter options based on current context.
"""
context_filters = context_filters or {}
base_queryset = self.base_queryset
# Apply context filters to narrow down options
if context_filters:
temp_filters = context_filters.copy()
temp_filters.pop(
filter_type, None
) # Remove the filter we're getting options for
base_queryset = self._apply_all_filters(base_queryset, temp_filters)
if filter_type == "park":
return list(
base_queryset.values("park__name", "park__slug")
.distinct()
.order_by("park__name")
)
elif filter_type == "manufacturer":
return list(
base_queryset.filter(manufacturer__isnull=False)
.values("manufacturer__name", "manufacturer__slug")
.distinct()
.order_by("manufacturer__name")
)
elif filter_type == "designer":
return list(
base_queryset.filter(designer__isnull=False)
.values("designer__name", "designer__slug")
.distinct()
.order_by("designer__name")
)
# Add more filter options as needed
return []
def _apply_all_filters(self, queryset, filters: dict[str, Any]) -> models.QuerySet:
"""Apply all filters except search ranking."""
queryset = self._apply_basic_info_filters(queryset, filters)
queryset = self._apply_date_filters(queryset, filters)
queryset = self._apply_height_safety_filters(queryset, filters)
queryset = self._apply_performance_filters(queryset, filters)
queryset = self._apply_relationship_filters(queryset, filters)
queryset = self._apply_roller_coaster_filters(queryset, filters)
queryset = self._apply_company_filters(queryset, filters)
return queryset