"""
Ride Search Service

Provides comprehensive search and filtering capabilities for rides using
PostgreSQL's advanced full-text search features including SearchVector,
SearchQuery, SearchRank, and TrigramSimilarity for fuzzy matching.

This service implements the filtering design specified in:
backend/docs/ride_filtering_design.md
"""

from typing import Any

from django.contrib.postgres.search import (
    SearchQuery,
    SearchRank,
    SearchVector,
    TrigramSimilarity,
)
from django.db import models
from django.db.models import F, Q, Value
from django.db.models.functions import Greatest

from apps.parks.models import Park
from apps.rides.models import Ride
from apps.rides.models.company import Company


class RideSearchService:
    """
    Search service for rides built on PostgreSQL full-text search.

    Features:
    - Full-text search with ranking
    - Fuzzy matching with trigram similarity
    - Filtering across the categories listed in ``FILTER_CATEGORIES``
    - Range filtering for numeric fields
    - Date range filtering
    - Multi-select filtering
    - Sorting with multiple options
    - Search suggestions and autocomplete

    NOTE(review): ``name_search`` and ``description_search`` are listed in
    ``FILTER_CATEGORIES`` but no method in this class applies them to the
    queryset; they only show up in the applied-filters summary.
    """

    # Search configuration
    SEARCH_LANGUAGES = ["english"]
    TRIGRAM_SIMILARITY_THRESHOLD = 0.3
    # Looser threshold used for autocomplete suggestions.
    SUGGESTION_SIMILARITY_THRESHOLD = 0.1
    SEARCH_RANK_WEIGHTS = [0.1, 0.2, 0.4, 1.0]  # D, C, B, A weights

    # Filter categories from our design
    FILTER_CATEGORIES = {
        "search_text": ["global_search", "name_search", "description_search"],
        "basic_info": ["category", "status", "park", "park_area"],
        "dates": ["opening_date_range", "closing_date_range", "status_since_range"],
        "height_safety": ["min_height_range", "max_height_range"],
        "performance": ["capacity_range", "duration_range", "rating_range"],
        "relationships": ["manufacturer", "designer", "ride_model"],
        "roller_coaster": [
            "height_ft_range",
            "length_ft_range",
            "speed_mph_range",
            "inversions_range",
            "track_material",
            "coaster_type",
            "propulsion_system",
        ],
        "company": ["manufacturer_roles", "designer_roles", "founded_date_range"],
    }

    # Sorting options
    SORT_OPTIONS = {
        "relevance": "search_rank",
        "name_asc": "name",
        "name_desc": "-name",
        "opening_date_asc": "opening_date",
        "opening_date_desc": "-opening_date",
        "rating_asc": "average_rating",
        "rating_desc": "-average_rating",
        "height_asc": "rollercoasterstats__height_ft",
        "height_desc": "-rollercoasterstats__height_ft",
        "speed_asc": "rollercoasterstats__speed_mph",
        "speed_desc": "-rollercoasterstats__speed_mph",
        "capacity_asc": "capacity_per_hour",
        "capacity_desc": "-capacity_per_hour",
        "created_asc": "created_at",
        "created_desc": "-created_at",
    }

    # Human-readable labels; must cover every key in FILTER_CATEGORIES,
    # otherwise building the applied-filters summary raises ValueError.
    FILTER_DISPLAY_NAMES = {
        "global_search": "Search",
        "name_search": "Name Search",
        "description_search": "Description Search",
        "category": "Category",
        "status": "Status",
        "park": "Park",
        "park_area": "Park Area",
        "opening_date_range": "Opening Date",
        "closing_date_range": "Closing Date",
        "status_since_range": "Status Since",
        "min_height_range": "Minimum Height",
        "max_height_range": "Maximum Height",
        "capacity_range": "Capacity",
        "duration_range": "Duration",
        "rating_range": "Rating",
        "manufacturer": "Manufacturer",
        "designer": "Designer",
        "ride_model": "Ride Model",
        "height_ft_range": "Height (ft)",
        "length_ft_range": "Length (ft)",
        "speed_mph_range": "Speed (mph)",
        "inversions_range": "Inversions",
        "track_material": "Track Material",
        "coaster_type": "Coaster Type",
        "propulsion_system": "Propulsion System",
        "manufacturer_roles": "Manufacturer Roles",
        "designer_roles": "Designer Roles",
        "founded_date_range": "Founded Date",
    }

    def __init__(self):
        """Initialize the search service with its base queryset."""
        self.base_queryset = self._get_base_queryset()

    def _get_base_queryset(self):
        """
        Return the base ``Ride`` queryset with its forward relations joined.

        ``roles`` is queried with array lookups (``overlap`` / ``contains``)
        elsewhere in this class, so it is presumably a plain array column on
        the company row rather than a relation. Such a column cannot be passed
        to ``prefetch_related`` (Django rejects it when the queryset is
        evaluated) and is already loaded with the ``select_related`` company.
        """
        return Ride.objects.select_related(
            "park",
            "park_area",
            "manufacturer",
            "designer",
            "ride_model",
            "rollercoasterstats",
        )

    def search_and_filter(
        self,
        filters: dict[str, Any],
        sort_by: str = "relevance",
        page: int = 1,
        page_size: int = 20,
    ) -> dict[str, Any]:
        """
        Main search and filter method that combines all capabilities.

        Args:
            filters: Dictionary of filter parameters
            sort_by: Sorting option key (unknown keys fall back to relevance)
            page: 1-based page number; values below 1 are treated as 1
            page_size: Results per page; values below 1 are treated as 1

        Returns:
            Dictionary containing results, pagination info, and metadata
        """
        # Clamp pagination: a non-positive page would produce a negative slice
        # and a zero page size would divide by zero in ``total_pages``.
        page = max(1, page)
        page_size = max(1, page_size)

        queryset = self.base_queryset
        search_metadata: dict[str, Any] = {}

        # Apply text search with ranking. A whitespace-only term is treated
        # as "no search" so the metadata does not claim a search was applied.
        search_term = (filters.get("global_search") or "").strip()
        if search_term:
            queryset, search_rank = self._apply_full_text_search(
                queryset, search_term
            )
            search_metadata["search_applied"] = True
            search_metadata["search_term"] = filters["global_search"]
        else:
            search_rank = Value(0)

        # Apply all filter categories
        queryset = self._apply_all_filters(queryset, filters)

        # Add search rank to queryset for sorting
        queryset = queryset.annotate(search_rank=search_rank)

        # Apply sorting
        queryset = self._apply_sorting(queryset, sort_by)

        # Get total count before pagination
        total_count = queryset.count()

        # Apply pagination
        start_idx = (page - 1) * page_size
        end_idx = start_idx + page_size
        results = list(queryset[start_idx:end_idx])

        # Generate search highlights if search was applied
        if search_term:
            results = self._add_search_highlights(results, search_term)

        return {
            "results": results,
            "pagination": {
                "page": page,
                "page_size": page_size,
                "total_count": total_count,
                # Ceiling division without floats.
                "total_pages": (total_count + page_size - 1) // page_size,
                "has_next": end_idx < total_count,
                "has_previous": page > 1,
            },
            "metadata": search_metadata,
            "applied_filters": self._get_applied_filters_summary(filters),
        }

    def _apply_full_text_search(
        self, queryset, search_term: str
    ) -> tuple[models.QuerySet, models.Expression]:
        """
        Apply PostgreSQL full-text search with ranking and fuzzy matching.

        Args:
            queryset: Queryset to restrict.
            search_term: Raw user search text.

        Returns:
            ``(queryset, rank_expression)``. For a blank term the queryset is
            returned untouched together with ``Value(0)``.
        """
        if not search_term or not search_term.strip():
            return queryset, Value(0)

        search_term = search_term.strip()
        # Use one text-search configuration for both the document vector and
        # the query so stemming is applied consistently on both sides.
        config = self.SEARCH_LANGUAGES[0]

        # Create search vector combining multiple fields with different weights
        search_vector = (
            SearchVector("name", weight="A", config=config)
            + SearchVector("description", weight="B", config=config)
            + SearchVector("park__name", weight="C", config=config)
            + SearchVector("manufacturer__name", weight="C", config=config)
            + SearchVector("designer__name", weight="C", config=config)
            + SearchVector("ride_model__name", weight="D", config=config)
        )
        search_query = SearchQuery(search_term, config=config)

        # Calculate search rank
        search_rank = SearchRank(
            search_vector, search_query, weights=self.SEARCH_RANK_WEIGHTS
        )

        # The vector must be annotated before it can be referenced in a
        # lookup; a distinct annotation name avoids clashing with any model
        # field. Trigram similarity on the name adds fuzzy matching.
        queryset = queryset.annotate(
            search_document=search_vector,
            trigram_similarity=TrigramSimilarity("name", search_term),
        ).filter(
            Q(search_document=search_query)
            | Q(trigram_similarity__gte=self.TRIGRAM_SIMILARITY_THRESHOLD)
        )

        # Use the greatest of search rank and trigram similarity for final ranking
        final_rank = Greatest(search_rank, F("trigram_similarity"))

        return queryset, final_rank

    @staticmethod
    def _as_list(value: Any) -> list:
        """Return ``value`` as a list, wrapping a single value in one."""
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [value]

    def _apply_multi_select_filter(
        self, queryset, filters: dict[str, Any], filter_key: str, field_name: str
    ) -> models.QuerySet:
        """Apply ``field_name__in`` for a single-or-multi-valued filter."""
        raw_value = filters.get(filter_key)
        if not raw_value:
            return queryset
        return queryset.filter(**{f"{field_name}__in": self._as_list(raw_value)})

    def _apply_slug_or_id_filter(
        self, queryset, filters: dict[str, Any], filter_key: str, relation: str
    ) -> models.QuerySet:
        """
        Filter on a related object given either slugs or primary keys.

        The type of the first supplied value decides the lookup: strings are
        matched against ``<relation>__slug``, anything else against
        ``<relation>_id``.
        """
        raw_value = filters.get(filter_key)
        if not raw_value:
            return queryset
        values = self._as_list(raw_value)
        if isinstance(values[0], str):
            return queryset.filter(**{f"{relation}__slug__in": values})
        return queryset.filter(**{f"{relation}_id__in": values})

    def _apply_date_range_filter(
        self, queryset, filters: dict[str, Any], filter_key: str, field_name: str
    ) -> models.QuerySet:
        """Apply optional ``start`` / ``end`` bounds of a date-range filter."""
        date_range = filters.get(filter_key)
        if not date_range:
            return queryset
        if date_range.get("start"):
            queryset = queryset.filter(**{f"{field_name}__gte": date_range["start"]})
        if date_range.get("end"):
            queryset = queryset.filter(**{f"{field_name}__lte": date_range["end"]})
        return queryset

    def _apply_basic_info_filters(
        self, queryset, filters: dict[str, Any]
    ) -> models.QuerySet:
        """Apply basic information filters (category, status, park, park area)."""
        queryset = self._apply_multi_select_filter(
            queryset, filters, "category", "category"
        )
        queryset = self._apply_multi_select_filter(
            queryset, filters, "status", "status"
        )
        queryset = self._apply_slug_or_id_filter(queryset, filters, "park", "park")
        queryset = self._apply_slug_or_id_filter(
            queryset, filters, "park_area", "park_area"
        )
        return queryset

    def _apply_date_filters(self, queryset, filters: dict[str, Any]) -> models.QuerySet:
        """Apply the opening, closing and status-since date range filters."""
        queryset = self._apply_date_range_filter(
            queryset, filters, "opening_date_range", "opening_date"
        )
        queryset = self._apply_date_range_filter(
            queryset, filters, "closing_date_range", "closing_date"
        )
        queryset = self._apply_date_range_filter(
            queryset, filters, "status_since_range", "status_since"
        )
        return queryset

    def _apply_height_safety_filters(
        self, queryset, filters: dict[str, Any]
    ) -> models.QuerySet:
        """Apply height and safety requirement filters."""
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "min_height_range", "min_height_in"
        )
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "max_height_range", "max_height_in"
        )
        return queryset

    def _apply_performance_filters(
        self, queryset, filters: dict[str, Any]
    ) -> models.QuerySet:
        """Apply performance metric filters (capacity, duration, rating)."""
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "capacity_range", "capacity_per_hour"
        )
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "duration_range", "ride_duration_seconds"
        )
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "rating_range", "average_rating"
        )
        return queryset

    def _apply_relationship_filters(
        self, queryset, filters: dict[str, Any]
    ) -> models.QuerySet:
        """Apply relationship filters (manufacturer, designer, ride model)."""
        for relation in ("manufacturer", "designer", "ride_model"):
            queryset = self._apply_slug_or_id_filter(
                queryset, filters, relation, relation
            )
        return queryset

    def _apply_roller_coaster_filters(
        self, queryset, filters: dict[str, Any]
    ) -> models.QuerySet:
        """Apply roller coaster specific filters."""
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "height_ft_range", "rollercoasterstats__height_ft"
        )
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "length_ft_range", "rollercoasterstats__length_ft"
        )
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "speed_mph_range", "rollercoasterstats__speed_mph"
        )
        queryset = self._apply_numeric_range_filter(
            queryset, filters, "inversions_range", "rollercoasterstats__inversions"
        )

        queryset = self._apply_multi_select_filter(
            queryset, filters, "track_material", "rollercoasterstats__track_material"
        )
        queryset = self._apply_multi_select_filter(
            queryset,
            filters,
            "coaster_type",
            "rollercoasterstats__roller_coaster_type",
        )
        queryset = self._apply_multi_select_filter(
            queryset,
            filters,
            "propulsion_system",
            "rollercoasterstats__propulsion_system",
        )
        return queryset

    def _apply_numeric_range_filter(
        self,
        queryset,
        filters: dict[str, Any],
        filter_key: str,
        field_name: str,
    ) -> models.QuerySet:
        """
        Apply optional ``min`` / ``max`` bounds of a numeric range filter.

        Bounds are compared with ``is not None`` so that ``0`` is a valid bound.
        """
        range_filter = filters.get(filter_key)
        if not range_filter:
            return queryset
        if range_filter.get("min") is not None:
            queryset = queryset.filter(**{f"{field_name}__gte": range_filter["min"]})
        if range_filter.get("max") is not None:
            queryset = queryset.filter(**{f"{field_name}__lte": range_filter["max"]})
        return queryset

    def _apply_company_filters(
        self, queryset, filters: dict[str, Any]
    ) -> models.QuerySet:
        """Apply company-related filters (roles and founding date)."""
        # Manufacturer roles filter
        if filters.get("manufacturer_roles"):
            queryset = queryset.filter(
                manufacturer__roles__overlap=self._as_list(
                    filters["manufacturer_roles"]
                )
            )

        # Designer roles filter
        if filters.get("designer_roles"):
            queryset = queryset.filter(
                designer__roles__overlap=self._as_list(filters["designer_roles"])
            )

        # Founded date range: a ride matches when its manufacturer OR its
        # designer was founded inside the range. Both bounds are applied to
        # the same company, so one company cannot satisfy only the start
        # bound while the other satisfies only the end bound.
        date_range = filters.get("founded_date_range")
        if date_range:
            start = date_range.get("start")
            end = date_range.get("end")
            if start or end:
                manufacturer_match = Q()
                designer_match = Q()
                if start:
                    manufacturer_match &= Q(manufacturer__founded_date__gte=start)
                    designer_match &= Q(designer__founded_date__gte=start)
                if end:
                    manufacturer_match &= Q(manufacturer__founded_date__lte=end)
                    designer_match &= Q(designer__founded_date__lte=end)
                queryset = queryset.filter(manufacturer_match | designer_match)

        return queryset

    def _apply_sorting(self, queryset, sort_by: str) -> models.QuerySet:
        """
        Order the queryset by one of ``SORT_OPTIONS``.

        Unknown keys fall back to relevance. ``name`` is always the secondary
        sort key.
        """
        if sort_by not in self.SORT_OPTIONS:
            sort_by = "relevance"

        # Relevance sorts best match first, hence the descending rank.
        if sort_by == "relevance":
            return queryset.order_by("-search_rank", "name")

        return queryset.order_by(self.SORT_OPTIONS[sort_by], "name")

    def _add_search_highlights(
        self, results: list[Ride], search_term: str
    ) -> list[Ride]:
        """
        Attach ``highlighted_name`` / ``highlighted_description`` to results.

        Placeholder: the attributes currently hold the unmodified field
        values; real highlighting would need to be computed at query time.
        """
        if not search_term or not results:
            return results

        for ride in results:
            # Store highlighted versions as dynamic attributes (for template use)
            ride.highlighted_name = ride.name
            ride.highlighted_description = ride.description

        return results

    def _get_applied_filters_summary(self, filters: dict[str, Any]) -> dict[str, Any]:
        """
        Generate a summary of applied filters for the frontend.

        Returns:
            Mapping of category name to a list of
            ``{"key", "value", "display_name"}`` dicts; categories with no
            active filters are omitted.
        """
        applied = {}

        for category, filter_keys in self.FILTER_CATEGORIES.items():
            category_filters = [
                {
                    "key": key,
                    "value": filters[key],
                    "display_name": self._get_filter_display_name(key),
                }
                for key in filter_keys
                if filters.get(key)
            ]
            if category_filters:
                applied[category] = category_filters

        return applied

    def _get_filter_display_name(self, filter_key: str) -> str:
        """
        Convert filter key to human-readable display name.

        Raises:
            ValueError: If ``filter_key`` has no entry in
                ``FILTER_DISPLAY_NAMES``.
        """
        try:
            return self.FILTER_DISPLAY_NAMES[filter_key]
        except KeyError:
            raise ValueError(f"Unknown filter key: {filter_key}") from None

    def _similar_names(
        self, queryset, suggestion_type: str, query: str, max_items: int
    ) -> list[dict[str, Any]]:
        """Return up to ``max_items`` name suggestions ranked by trigram similarity."""
        rows = (
            queryset.annotate(similarity=TrigramSimilarity("name", query))
            .filter(similarity__gte=self.SUGGESTION_SIMILARITY_THRESHOLD)
            .order_by("-similarity")
            .values("name", "slug", "similarity")[:max_items]
        )
        return [
            {
                "type": suggestion_type,
                "text": row["name"],
                "slug": row["slug"],
                "score": row["similarity"],
            }
            for row in rows
        ]

    def get_search_suggestions(
        self, query: str, limit: int = 10
    ) -> list[dict[str, Any]]:
        """
        Get search suggestions for autocomplete functionality.

        Args:
            query: Partial text typed by the user; fewer than two
                non-whitespace-trimmed characters yields no suggestions.
            limit: Maximum number of suggestions; non-positive yields none.

        Returns:
            Suggestion dicts (``type``, ``text``, ``slug``, ``score``) sorted
            by descending score.
        """
        query = (query or "").strip()
        if len(query) < 2 or limit <= 0:
            return []

        # Roughly half the slots go to rides and a quarter each to parks and
        # manufacturers; every source gets at least one slot so that small
        # limits do not silently drop a whole source.
        ride_quota = max(1, limit // 2)
        other_quota = max(1, limit // 4)

        suggestions = self._similar_names(Ride.objects, "ride", query, ride_quota)
        suggestions += self._similar_names(Park.objects, "park", query, other_quota)
        suggestions += self._similar_names(
            Company.objects.filter(roles__contains=["MANUFACTURER"]),
            "manufacturer",
            query,
            other_quota,
        )

        # Sort by score and return top results
        suggestions.sort(key=lambda item: item["score"], reverse=True)
        return suggestions[:limit]

    def get_filter_options(
        self, filter_type: str, context_filters: dict[str, Any] | None = None
    ) -> list[dict[str, Any]]:
        """
        Get available options for a specific filter type.

        Optionally narrows the options to rides matching ``context_filters``;
        the filter whose options are requested is excluded from that context.
        Unsupported filter types return an empty list.
        """
        context_filters = context_filters or {}
        base_queryset = self.base_queryset

        # Apply context filters to narrow down options
        if context_filters:
            temp_filters = context_filters.copy()
            # Remove the filter we're getting options for
            temp_filters.pop(filter_type, None)
            base_queryset = self._apply_all_filters(base_queryset, temp_filters)

        if filter_type == "park":
            return list(
                base_queryset.values("park__name", "park__slug")
                .distinct()
                .order_by("park__name")
            )

        if filter_type == "manufacturer":
            return list(
                base_queryset.filter(manufacturer__isnull=False)
                .values("manufacturer__name", "manufacturer__slug")
                .distinct()
                .order_by("manufacturer__name")
            )

        if filter_type == "designer":
            return list(
                base_queryset.filter(designer__isnull=False)
                .values("designer__name", "designer__slug")
                .distinct()
                .order_by("designer__name")
            )

        # Add more filter options as needed
        return []

    def _apply_all_filters(self, queryset, filters: dict[str, Any]) -> models.QuerySet:
        """Apply all filters except search ranking."""
        queryset = self._apply_basic_info_filters(queryset, filters)
        queryset = self._apply_date_filters(queryset, filters)
        queryset = self._apply_height_safety_filters(queryset, filters)
        queryset = self._apply_performance_filters(queryset, filters)
        queryset = self._apply_relationship_filters(queryset, filters)
        queryset = self._apply_roller_coaster_filters(queryset, filters)
        queryset = self._apply_company_filters(queryset, filters)
        return queryset