""" Ride Search Service Provides comprehensive search and filtering capabilities for rides using PostgreSQL's advanced full-text search features including SearchVector, SearchQuery, SearchRank, and TrigramSimilarity for fuzzy matching. This service implements the filtering design specified in: backend/docs/ride_filtering_design.md """ from django.contrib.postgres.search import ( SearchVector, SearchQuery, SearchRank, TrigramSimilarity, ) from django.db import models from django.db.models import Q, F, Value from django.db.models.functions import Greatest from typing import Dict, List, Optional, Any from apps.rides.models import Ride from apps.parks.models import Park from apps.rides.models.company import Company class RideSearchService: """ Advanced search service for rides with PostgreSQL full-text search capabilities. Features: - Full-text search with ranking and highlighting - Fuzzy matching with trigram similarity - Comprehensive filtering across 8 categories - Range filtering for numeric fields - Date range filtering - Multi-select filtering - Sorting with multiple options - Search suggestions and autocomplete """ # Search configuration SEARCH_LANGUAGES = ["english"] TRIGRAM_SIMILARITY_THRESHOLD = 0.3 SEARCH_RANK_WEIGHTS = [0.1, 0.2, 0.4, 1.0] # D, C, B, A weights # Filter categories from our design FILTER_CATEGORIES = { "search_text": ["global_search", "name_search", "description_search"], "basic_info": ["category", "status", "park", "park_area"], "dates": ["opening_date_range", "closing_date_range", "status_since_range"], "height_safety": ["min_height_range", "max_height_range"], "performance": ["capacity_range", "duration_range", "rating_range"], "relationships": ["manufacturer", "designer", "ride_model"], "roller_coaster": [ "height_ft_range", "length_ft_range", "speed_mph_range", "inversions_range", "track_material", "coaster_type", "launch_type", ], "company": ["manufacturer_roles", "designer_roles", "founded_date_range"], } # Sorting options SORT_OPTIONS = { "relevance": "search_rank", "name_asc": "name", "name_desc": "-name", "opening_date_asc": "opening_date", "opening_date_desc": "-opening_date", "rating_asc": "average_rating", "rating_desc": "-average_rating", "height_asc": "rollercoasterstats__height_ft", "height_desc": "-rollercoasterstats__height_ft", "speed_asc": "rollercoasterstats__speed_mph", "speed_desc": "-rollercoasterstats__speed_mph", "capacity_asc": "capacity_per_hour", "capacity_desc": "-capacity_per_hour", "created_asc": "created_at", "created_desc": "-created_at", } def __init__(self): """Initialize the search service.""" self.base_queryset = self._get_base_queryset() def _get_base_queryset(self): """ Get the base queryset with all necessary relationships pre-loaded for optimal performance. """ return Ride.objects.select_related( "park", "park_area", "manufacturer", "designer", "ride_model", "rollercoasterstats", ).prefetch_related("manufacturer__roles", "designer__roles") def search_and_filter( self, filters: Dict[str, Any], sort_by: str = "relevance", page: int = 1, page_size: int = 20, ) -> Dict[str, Any]: """ Main search and filter method that combines all capabilities. Args: filters: Dictionary of filter parameters sort_by: Sorting option key page: Page number for pagination page_size: Number of results per page Returns: Dictionary containing results, pagination info, and metadata """ queryset = self.base_queryset search_metadata = {} # Apply text search with ranking if filters.get("global_search"): queryset, search_rank = self._apply_full_text_search( queryset, filters["global_search"] ) search_metadata["search_applied"] = True search_metadata["search_term"] = filters["global_search"] else: search_rank = Value(0) # Apply all filter categories queryset = self._apply_basic_info_filters(queryset, filters) queryset = self._apply_date_filters(queryset, filters) queryset = self._apply_height_safety_filters(queryset, filters) queryset = self._apply_performance_filters(queryset, filters) queryset = self._apply_relationship_filters(queryset, filters) queryset = self._apply_roller_coaster_filters(queryset, filters) queryset = self._apply_company_filters(queryset, filters) # Add search rank to queryset for sorting queryset = queryset.annotate(search_rank=search_rank) # Apply sorting queryset = self._apply_sorting(queryset, sort_by) # Get total count before pagination total_count = queryset.count() # Apply pagination start_idx = (page - 1) * page_size end_idx = start_idx + page_size results = list(queryset[start_idx:end_idx]) # Generate search highlights if search was applied if filters.get("global_search"): results = self._add_search_highlights(results, filters["global_search"]) return { "results": results, "pagination": { "page": page, "page_size": page_size, "total_count": total_count, "total_pages": (total_count + page_size - 1) // page_size, "has_next": end_idx < total_count, "has_previous": page > 1, }, "metadata": search_metadata, "applied_filters": self._get_applied_filters_summary(filters), } def _apply_full_text_search( self, queryset, search_term: str ) -> tuple[models.QuerySet, models.Expression]: """ Apply PostgreSQL full-text search with ranking and fuzzy matching. """ if not search_term or not search_term.strip(): return queryset, Value(0) search_term = search_term.strip() # Create search vector combining multiple fields with different weights search_vector = ( SearchVector("name", weight="A") + SearchVector("description", weight="B") + SearchVector("park__name", weight="C") + SearchVector("manufacturer__name", weight="C") + SearchVector("designer__name", weight="C") + SearchVector("ride_model__name", weight="D") ) # Create search query - try different query types for best results search_query = SearchQuery(search_term, config="english") # Calculate search rank search_rank = SearchRank( search_vector, search_query, weights=self.SEARCH_RANK_WEIGHTS ) # Apply trigram similarity for fuzzy matching on name trigram_similarity = TrigramSimilarity("name", search_term) # Combine full-text search with trigram similarity queryset = queryset.annotate(trigram_similarity=trigram_similarity).filter( Q(search_vector=search_query) | Q(trigram_similarity__gte=self.TRIGRAM_SIMILARITY_THRESHOLD) ) # Use the greatest of search rank and trigram similarity for final ranking final_rank = Greatest(search_rank, F("trigram_similarity")) return queryset, final_rank def _apply_basic_info_filters( self, queryset, filters: Dict[str, Any] ) -> models.QuerySet: """Apply basic information filters.""" # Category filter (multi-select) if filters.get("category"): categories = ( filters["category"] if isinstance(filters["category"], list) else [filters["category"]] ) queryset = queryset.filter(category__in=categories) # Status filter (multi-select) if filters.get("status"): statuses = ( filters["status"] if isinstance(filters["status"], list) else [filters["status"]] ) queryset = queryset.filter(status__in=statuses) # Park filter (multi-select) if filters.get("park"): parks = ( filters["park"] if isinstance(filters["park"], list) else [filters["park"]] ) if isinstance(parks[0], str): # If slugs provided queryset = queryset.filter(park__slug__in=parks) else: # If IDs provided queryset = queryset.filter(park_id__in=parks) # Park area filter (multi-select) if filters.get("park_area"): areas = ( filters["park_area"] if isinstance(filters["park_area"], list) else [filters["park_area"]] ) if isinstance(areas[0], str): # If slugs provided queryset = queryset.filter(park_area__slug__in=areas) else: # If IDs provided queryset = queryset.filter(park_area_id__in=areas) return queryset def _apply_date_filters(self, queryset, filters: Dict[str, Any]) -> models.QuerySet: """Apply date range filters.""" # Opening date range if filters.get("opening_date_range"): date_range = filters["opening_date_range"] if date_range.get("start"): queryset = queryset.filter(opening_date__gte=date_range["start"]) if date_range.get("end"): queryset = queryset.filter(opening_date__lte=date_range["end"]) # Closing date range if filters.get("closing_date_range"): date_range = filters["closing_date_range"] if date_range.get("start"): queryset = queryset.filter(closing_date__gte=date_range["start"]) if date_range.get("end"): queryset = queryset.filter(closing_date__lte=date_range["end"]) # Status since range if filters.get("status_since_range"): date_range = filters["status_since_range"] if date_range.get("start"): queryset = queryset.filter(status_since__gte=date_range["start"]) if date_range.get("end"): queryset = queryset.filter(status_since__lte=date_range["end"]) return queryset def _apply_height_safety_filters( self, queryset, filters: Dict[str, Any] ) -> models.QuerySet: """Apply height and safety requirement filters.""" # Minimum height range if filters.get("min_height_range"): height_range = filters["min_height_range"] if height_range.get("min") is not None: queryset = queryset.filter(min_height_in__gte=height_range["min"]) if height_range.get("max") is not None: queryset = queryset.filter(min_height_in__lte=height_range["max"]) # Maximum height range if filters.get("max_height_range"): height_range = filters["max_height_range"] if height_range.get("min") is not None: queryset = queryset.filter(max_height_in__gte=height_range["min"]) if height_range.get("max") is not None: queryset = queryset.filter(max_height_in__lte=height_range["max"]) return queryset def _apply_performance_filters( self, queryset, filters: Dict[str, Any] ) -> models.QuerySet: """Apply performance metric filters.""" # Capacity range if filters.get("capacity_range"): capacity_range = filters["capacity_range"] if capacity_range.get("min") is not None: queryset = queryset.filter(capacity_per_hour__gte=capacity_range["min"]) if capacity_range.get("max") is not None: queryset = queryset.filter(capacity_per_hour__lte=capacity_range["max"]) # Duration range if filters.get("duration_range"): duration_range = filters["duration_range"] if duration_range.get("min") is not None: queryset = queryset.filter( ride_duration_seconds__gte=duration_range["min"] ) if duration_range.get("max") is not None: queryset = queryset.filter( ride_duration_seconds__lte=duration_range["max"] ) # Rating range if filters.get("rating_range"): rating_range = filters["rating_range"] if rating_range.get("min") is not None: queryset = queryset.filter(average_rating__gte=rating_range["min"]) if rating_range.get("max") is not None: queryset = queryset.filter(average_rating__lte=rating_range["max"]) return queryset def _apply_relationship_filters( self, queryset, filters: Dict[str, Any] ) -> models.QuerySet: """Apply relationship filters (manufacturer, designer, ride model).""" # Manufacturer filter (multi-select) if filters.get("manufacturer"): manufacturers = ( filters["manufacturer"] if isinstance(filters["manufacturer"], list) else [filters["manufacturer"]] ) if isinstance(manufacturers[0], str): # If slugs provided queryset = queryset.filter(manufacturer__slug__in=manufacturers) else: # If IDs provided queryset = queryset.filter(manufacturer_id__in=manufacturers) # Designer filter (multi-select) if filters.get("designer"): designers = ( filters["designer"] if isinstance(filters["designer"], list) else [filters["designer"]] ) if isinstance(designers[0], str): # If slugs provided queryset = queryset.filter(designer__slug__in=designers) else: # If IDs provided queryset = queryset.filter(designer_id__in=designers) # Ride model filter (multi-select) if filters.get("ride_model"): models_list = ( filters["ride_model"] if isinstance(filters["ride_model"], list) else [filters["ride_model"]] ) if isinstance(models_list[0], str): # If slugs provided queryset = queryset.filter(ride_model__slug__in=models_list) else: # If IDs provided queryset = queryset.filter(ride_model_id__in=models_list) return queryset def _apply_roller_coaster_filters( self, queryset, filters: Dict[str, Any] ) -> models.QuerySet: """Apply roller coaster specific filters.""" queryset = self._apply_numeric_range_filter( queryset, filters, "height_ft_range", "rollercoasterstats__height_ft" ) queryset = self._apply_numeric_range_filter( queryset, filters, "length_ft_range", "rollercoasterstats__length_ft" ) queryset = self._apply_numeric_range_filter( queryset, filters, "speed_mph_range", "rollercoasterstats__speed_mph" ) queryset = self._apply_numeric_range_filter( queryset, filters, "inversions_range", "rollercoasterstats__inversions" ) # Track material filter (multi-select) if filters.get("track_material"): materials = ( filters["track_material"] if isinstance(filters["track_material"], list) else [filters["track_material"]] ) queryset = queryset.filter(rollercoasterstats__track_material__in=materials) # Coaster type filter (multi-select) if filters.get("coaster_type"): types = ( filters["coaster_type"] if isinstance(filters["coaster_type"], list) else [filters["coaster_type"]] ) queryset = queryset.filter( rollercoasterstats__roller_coaster_type__in=types ) # Launch type filter (multi-select) if filters.get("launch_type"): launch_types = ( filters["launch_type"] if isinstance(filters["launch_type"], list) else [filters["launch_type"]] ) queryset = queryset.filter(rollercoasterstats__launch_type__in=launch_types) return queryset def _apply_numeric_range_filter( self, queryset, filters: Dict[str, Any], filter_key: str, field_name: str, ) -> models.QuerySet: """Apply numeric range filter to reduce complexity.""" if filters.get(filter_key): range_filter = filters[filter_key] if range_filter.get("min") is not None: queryset = queryset.filter( **{f"{field_name}__gte": range_filter["min"]} ) if range_filter.get("max") is not None: queryset = queryset.filter( **{f"{field_name}__lte": range_filter["max"]} ) return queryset def _apply_company_filters( self, queryset, filters: Dict[str, Any] ) -> models.QuerySet: """Apply company-related filters.""" # Manufacturer roles filter if filters.get("manufacturer_roles"): roles = ( filters["manufacturer_roles"] if isinstance(filters["manufacturer_roles"], list) else [filters["manufacturer_roles"]] ) queryset = queryset.filter(manufacturer__roles__overlap=roles) # Designer roles filter if filters.get("designer_roles"): roles = ( filters["designer_roles"] if isinstance(filters["designer_roles"], list) else [filters["designer_roles"]] ) queryset = queryset.filter(designer__roles__overlap=roles) # Founded date range if filters.get("founded_date_range"): date_range = filters["founded_date_range"] if date_range.get("start"): queryset = queryset.filter( Q(manufacturer__founded_date__gte=date_range["start"]) | Q(designer__founded_date__gte=date_range["start"]) ) if date_range.get("end"): queryset = queryset.filter( Q(manufacturer__founded_date__lte=date_range["end"]) | Q(designer__founded_date__lte=date_range["end"]) ) return queryset def _apply_sorting(self, queryset, sort_by: str) -> models.QuerySet: """Apply sorting to the queryset.""" if sort_by not in self.SORT_OPTIONS: sort_by = "relevance" sort_field = self.SORT_OPTIONS[sort_by] # Handle special case for relevance sorting if sort_by == "relevance": return queryset.order_by("-search_rank", "name") # Apply the sorting return queryset.order_by( sort_field, "name" ) # Always add name as secondary sort def _add_search_highlights( self, results: List[Ride], search_term: str ) -> List[Ride]: """Add search highlights to results using SearchHeadline.""" if not search_term or not results: return results # Create search query for highlighting SearchQuery(search_term, config="english") # Add highlights to each result # (note: highlights would need to be processed at query time) for ride in results: # Store highlighted versions as dynamic attributes (for template use) setattr(ride, "highlighted_name", ride.name) setattr(ride, "highlighted_description", ride.description) return results def _get_applied_filters_summary(self, filters: Dict[str, Any]) -> Dict[str, Any]: """Generate a summary of applied filters for the frontend.""" applied = {} # Count filters in each category for category, filter_keys in self.FILTER_CATEGORIES.items(): category_filters = [] for key in filter_keys: if filters.get(key): category_filters.append( { "key": key, "value": filters[key], "display_name": self._get_filter_display_name(key), } ) if category_filters: applied[category] = category_filters return applied def _get_filter_display_name(self, filter_key: str) -> str: """Convert filter key to human-readable display name.""" display_names = { "global_search": "Search", "category": "Category", "status": "Status", "park": "Park", "park_area": "Park Area", "opening_date_range": "Opening Date", "closing_date_range": "Closing Date", "status_since_range": "Status Since", "min_height_range": "Minimum Height", "max_height_range": "Maximum Height", "capacity_range": "Capacity", "duration_range": "Duration", "rating_range": "Rating", "manufacturer": "Manufacturer", "designer": "Designer", "ride_model": "Ride Model", "height_ft_range": "Height (ft)", "length_ft_range": "Length (ft)", "speed_mph_range": "Speed (mph)", "inversions_range": "Inversions", "track_material": "Track Material", "coaster_type": "Coaster Type", "launch_type": "Launch Type", "manufacturer_roles": "Manufacturer Roles", "designer_roles": "Designer Roles", "founded_date_range": "Founded Date", } return display_names.get(filter_key, filter_key.replace("_", " ").title()) def get_search_suggestions( self, query: str, limit: int = 10 ) -> List[Dict[str, Any]]: """ Get search suggestions for autocomplete functionality. """ if not query or len(query) < 2: return [] suggestions = [] # Ride names with trigram similarity ride_suggestions = ( Ride.objects.annotate(similarity=TrigramSimilarity("name", query)) .filter(similarity__gte=0.1) .order_by("-similarity") .values("name", "slug", "similarity")[: limit // 2] ) for ride in ride_suggestions: suggestions.append( { "type": "ride", "text": ride["name"], "slug": ride["slug"], "score": ride["similarity"], } ) # Park names park_suggestions = ( Park.objects.annotate(similarity=TrigramSimilarity("name", query)) .filter(similarity__gte=0.1) .order_by("-similarity") .values("name", "slug", "similarity")[: limit // 4] ) for park in park_suggestions: suggestions.append( { "type": "park", "text": park["name"], "slug": park["slug"], "score": park["similarity"], } ) # Manufacturer names manufacturer_suggestions = ( Company.objects.filter(roles__contains=["MANUFACTURER"]) .annotate(similarity=TrigramSimilarity("name", query)) .filter(similarity__gte=0.1) .order_by("-similarity") .values("name", "slug", "similarity")[: limit // 4] ) for manufacturer in manufacturer_suggestions: suggestions.append( { "type": "manufacturer", "text": manufacturer["name"], "slug": manufacturer["slug"], "score": manufacturer["similarity"], } ) # Sort by score and return top results suggestions.sort(key=lambda x: x["score"], reverse=True) return suggestions[:limit] def get_filter_options( self, filter_type: str, context_filters: Optional[Dict[str, Any]] = None ) -> List[Dict[str, Any]]: """ Get available options for a specific filter type. Optionally filter options based on current context. """ context_filters = context_filters or {} base_queryset = self.base_queryset # Apply context filters to narrow down options if context_filters: temp_filters = context_filters.copy() temp_filters.pop( filter_type, None ) # Remove the filter we're getting options for base_queryset = self._apply_all_filters(base_queryset, temp_filters) if filter_type == "park": return list( base_queryset.values("park__name", "park__slug") .distinct() .order_by("park__name") ) elif filter_type == "manufacturer": return list( base_queryset.filter(manufacturer__isnull=False) .values("manufacturer__name", "manufacturer__slug") .distinct() .order_by("manufacturer__name") ) elif filter_type == "designer": return list( base_queryset.filter(designer__isnull=False) .values("designer__name", "designer__slug") .distinct() .order_by("designer__name") ) # Add more filter options as needed return [] def _apply_all_filters(self, queryset, filters: Dict[str, Any]) -> models.QuerySet: """Apply all filters except search ranking.""" queryset = self._apply_basic_info_filters(queryset, filters) queryset = self._apply_date_filters(queryset, filters) queryset = self._apply_height_safety_filters(queryset, filters) queryset = self._apply_performance_filters(queryset, filters) queryset = self._apply_relationship_filters(queryset, filters) queryset = self._apply_roller_coaster_filters(queryset, filters) queryset = self._apply_company_filters(queryset, filters) return queryset