""" Service for calculating ride rankings using the Internet Roller Coaster Poll algorithm. This service implements a pairwise comparison system where each ride is compared to every other ride based on mutual riders (users who have rated both rides). Rankings are determined by winning percentage in these comparisons. """ import logging from typing import Dict, List, Optional from decimal import Decimal from datetime import date from django.db import transaction from django.db.models import Avg, Count, Q from django.utils import timezone from apps.rides.models import ( Ride, RideReview, RideRanking, RidePairComparison, RankingSnapshot, ) logger = logging.getLogger(__name__) class RideRankingService: """ Calculates ride rankings using the Internet Roller Coaster Poll algorithm. Algorithm Overview: 1. For each pair of rides, find users who have rated both 2. Count how many users preferred each ride (higher rating) 3. Calculate wins, losses, and ties for each ride 4. Rank rides by winning percentage (ties count as 0.5 wins) 5. Break ties by head-to-head comparison """ def __init__(self): self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") self.calculation_version = "1.0" def update_all_rankings(self, category: Optional[str] = None) -> Dict[str, any]: """ Main entry point to update all ride rankings. Args: category: Optional ride category to filter ('RC' for roller coasters, etc.) If None, ranks all rides. Returns: Dictionary with statistics about the ranking calculation """ start_time = timezone.now() self.logger.info( f"Starting ranking calculation for category: {category or 'ALL'}" ) try: with transaction.atomic(): # Get rides to rank rides = self._get_eligible_rides(category) if not rides: self.logger.warning("No eligible rides found for ranking") return { "status": "skipped", "message": "No eligible rides found", "duration": (timezone.now() - start_time).total_seconds(), } self.logger.info(f"Found {len(rides)} rides to rank") # Calculate pairwise comparisons comparisons = self._calculate_all_comparisons(rides) # Calculate rankings from comparisons rankings = self._calculate_rankings_from_comparisons(rides, comparisons) # Save rankings self._save_rankings(rankings) # Save snapshots for historical tracking self._save_ranking_snapshots(rankings) # Clean up old data self._cleanup_old_data() duration = (timezone.now() - start_time).total_seconds() self.logger.info( f"Ranking calculation completed in {duration:.2f} seconds" ) return { "status": "success", "rides_ranked": len(rides), "comparisons_made": len(comparisons), "duration": duration, "timestamp": timezone.now(), } except Exception as e: self.logger.error(f"Error updating rankings: {e}", exc_info=True) raise def _get_eligible_rides(self, category: Optional[str] = None) -> List[Ride]: """ Get rides that are eligible for ranking. Only includes rides that: - Are currently operating - Have at least one review/rating """ queryset = ( Ride.objects.filter(status="OPERATING", reviews__is_published=True) .annotate( review_count=Count("reviews", filter=Q(reviews__is_published=True)) ) .filter(review_count__gt=0) ) if category: queryset = queryset.filter(category=category) return list(queryset.distinct()) def _calculate_all_comparisons( self, rides: List[Ride] ) -> Dict[tuple[int, int], RidePairComparison]: """ Calculate pairwise comparisons for all ride pairs. Returns a dictionary keyed by (ride_a_id, ride_b_id) tuples. """ comparisons = {} total_pairs = len(rides) * (len(rides) - 1) // 2 processed = 0 for i, ride_a in enumerate(rides): for ride_b in rides[i + 1:]: comparison = self._calculate_pairwise_comparison(ride_a, ride_b) if comparison: # Store both directions for easy lookup comparisons[(ride_a.id, ride_b.id)] = comparison comparisons[(ride_b.id, ride_a.id)] = comparison processed += 1 if processed % 100 == 0: self.logger.debug( f"Processed {processed}/{total_pairs} comparisons" ) return comparisons def _calculate_pairwise_comparison( self, ride_a: Ride, ride_b: Ride ) -> Optional[RidePairComparison]: """ Calculate the pairwise comparison between two rides. Finds users who have rated both rides and determines which ride they preferred based on their ratings. """ # Get mutual riders (users who have rated both rides) ride_a_reviewers = set( RideReview.objects.filter(ride=ride_a, is_published=True).values_list( "user_id", flat=True ) ) ride_b_reviewers = set( RideReview.objects.filter(ride=ride_b, is_published=True).values_list( "user_id", flat=True ) ) mutual_riders = ride_a_reviewers & ride_b_reviewers if not mutual_riders: # No mutual riders, no comparison possible return None # Get ratings from mutual riders ride_a_ratings = { review.user_id: review.rating for review in RideReview.objects.filter( ride=ride_a, user_id__in=mutual_riders, is_published=True ) } ride_b_ratings = { review.user_id: review.rating for review in RideReview.objects.filter( ride=ride_b, user_id__in=mutual_riders, is_published=True ) } # Count wins and ties ride_a_wins = 0 ride_b_wins = 0 ties = 0 for user_id in mutual_riders: rating_a = ride_a_ratings.get(user_id, 0) rating_b = ride_b_ratings.get(user_id, 0) if rating_a > rating_b: ride_a_wins += 1 elif rating_b > rating_a: ride_b_wins += 1 else: ties += 1 # Calculate average ratings from mutual riders ride_a_avg = ( sum(ride_a_ratings.values()) / len(ride_a_ratings) if ride_a_ratings else 0 ) ride_b_avg = ( sum(ride_b_ratings.values()) / len(ride_b_ratings) if ride_b_ratings else 0 ) # Create or update comparison record comparison, created = RidePairComparison.objects.update_or_create( ride_a=ride_a if ride_a.id < ride_b.id else ride_b, ride_b=ride_b if ride_a.id < ride_b.id else ride_a, defaults={ "ride_a_wins": ride_a_wins if ride_a.id < ride_b.id else ride_b_wins, "ride_b_wins": ride_b_wins if ride_a.id < ride_b.id else ride_a_wins, "ties": ties, "mutual_riders_count": len(mutual_riders), "ride_a_avg_rating": ( Decimal(str(ride_a_avg)) if ride_a.id < ride_b.id else Decimal(str(ride_b_avg)) ), "ride_b_avg_rating": ( Decimal(str(ride_b_avg)) if ride_a.id < ride_b.id else Decimal(str(ride_a_avg)) ), }, ) return comparison def _calculate_rankings_from_comparisons( self, rides: List[Ride], comparisons: Dict[tuple[int, int], RidePairComparison] ) -> List[Dict]: """ Calculate final rankings from pairwise comparisons. Returns a list of dictionaries containing ranking data for each ride. """ rankings = [] for ride in rides: wins = 0 losses = 0 ties = 0 comparison_count = 0 # Count wins, losses, and ties for other_ride in rides: if ride.id == other_ride.id: continue comparison_key = ( min(ride.id, other_ride.id), max(ride.id, other_ride.id), ) comparison = comparisons.get(comparison_key) if not comparison: continue comparison_count += 1 # Determine win/loss/tie for this ride if comparison.ride_a_id == ride.id: if comparison.ride_a_wins > comparison.ride_b_wins: wins += 1 elif comparison.ride_a_wins < comparison.ride_b_wins: losses += 1 else: ties += 1 else: # ride_b_id == ride.id if comparison.ride_b_wins > comparison.ride_a_wins: wins += 1 elif comparison.ride_b_wins < comparison.ride_a_wins: losses += 1 else: ties += 1 # Calculate winning percentage (ties count as 0.5) total_comparisons = wins + losses + ties if total_comparisons > 0: winning_percentage = Decimal( str((wins + 0.5 * ties) / total_comparisons) ) else: winning_percentage = Decimal("0.5") # Get average rating and reviewer count ride_stats = RideReview.objects.filter( ride=ride, is_published=True ).aggregate( avg_rating=Avg("rating"), reviewer_count=Count("user", distinct=True) ) rankings.append( { "ride": ride, "wins": wins, "losses": losses, "ties": ties, "winning_percentage": winning_percentage, "comparison_count": comparison_count, "average_rating": ride_stats["avg_rating"], "mutual_riders_count": ride_stats["reviewer_count"] or 0, } ) # Sort by winning percentage (descending), then by mutual riders count for ties rankings.sort( key=lambda x: ( x["winning_percentage"], x["mutual_riders_count"], x["average_rating"] or 0, ), reverse=True, ) # Handle tie-breaking with head-to-head comparisons rankings = self._apply_tiebreakers(rankings, comparisons) # Assign final ranks for i, ranking_data in enumerate(rankings, 1): ranking_data["rank"] = i return rankings def _apply_tiebreakers( self, rankings: List[Dict], comparisons: Dict[tuple[int, int], RidePairComparison], ) -> List[Dict]: """ Apply head-to-head tiebreaker for rides with identical winning percentages. If two rides have the same winning percentage, the one that beat the other in their head-to-head comparison gets the higher rank. """ i = 0 while i < len(rankings) - 1: # Find rides with same winning percentage tied_group = [rankings[i]] j = i + 1 while ( j < len(rankings) and rankings[j]["winning_percentage"] == rankings[i]["winning_percentage"] ): tied_group.append(rankings[j]) j += 1 if len(tied_group) > 1: # Apply head-to-head tiebreaker within the group tied_group = self._sort_tied_group(tied_group, comparisons) # Replace the tied section with sorted group rankings[i:j] = tied_group i = j return rankings def _sort_tied_group( self, tied_group: List[Dict], comparisons: Dict[tuple[int, int], RidePairComparison], ) -> List[Dict]: """ Sort a group of tied rides using head-to-head comparisons. """ # Create mini-rankings within the tied group for ride_data in tied_group: mini_wins = 0 mini_losses = 0 for other_data in tied_group: if ride_data["ride"].id == other_data["ride"].id: continue comparison_key = ( min(ride_data["ride"].id, other_data["ride"].id), max(ride_data["ride"].id, other_data["ride"].id), ) comparison = comparisons.get(comparison_key) if comparison: if comparison.ride_a_id == ride_data["ride"].id: if comparison.ride_a_wins > comparison.ride_b_wins: mini_wins += 1 elif comparison.ride_a_wins < comparison.ride_b_wins: mini_losses += 1 else: if comparison.ride_b_wins > comparison.ride_a_wins: mini_wins += 1 elif comparison.ride_b_wins < comparison.ride_a_wins: mini_losses += 1 ride_data["tiebreaker_score"] = mini_wins - mini_losses # Sort by tiebreaker score, then by mutual riders count, then by average rating tied_group.sort( key=lambda x: ( x["tiebreaker_score"], x["mutual_riders_count"], x["average_rating"] or 0, ), reverse=True, ) return tied_group def _save_rankings(self, rankings: List[Dict]): """Save calculated rankings to the database.""" for ranking_data in rankings: RideRanking.objects.update_or_create( ride=ranking_data["ride"], defaults={ "rank": ranking_data["rank"], "wins": ranking_data["wins"], "losses": ranking_data["losses"], "ties": ranking_data["ties"], "winning_percentage": ranking_data["winning_percentage"], "mutual_riders_count": ranking_data["mutual_riders_count"], "comparison_count": ranking_data["comparison_count"], "average_rating": ranking_data["average_rating"], "last_calculated": timezone.now(), "calculation_version": self.calculation_version, }, ) def _save_ranking_snapshots(self, rankings: List[Dict]): """Save ranking snapshots for historical tracking.""" today = date.today() for ranking_data in rankings: RankingSnapshot.objects.update_or_create( ride=ranking_data["ride"], snapshot_date=today, defaults={ "rank": ranking_data["rank"], "winning_percentage": ranking_data["winning_percentage"], }, ) def _cleanup_old_data(self, days_to_keep: int = 365): """Clean up old comparison and snapshot data.""" cutoff_date = timezone.now() - timezone.timedelta(days=days_to_keep) # Delete old snapshots deleted_snapshots = RankingSnapshot.objects.filter( snapshot_date__lt=cutoff_date.date() ).delete() if deleted_snapshots[0] > 0: self.logger.info(f"Deleted {deleted_snapshots[0]} old ranking snapshots") def get_ride_ranking_details(self, ride: Ride) -> Optional[Dict]: """ Get detailed ranking information for a specific ride. Returns dictionary with ranking details or None if not ranked. """ try: ranking = RideRanking.objects.get(ride=ride) # Get recent head-to-head comparisons comparisons = ( RidePairComparison.objects.filter(Q(ride_a=ride) | Q(ride_b=ride)) .select_related("ride_a", "ride_b") .order_by("-mutual_riders_count")[:10] ) # Get ranking history history = RankingSnapshot.objects.filter(ride=ride).order_by( "-snapshot_date" )[:30] return { "current_rank": ranking.rank, "winning_percentage": ranking.winning_percentage, "wins": ranking.wins, "losses": ranking.losses, "ties": ranking.ties, "average_rating": ranking.average_rating, "mutual_riders_count": ranking.mutual_riders_count, "last_calculated": ranking.last_calculated, "head_to_head": [ { "opponent": ( comp.ride_b if comp.ride_a_id == ride.id else comp.ride_a ), "result": ( "win" if ( ( comp.ride_a_id == ride.id and comp.ride_a_wins > comp.ride_b_wins ) or ( comp.ride_b_id == ride.id and comp.ride_b_wins > comp.ride_a_wins ) ) else ( "loss" if ( ( comp.ride_a_id == ride.id and comp.ride_a_wins < comp.ride_b_wins ) or ( comp.ride_b_id == ride.id and comp.ride_b_wins < comp.ride_a_wins ) ) else "tie" ) ), "mutual_riders": comp.mutual_riders_count, } for comp in comparisons ], "ranking_history": [ { "date": snapshot.snapshot_date, "rank": snapshot.rank, "winning_percentage": snapshot.winning_percentage, } for snapshot in history ], } except RideRanking.DoesNotExist: return None