thrillwiki_django_no_react/backend/apps/parks/services/hybrid_loader.py
pacnpal c2c26cfd1d Add comprehensive API documentation for ThrillWiki integration and features
- Introduced Next.js integration guide for ThrillWiki API, detailing authentication, core domain APIs, data structures, and implementation patterns.
- Documented the migration to Rich Choice Objects, highlighting changes for frontend developers and enhanced metadata availability.
- Fixed the missing `get_by_slug` method in the Ride model, ensuring proper functionality of ride detail endpoints.
- Created a test script to verify manufacturer syncing with ride models, ensuring data integrity across related models.
2025-09-16 11:29:17 -04:00

"""
Smart Park Loader for Hybrid Filtering Strategy
This module provides intelligent data loading capabilities for the hybrid filtering approach,
optimizing database queries and implementing progressive loading strategies.
"""
import hashlib
from typing import Dict, Optional, Any
from django.db import models
from django.core.cache import cache
from django.conf import settings
from apps.parks.models import Park
class SmartParkLoader:
"""
Intelligent park data loader that optimizes queries based on filtering requirements.
Implements progressive loading and smart caching strategies.
"""
# Cache configuration
CACHE_TIMEOUT = getattr(settings, 'HYBRID_FILTER_CACHE_TIMEOUT', 300) # 5 minutes
CACHE_KEY_PREFIX = 'hybrid_parks'
# Progressive loading thresholds
INITIAL_LOAD_SIZE = 50
PROGRESSIVE_LOAD_SIZE = 25
MAX_CLIENT_SIDE_RECORDS = 200
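    # Result sets at or below MAX_CLIENT_SIDE_RECORDS are returned in full so the
    # frontend can filter them client-side; anything larger falls back to
    # server-side pagination, served in INITIAL_LOAD_SIZE / PROGRESSIVE_LOAD_SIZE batches.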
def __init__(self):
self.base_queryset = self._get_optimized_queryset()
def _get_optimized_queryset(self) -> models.QuerySet:
"""Get optimized base queryset with all necessary prefetches."""
return Park.objects.select_related(
'operator',
'property_owner',
'banner_image',
'card_image',
).prefetch_related(
'location', # ParkLocation relationship
).filter(
# Only include operating and temporarily closed parks by default
status__in=['OPERATING', 'CLOSED_TEMP']
).order_by('name')
def get_initial_load(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Get initial park data load with smart filtering decisions.
Args:
filters: Optional filters to apply
Returns:
Dictionary containing parks data and metadata
"""
cache_key = self._generate_cache_key('initial', filters)
cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result
# Apply filters if provided
queryset = self.base_queryset
if filters:
queryset = self._apply_filters(queryset, filters)
# Get total count for pagination decisions
total_count = queryset.count()
# Determine loading strategy
if total_count <= self.MAX_CLIENT_SIDE_RECORDS:
# Load all data for client-side filtering
parks = list(queryset.all())
strategy = 'client_side'
has_more = False
else:
# Load initial batch for server-side pagination
parks = list(queryset[:self.INITIAL_LOAD_SIZE])
strategy = 'server_side'
has_more = total_count > self.INITIAL_LOAD_SIZE
result = {
'parks': parks,
'total_count': total_count,
'strategy': strategy,
'has_more': has_more,
'next_offset': len(parks) if has_more else None,
'filter_metadata': self._get_filter_metadata(queryset),
}
# Cache the result
cache.set(cache_key, result, self.CACHE_TIMEOUT)
return result
def get_progressive_load(
self,
offset: int,
filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Get next batch of parks for progressive loading.
Args:
offset: Starting offset for the batch
filters: Optional filters to apply
Returns:
Dictionary containing parks data and metadata
"""
cache_key = self._generate_cache_key(f'progressive_{offset}', filters)
cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result
# Apply filters if provided
queryset = self.base_queryset
if filters:
queryset = self._apply_filters(queryset, filters)
# Get the batch
end_offset = offset + self.PROGRESSIVE_LOAD_SIZE
parks = list(queryset[offset:end_offset])
# Check if there are more records
total_count = queryset.count()
has_more = end_offset < total_count
result = {
'parks': parks,
'total_count': total_count,
'has_more': has_more,
'next_offset': end_offset if has_more else None,
}
# Cache the result
cache.set(cache_key, result, self.CACHE_TIMEOUT)
return result
def get_filter_metadata(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Get metadata about available filter options.
Args:
filters: Current filters to scope the metadata
Returns:
Dictionary containing filter metadata
"""
cache_key = self._generate_cache_key('metadata', filters)
cached_result = cache.get(cache_key)
        if cached_result is not None:
            return cached_result
# Apply filters if provided
queryset = self.base_queryset
if filters:
queryset = self._apply_filters(queryset, filters)
result = self._get_filter_metadata(queryset)
# Cache the result
cache.set(cache_key, result, self.CACHE_TIMEOUT)
return result
    def _apply_filters(self, queryset: models.QuerySet, filters: Dict[str, Any]) -> models.QuerySet:
        """
        Apply the supported filters to the queryset.

        Recognized keys: status, park_type, country, state, operator, search,
        plus min/max pairs for opening_year, size, rating, ride_count, and
        coaster_count.
        """
# Status filter
if 'status' in filters and filters['status']:
if isinstance(filters['status'], list):
queryset = queryset.filter(status__in=filters['status'])
else:
queryset = queryset.filter(status=filters['status'])
# Park type filter
if 'park_type' in filters and filters['park_type']:
if isinstance(filters['park_type'], list):
queryset = queryset.filter(park_type__in=filters['park_type'])
else:
queryset = queryset.filter(park_type=filters['park_type'])
        # Country filter (single values are wrapped so __in never iterates a string)
        if 'country' in filters and filters['country']:
            countries = filters['country'] if isinstance(filters['country'], list) else [filters['country']]
            queryset = queryset.filter(location__country__in=countries)
        # State filter
        if 'state' in filters and filters['state']:
            states = filters['state'] if isinstance(filters['state'], list) else [filters['state']]
            queryset = queryset.filter(location__state__in=states)
        # Opening year range (explicit None checks so 0 values are not silently dropped)
        if filters.get('opening_year_min') is not None:
            queryset = queryset.filter(opening_year__gte=filters['opening_year_min'])
        if filters.get('opening_year_max') is not None:
            queryset = queryset.filter(opening_year__lte=filters['opening_year_max'])
        # Size range
        if filters.get('size_min') is not None:
            queryset = queryset.filter(size_acres__gte=filters['size_min'])
        if filters.get('size_max') is not None:
            queryset = queryset.filter(size_acres__lte=filters['size_max'])
        # Rating range
        if filters.get('rating_min') is not None:
            queryset = queryset.filter(average_rating__gte=filters['rating_min'])
        if filters.get('rating_max') is not None:
            queryset = queryset.filter(average_rating__lte=filters['rating_max'])
        # Ride count range
        if filters.get('ride_count_min') is not None:
            queryset = queryset.filter(ride_count__gte=filters['ride_count_min'])
        if filters.get('ride_count_max') is not None:
            queryset = queryset.filter(ride_count__lte=filters['ride_count_max'])
        # Coaster count range
        if filters.get('coaster_count_min') is not None:
            queryset = queryset.filter(coaster_count__gte=filters['coaster_count_min'])
        if filters.get('coaster_count_max') is not None:
            queryset = queryset.filter(coaster_count__lte=filters['coaster_count_max'])
# Operator filter
if 'operator' in filters and filters['operator']:
if isinstance(filters['operator'], list):
queryset = queryset.filter(operator__slug__in=filters['operator'])
else:
queryset = queryset.filter(operator__slug=filters['operator'])
# Search query
if 'search' in filters and filters['search']:
search_term = filters['search'].lower()
queryset = queryset.filter(search_text__icontains=search_term)
return queryset
def _get_filter_metadata(self, queryset: models.QuerySet) -> Dict[str, Any]:
"""Generate filter metadata from the current queryset."""
# Get distinct values for categorical filters with counts
countries_data = list(
queryset.values('location__country')
.exclude(location__country__isnull=True)
.annotate(count=models.Count('id'))
.order_by('location__country')
)
states_data = list(
queryset.values('location__state')
.exclude(location__state__isnull=True)
.annotate(count=models.Count('id'))
.order_by('location__state')
)
park_types_data = list(
queryset.values('park_type')
.exclude(park_type__isnull=True)
.annotate(count=models.Count('id'))
.order_by('park_type')
)
statuses_data = list(
queryset.values('status')
.annotate(count=models.Count('id'))
.order_by('status')
)
        operators_data = list(
            queryset.values('operator__id', 'operator__name', 'operator__slug')
            .exclude(operator__isnull=True)
            .annotate(count=models.Count('id'))
            .order_by('operator__name')
        )
# Convert to frontend-expected format with value/label/count
countries = [
{
'value': item['location__country'],
'label': item['location__country'],
'count': item['count']
}
for item in countries_data
]
states = [
{
'value': item['location__state'],
'label': item['location__state'],
'count': item['count']
}
for item in states_data
]
park_types = [
{
'value': item['park_type'],
'label': item['park_type'],
'count': item['count']
}
for item in park_types_data
]
statuses = [
{
'value': item['status'],
'label': self._get_status_label(item['status']),
'count': item['count']
}
for item in statuses_data
]
operators = [
{
'value': item['operator__slug'],
'label': item['operator__name'],
'count': item['count']
}
for item in operators_data
]
# Get ranges for numerical filters
aggregates = queryset.aggregate(
opening_year_min=models.Min('opening_year'),
opening_year_max=models.Max('opening_year'),
size_min=models.Min('size_acres'),
size_max=models.Max('size_acres'),
rating_min=models.Min('average_rating'),
rating_max=models.Max('average_rating'),
ride_count_min=models.Min('ride_count'),
ride_count_max=models.Max('ride_count'),
coaster_count_min=models.Min('coaster_count'),
coaster_count_max=models.Max('coaster_count'),
)
return {
'categorical': {
'countries': countries,
'states': states,
'park_types': park_types,
'statuses': statuses,
'operators': operators,
},
'ranges': {
'opening_year': {
'min': aggregates['opening_year_min'],
'max': aggregates['opening_year_max'],
'step': 1,
'unit': 'year'
},
            'size_acres': {
                'min': float(aggregates['size_min']) if aggregates['size_min'] is not None else None,
                'max': float(aggregates['size_max']) if aggregates['size_max'] is not None else None,
                'step': 1.0,
                'unit': 'acres'
            },
            'average_rating': {
                'min': float(aggregates['rating_min']) if aggregates['rating_min'] is not None else None,
                'max': float(aggregates['rating_max']) if aggregates['rating_max'] is not None else None,
                'step': 0.1,
                'unit': 'stars'
            },
'ride_count': {
'min': aggregates['ride_count_min'],
'max': aggregates['ride_count_max'],
'step': 1,
'unit': 'rides'
},
'coaster_count': {
'min': aggregates['coaster_count_min'],
'max': aggregates['coaster_count_max'],
'step': 1,
'unit': 'coasters'
},
},
'total_count': queryset.count(),
}
    def _get_status_label(self, status: str) -> str:
        """Convert a status code to a human-readable label."""
        status_labels = {
            'OPERATING': 'Operating',
            'CLOSED_TEMP': 'Temporarily Closed',
            'CLOSED_PERM': 'Permanently Closed',
            'UNDER_CONSTRUCTION': 'Under Construction',
        }
        # Fall back to a title-cased form of the code rather than raising, so an
        # unexpected status value cannot break filter metadata generation.
        return status_labels.get(status, status.replace('_', ' ').title())
    def _generate_cache_key(self, operation: str, filters: Optional[Dict[str, Any]] = None) -> str:
        """Generate a cache key for the given operation and filters."""
        key_parts = [self.CACHE_KEY_PREFIX, operation]
        if filters:
            # Hash a stable representation of the filters so keys stay short and
            # avoid characters (spaces, brackets) that some cache backends reject.
            filter_str = '_'.join(f"{k}:{v}" for k, v in sorted(filters.items()) if v is not None)
            key_parts.append(hashlib.md5(filter_str.encode('utf-8')).hexdigest())
        return '_'.join(key_parts)
def invalidate_cache(self, filters: Optional[Dict[str, Any]] = None) -> None:
"""Invalidate cached data for the given filters."""
# This is a simplified implementation
# In production, you might want to use cache versioning or tags
cache_keys = [
self._generate_cache_key('initial', filters),
self._generate_cache_key('metadata', filters),
]
# Also invalidate progressive load caches
for offset in range(0, 1000, self.PROGRESSIVE_LOAD_SIZE):
cache_keys.append(self._generate_cache_key(f'progressive_{offset}', filters))
cache.delete_many(cache_keys)
# Singleton instance
smart_park_loader = SmartParkLoader()
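
# Example usage (a minimal sketch, not part of this module's public contract;
# the view function and query-parameter names below are assumptions):
#
#   from apps.parks.services.hybrid_loader import smart_park_loader
#
#   def park_list(request):
#       filters = {
#           'country': request.GET.getlist('country'),
#           'status': request.GET.getlist('status'),
#           'search': request.GET.get('search'),
#       }
#       # Drop empty values so they do not influence filtering or cache keys.
#       filters = {k: v for k, v in filters.items() if v}
#
#       payload = smart_park_loader.get_initial_load(filters)
#       # payload['strategy'] is 'client_side' when the full result set was
#       # returned, or 'server_side' when the caller should keep paging with
#       # smart_park_loader.get_progressive_load(payload['next_offset'], filters).
#       return payload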