remove backend

This commit is contained in:
pacnpal
2025-09-21 20:19:12 -04:00
parent 9e724bd795
commit f3c59ad6ff
557 changed files with 1739 additions and 4836 deletions

View File

@@ -0,0 +1,5 @@
"""
Core tasks package for ThrillWiki.
This package contains all Celery tasks for the core application.
"""

607
apps/core/tasks/trending.py Normal file
View File

@@ -0,0 +1,607 @@
"""
Trending calculation tasks for ThrillWiki.
This module contains Celery tasks for calculating and caching trending content.
All tasks run asynchronously to avoid blocking the main application.
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any
from celery import shared_task
from django.utils import timezone
from django.core.cache import cache
from django.contrib.contenttypes.models import ContentType
from django.db.models import Q
from apps.core.analytics import PageView
from apps.parks.models import Park
from apps.rides.models import Ride
logger = logging.getLogger(__name__)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def calculate_trending_content(
self, content_type: str = "all", limit: int = 50
) -> Dict[str, Any]:
"""
Calculate trending content using real analytics data.
This task runs periodically to update trending calculations based on:
- View growth rates
- Content ratings
- Recency factors
- Popularity metrics
Args:
content_type: 'parks', 'rides', or 'all'
limit: Maximum number of results to calculate
Returns:
Dict containing trending results and metadata
"""
try:
logger.info(f"Starting trending calculation for {content_type}")
# Time windows for calculations
current_period_hours = 168 # 7 days
previous_period_hours = 336 # 14 days (for previous 7-day window comparison)
trending_items = []
if content_type in ["all", "parks"]:
park_items = _calculate_trending_parks(
current_period_hours,
previous_period_hours,
limit if content_type == "parks" else limit * 2,
)
trending_items.extend(park_items)
if content_type in ["all", "rides"]:
ride_items = _calculate_trending_rides(
current_period_hours,
previous_period_hours,
limit if content_type == "rides" else limit * 2,
)
trending_items.extend(ride_items)
# Sort by trending score and apply limit
trending_items.sort(key=lambda x: x.get("trending_score", 0), reverse=True)
trending_items = trending_items[:limit]
# Format results for API consumption
formatted_results = _format_trending_results(
trending_items, current_period_hours, previous_period_hours
)
# Cache results
cache_key = f"trending:calculated:{content_type}:{limit}"
cache.set(cache_key, formatted_results, 3600) # Cache for 1 hour
logger.info(
f"Calculated {len(formatted_results)} trending items for {content_type}"
)
return {
"success": True,
"content_type": content_type,
"count": len(formatted_results),
"results": formatted_results,
"calculated_at": timezone.now().isoformat(),
}
except Exception as e:
logger.error(f"Error calculating trending content: {e}", exc_info=True)
# Retry the task
raise self.retry(exc=e)
@shared_task(bind=True, max_retries=3, default_retry_delay=30)
def calculate_new_content(
self, content_type: str = "all", days_back: int = 30, limit: int = 50
) -> Dict[str, Any]:
"""
Calculate new content based on opening dates and creation dates.
Args:
content_type: 'parks', 'rides', or 'all'
days_back: How many days to look back for new content
limit: Maximum number of results
Returns:
Dict containing new content results and metadata
"""
try:
logger.info(f"Starting new content calculation for {content_type}")
cutoff_date = timezone.now() - timedelta(days=days_back)
new_items = []
if content_type in ["all", "parks"]:
parks = _get_new_parks(
cutoff_date, limit if content_type == "parks" else limit * 2
)
new_items.extend(parks)
if content_type in ["all", "rides"]:
rides = _get_new_rides(
cutoff_date, limit if content_type == "rides" else limit * 2
)
new_items.extend(rides)
# Sort by date added (most recent first) and apply limit
new_items.sort(key=lambda x: x.get("date_added", ""), reverse=True)
new_items = new_items[:limit]
# Format results for API consumption
formatted_results = _format_new_content_results(new_items)
# Cache results
cache_key = f"new_content:calculated:{content_type}:{days_back}:{limit}"
cache.set(cache_key, formatted_results, 1800) # Cache for 30 minutes
logger.info(f"Calculated {len(formatted_results)} new items for {content_type}")
return {
"success": True,
"content_type": content_type,
"count": len(formatted_results),
"results": formatted_results,
"calculated_at": timezone.now().isoformat(),
}
except Exception as e:
logger.error(f"Error calculating new content: {e}", exc_info=True)
raise self.retry(exc=e)
@shared_task(bind=True)
def warm_trending_cache(self) -> Dict[str, Any]:
"""
Warm the trending cache by pre-calculating common queries.
This task runs periodically to ensure fast API responses.
"""
try:
logger.info("Starting trending cache warming")
# Common query combinations to pre-calculate
queries = [
{"content_type": "all", "limit": 20},
{"content_type": "parks", "limit": 10},
{"content_type": "rides", "limit": 10},
{"content_type": "all", "limit": 50},
]
results = {}
for query in queries:
# Trigger trending calculation
calculate_trending_content.delay(**query)
# Trigger new content calculation
calculate_new_content.delay(**query)
results[f"trending_{query['content_type']}_{query['limit']}"] = "scheduled"
results[f"new_content_{query['content_type']}_{query['limit']}"] = (
"scheduled"
)
logger.info("Trending cache warming completed")
return {
"success": True,
"queries_scheduled": len(queries) * 2,
"results": results,
"warmed_at": timezone.now().isoformat(),
}
except Exception as e:
logger.error(f"Error warming trending cache: {e}", exc_info=True)
return {
"success": False,
"error": str(e),
"warmed_at": timezone.now().isoformat(),
}
def _calculate_trending_parks(
current_period_hours: int, previous_period_hours: int, limit: int
) -> List[Dict[str, Any]]:
"""Calculate trending scores for parks using real data."""
parks = Park.objects.filter(status="OPERATING").select_related(
"location", "operator"
)
trending_parks = []
for park in parks:
try:
score = _calculate_content_score(
park, "park", current_period_hours, previous_period_hours
)
if score > 0: # Only include items with positive trending scores
trending_parks.append(
{
"content_object": park,
"content_type": "park",
"trending_score": score,
"id": park.id,
"name": park.name,
"slug": park.slug,
"location": (
park.formatted_location if hasattr(park, "location") else ""
),
"category": "park",
"rating": (
float(park.average_rating) if park.average_rating else 0.0
),
}
)
except Exception as e:
logger.warning(f"Error calculating score for park {park.id}: {e}")
return trending_parks
def _calculate_trending_rides(
current_period_hours: int, previous_period_hours: int, limit: int
) -> List[Dict[str, Any]]:
"""Calculate trending scores for rides using real data."""
rides = Ride.objects.filter(status="OPERATING").select_related(
"park", "park__location"
)
trending_rides = []
for ride in rides:
try:
score = _calculate_content_score(
ride, "ride", current_period_hours, previous_period_hours
)
if score > 0: # Only include items with positive trending scores
# Get location from park
location = ""
if ride.park and hasattr(ride.park, "location") and ride.park.location:
location = ride.park.formatted_location
trending_rides.append(
{
"content_object": ride,
"content_type": "ride",
"trending_score": score,
"id": ride.pk,
"name": ride.name,
"slug": ride.slug,
"location": location,
"category": "ride",
"rating": (
float(ride.average_rating) if ride.average_rating else 0.0
),
}
)
except Exception as e:
logger.warning(f"Error calculating score for ride {ride.pk}: {e}")
return trending_rides
def _calculate_content_score(
content_obj: Any,
content_type: str,
current_period_hours: int,
previous_period_hours: int,
) -> float:
"""
Calculate weighted trending score for content object using real analytics data.
Algorithm Components:
- View Growth Rate (40% weight): Recent view increase vs historical
- Rating Score (30% weight): Average user rating normalized
- Recency Factor (20% weight): How recently content was added/updated
- Popularity Boost (10% weight): Total view count normalization
Returns:
Float between 0.0 and 1.0 representing trending strength
"""
try:
# Get content type for PageView queries
ct = ContentType.objects.get_for_model(content_obj)
# 1. View Growth Score (40% weight)
view_growth_score = _calculate_view_growth_score(
ct, content_obj.id, current_period_hours, previous_period_hours
)
# 2. Rating Score (30% weight)
rating_score = _calculate_rating_score(content_obj)
# 3. Recency Score (20% weight)
recency_score = _calculate_recency_score(content_obj)
# 4. Popularity Score (10% weight)
popularity_score = _calculate_popularity_score(
ct, content_obj.id, current_period_hours
)
# Calculate weighted final score
final_score = (
view_growth_score * 0.4
+ rating_score * 0.3
+ recency_score * 0.2
+ popularity_score * 0.1
)
logger.debug(
f"{content_type} {content_obj.id}: "
f"growth={view_growth_score:.3f}, rating={rating_score:.3f}, "
f"recency={recency_score:.3f}, popularity={popularity_score:.3f}, "
f"final={final_score:.3f}"
)
return final_score
except Exception as e:
logger.error(
f"Error calculating score for {content_type} {content_obj.id}: {e}"
)
return 0.0
def _calculate_view_growth_score(
content_type: ContentType,
object_id: int,
current_period_hours: int,
previous_period_hours: int,
) -> float:
"""Calculate normalized view growth score using real PageView data."""
try:
current_views, previous_views, growth_percentage = PageView.get_views_growth(
content_type,
object_id,
current_period_hours,
previous_period_hours,
)
if previous_views == 0:
# New content with views gets boost
return min(current_views / 100.0, 1.0) if current_views > 0 else 0.0
# Normalize growth percentage to 0-1 scale
# 100% growth = 0.5, 500% growth = 1.0
normalized_growth = (
min(growth_percentage / 500.0, 1.0) if growth_percentage > 0 else 0.0
)
return max(normalized_growth, 0.0)
except Exception as e:
logger.warning(f"Error calculating view growth: {e}")
return 0.0
def _calculate_rating_score(content_obj: Any) -> float:
"""Calculate normalized rating score."""
try:
rating = getattr(content_obj, "average_rating", None)
if rating is None or rating == 0:
return 0.3 # Neutral score for unrated content
# Normalize rating from 1-10 scale to 0-1 scale
# Rating of 5 = 0.4, Rating of 8 = 0.7, Rating of 10 = 1.0
return min(max((float(rating) - 1) / 9.0, 0.0), 1.0)
except Exception as e:
logger.warning(f"Error calculating rating score: {e}")
return 0.3
def _calculate_recency_score(content_obj: Any) -> float:
"""Calculate recency score based on when content was added/updated."""
try:
# Use opening_date for parks/rides, or created_at as fallback
date_added = getattr(content_obj, "opening_date", None)
if not date_added:
date_added = getattr(content_obj, "created_at", None)
if not date_added:
return 0.5 # Neutral score for unknown dates
# Handle both date and datetime objects
if hasattr(date_added, "date"):
date_added = date_added.date()
# Calculate days since added
today = timezone.now().date()
days_since_added = (today - date_added).days
# Recency score: newer content gets higher scores
# 0 days = 1.0, 30 days = 0.8, 365 days = 0.1, >365 days = 0.0
if days_since_added <= 0:
return 1.0
elif days_since_added <= 30:
return 1.0 - (days_since_added / 30.0) * 0.2 # 1.0 to 0.8
elif days_since_added <= 365:
return 0.8 - ((days_since_added - 30) / (365 - 30)) * 0.7 # 0.8 to 0.1
else:
return 0.0
except Exception as e:
logger.warning(f"Error calculating recency score: {e}")
return 0.5
def _calculate_popularity_score(
content_type: ContentType, object_id: int, hours: int
) -> float:
"""Calculate popularity score based on total view count."""
try:
total_views = PageView.get_total_views_count(
content_type, object_id, hours=hours
)
# Normalize views to 0-1 scale
# 0 views = 0.0, 100 views = 0.5, 1000+ views = 1.0
if total_views == 0:
return 0.0
elif total_views <= 100:
return total_views / 200.0 # 0.0 to 0.5
else:
return min(0.5 + (total_views - 100) / 1800.0, 1.0) # 0.5 to 1.0
except Exception as e:
logger.warning(f"Error calculating popularity score: {e}")
return 0.0
def _get_new_parks(cutoff_date: datetime, limit: int) -> List[Dict[str, Any]]:
"""Get recently added parks using real data."""
new_parks = (
Park.objects.filter(
Q(created_at__gte=cutoff_date) | Q(opening_date__gte=cutoff_date.date()),
status="OPERATING",
)
.select_related("location", "operator")
.order_by("-created_at", "-opening_date")[:limit]
)
results = []
for park in new_parks:
date_added = park.opening_date or park.created_at
if date_added:
if isinstance(date_added, datetime):
date_added = date_added.date()
opening_date = getattr(park, "opening_date", None)
if opening_date and isinstance(opening_date, datetime):
opening_date = opening_date.date()
results.append(
{
"content_object": park,
"content_type": "park",
"id": park.pk,
"name": park.name,
"slug": park.slug,
"park": park.name, # For parks, park field is the park name itself
"category": "park",
"date_added": date_added.isoformat() if date_added else "",
"date_opened": opening_date.isoformat() if opening_date else "",
}
)
return results
def _get_new_rides(cutoff_date: datetime, limit: int) -> List[Dict[str, Any]]:
"""Get recently added rides using real data."""
new_rides = (
Ride.objects.filter(
Q(created_at__gte=cutoff_date) | Q(opening_date__gte=cutoff_date.date()),
status="OPERATING",
)
.select_related("park", "park__location")
.order_by("-created_at", "-opening_date")[:limit]
)
results = []
for ride in new_rides:
date_added = getattr(ride, "opening_date", None) or getattr(
ride, "created_at", None
)
if date_added:
if isinstance(date_added, datetime):
date_added = date_added.date()
opening_date = getattr(ride, "opening_date", None)
if opening_date and isinstance(opening_date, datetime):
opening_date = opening_date.date()
results.append(
{
"content_object": ride,
"content_type": "ride",
"id": ride.pk,
"name": ride.name,
"slug": ride.slug,
"park": ride.park.name if ride.park else "",
"category": "ride",
"date_added": date_added.isoformat() if date_added else "",
"date_opened": opening_date.isoformat() if opening_date else "",
}
)
return results
def _format_trending_results(
trending_items: List[Dict[str, Any]],
current_period_hours: int,
previous_period_hours: int,
) -> List[Dict[str, Any]]:
"""Format trending results for frontend consumption."""
formatted_results = []
for rank, item in enumerate(trending_items, 1):
try:
# Get view change for display
content_obj = item["content_object"]
ct = ContentType.objects.get_for_model(content_obj)
current_views, previous_views, growth_percentage = (
PageView.get_views_growth(
ct,
content_obj.id,
current_period_hours,
previous_period_hours,
)
)
# Format exactly as frontend expects
formatted_item = {
"id": item["id"],
"name": item["name"],
"location": item["location"],
"category": item["category"],
"rating": item["rating"],
"rank": rank,
"views": current_views,
"views_change": (
f"+{growth_percentage:.1f}%"
if growth_percentage > 0
else f"{growth_percentage:.1f}%"
),
"slug": item["slug"],
}
formatted_results.append(formatted_item)
except Exception as e:
logger.warning(f"Error formatting trending item: {e}")
return formatted_results
def _format_new_content_results(
new_items: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Format new content results for frontend consumption."""
formatted_results = []
for item in new_items:
try:
# Format exactly as frontend expects
formatted_item = {
"id": item["id"],
"name": item["name"],
"park": item["park"],
"category": item["category"],
"date_added": item["date_added"],
"date_opened": item["date_opened"],
"slug": item["slug"],
}
formatted_results.append(formatted_item)
except Exception as e:
logger.warning(f"Error formatting new content item: {e}")
return formatted_results