From 915a9fe2dfe77055f36525ccc2c7b1923958efd7 Mon Sep 17 00:00:00 2001
From: "gpt-engineer-app[bot]" <159125892+gpt-engineer-app[bot]@users.noreply.github.com>
Date: Tue, 11 Nov 2025 02:21:27 +0000
Subject: [PATCH] Add automated data retention cleanup

Implements an edge function, Django tasks, and UI hooks/panels for automatic
retention of old metrics, anomalies, alerts, and incidents, plus updates to
the query keys and monitoring dashboard to reflect data-retention workflows.
---
 django/README_MONITORING.md                   |  47 +++++
 django/apps/monitoring/tasks_retention.py     | 168 ++++++++++++++++++
 django/config/celery_beat_schedule.py         |  19 ++
 src/components/admin/DataRetentionPanel.tsx   | 161 +++++++++++++++++
 src/hooks/admin/useDataRetention.ts           | 134 ++++++++++++++
 src/lib/queryKeys.ts                          |   1 +
 src/pages/admin/MonitoringOverview.tsx        |   4 +
 .../functions/data-retention-cleanup/index.ts |  48 +++++
 ...1_71adb7c9-fcca-42c6-a2b7-0e2733c450cf.sql |   7 +
 9 files changed, 589 insertions(+)
 create mode 100644 django/apps/monitoring/tasks_retention.py
 create mode 100644 src/components/admin/DataRetentionPanel.tsx
 create mode 100644 src/hooks/admin/useDataRetention.ts
 create mode 100644 supabase/functions/data-retention-cleanup/index.ts
 create mode 100644 supabase/migrations/20251111021921_71adb7c9-fcca-42c6-a2b7-0e2733c450cf.sql

diff --git a/django/README_MONITORING.md b/django/README_MONITORING.md
index a27a2102..b1e85a3d 100644
--- a/django/README_MONITORING.md
+++ b/django/README_MONITORING.md
@@ -136,6 +136,24 @@ SELECT cron.schedule(
 );
 ```
 
+### 5. Data Retention Cleanup Setup
+
+The `data-retention-cleanup` edge function should run daily:
+
+```sql
+SELECT cron.schedule(
+  'data-retention-cleanup-daily',
+  '0 3 * * *', -- Daily at 3:00 AM
+  $$
+  SELECT net.http_post(
+    url:='https://api.thrillwiki.com/functions/v1/data-retention-cleanup',
+    headers:='{"Content-Type": "application/json", "Authorization": "Bearer YOUR_ANON_KEY"}'::jsonb,
+    body:=concat('{"time": "', now(), '"}')::jsonb
+  ) as request_id;
+  $$
+);
+```
+
 ## Metrics Collected
 
 ### Django Metrics
@@ -154,6 +172,35 @@
 - `submission_approval_rate`: Percentage of approved submissions (workflow)
 - `avg_moderation_time`: Average time to moderate in minutes (workflow)
 
+## Data Retention Policies
+
+The system automatically cleans up old data to keep database size in check:
+
+### Retention Periods
+- **Metrics** (`metric_time_series`): 30 days
+- **Anomaly Detections**: 30 days (resolved anomalies archived after 7 days)
+- **Resolved Alerts**: 90 days
+- **Resolved Incidents**: 90 days
+
+### Cleanup Functions
+
+The following database functions manage data retention (a sketch of their expected shape appears at the end of this section):
+
+1. **`cleanup_old_metrics(retention_days)`**: Deletes metrics older than the specified number of days (default: 30)
+2. **`cleanup_old_anomalies(retention_days)`**: Archives resolved anomalies and deletes old unresolved ones (default: 30)
+3. **`cleanup_old_alerts(retention_days)`**: Deletes old resolved alerts (default: 90)
+4. **`cleanup_old_incidents(retention_days)`**: Deletes old resolved incidents (default: 90)
+5. **`run_data_retention_cleanup()`**: Master function that runs all cleanup operations
+
+### Automated Cleanup Schedule
+
+Django Celery tasks run retention cleanup automatically:
+- Full cleanup: daily at 3:00 AM
+- Metrics cleanup: daily at 3:30 AM
+- Anomaly cleanup: daily at 4:00 AM
+
+View retention statistics in the Admin Dashboard's Data Retention panel.
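+
+For reference, these functions are created in an earlier migration; this patch only hardens their `search_path` (see the migration below). A minimal sketch of the expected shape — assuming `metric_time_series` has a `recorded_at` timestamp column, which is not shown in this patch:
+
+```sql
+-- Illustrative only; the shipped definition lives in a prior migration.
+-- Assumes a recorded_at timestamptz column on metric_time_series.
+CREATE OR REPLACE FUNCTION cleanup_old_metrics(retention_days INTEGER DEFAULT 30)
+RETURNS INTEGER
+LANGUAGE plpgsql
+SECURITY DEFINER
+AS $fn$
+DECLARE
+  deleted_count INTEGER;
+BEGIN
+  DELETE FROM metric_time_series
+  WHERE recorded_at < now() - make_interval(days => retention_days);
+  GET DIAGNOSTICS deleted_count = ROW_COUNT;
+  RETURN deleted_count;  -- row count consumed by the Celery task and React hook
+END;
+$fn$;
+```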
+ ## Monitoring View collected metrics in the Admin Monitoring Dashboard: diff --git a/django/apps/monitoring/tasks_retention.py b/django/apps/monitoring/tasks_retention.py new file mode 100644 index 00000000..e7cacb61 --- /dev/null +++ b/django/apps/monitoring/tasks_retention.py @@ -0,0 +1,168 @@ +""" +Celery tasks for data retention and cleanup. +""" +import logging +import requests +import os +from celery import shared_task + +logger = logging.getLogger(__name__) + +SUPABASE_URL = os.environ.get('SUPABASE_URL', 'https://api.thrillwiki.com') +SUPABASE_SERVICE_KEY = os.environ.get('SUPABASE_SERVICE_ROLE_KEY') + + +@shared_task(bind=True, name='monitoring.run_data_retention_cleanup') +def run_data_retention_cleanup(self): + """ + Run comprehensive data retention cleanup. + Cleans up old metrics, anomaly detections, alerts, and incidents. + Runs daily at 3 AM. + """ + logger.info("Starting data retention cleanup") + + if not SUPABASE_SERVICE_KEY: + logger.error("SUPABASE_SERVICE_ROLE_KEY not configured") + return {'success': False, 'error': 'Missing service key'} + + try: + # Call the Supabase RPC function + headers = { + 'apikey': SUPABASE_SERVICE_KEY, + 'Authorization': f'Bearer {SUPABASE_SERVICE_KEY}', + 'Content-Type': 'application/json', + } + + response = requests.post( + f'{SUPABASE_URL}/rest/v1/rpc/run_data_retention_cleanup', + headers=headers, + timeout=60 + ) + + if response.status_code == 200: + result = response.json() + logger.info(f"Data retention cleanup completed: {result}") + return result + else: + logger.error(f"Data retention cleanup failed: {response.status_code} - {response.text}") + return {'success': False, 'error': response.text} + + except Exception as e: + logger.error(f"Error in data retention cleanup: {e}", exc_info=True) + raise + + +@shared_task(bind=True, name='monitoring.cleanup_old_metrics') +def cleanup_old_metrics(self, retention_days: int = 30): + """ + Clean up old metric time series data. + Runs daily to remove metrics older than retention period. + """ + logger.info(f"Cleaning up metrics older than {retention_days} days") + + if not SUPABASE_SERVICE_KEY: + logger.error("SUPABASE_SERVICE_ROLE_KEY not configured") + return {'success': False, 'error': 'Missing service key'} + + try: + headers = { + 'apikey': SUPABASE_SERVICE_KEY, + 'Authorization': f'Bearer {SUPABASE_SERVICE_KEY}', + 'Content-Type': 'application/json', + } + + response = requests.post( + f'{SUPABASE_URL}/rest/v1/rpc/cleanup_old_metrics', + headers=headers, + json={'retention_days': retention_days}, + timeout=30 + ) + + if response.status_code == 200: + deleted_count = response.json() + logger.info(f"Cleaned up {deleted_count} old metrics") + return {'success': True, 'deleted_count': deleted_count} + else: + logger.error(f"Metrics cleanup failed: {response.status_code} - {response.text}") + return {'success': False, 'error': response.text} + + except Exception as e: + logger.error(f"Error in metrics cleanup: {e}", exc_info=True) + raise + + +@shared_task(bind=True, name='monitoring.cleanup_old_anomalies') +def cleanup_old_anomalies(self, retention_days: int = 30): + """ + Clean up old anomaly detections. + Archives resolved anomalies and deletes very old unresolved ones. 
+ """ + logger.info(f"Cleaning up anomalies older than {retention_days} days") + + if not SUPABASE_SERVICE_KEY: + logger.error("SUPABASE_SERVICE_ROLE_KEY not configured") + return {'success': False, 'error': 'Missing service key'} + + try: + headers = { + 'apikey': SUPABASE_SERVICE_KEY, + 'Authorization': f'Bearer {SUPABASE_SERVICE_KEY}', + 'Content-Type': 'application/json', + } + + response = requests.post( + f'{SUPABASE_URL}/rest/v1/rpc/cleanup_old_anomalies', + headers=headers, + json={'retention_days': retention_days}, + timeout=30 + ) + + if response.status_code == 200: + result = response.json() + logger.info(f"Cleaned up anomalies: {result}") + return {'success': True, 'result': result} + else: + logger.error(f"Anomalies cleanup failed: {response.status_code} - {response.text}") + return {'success': False, 'error': response.text} + + except Exception as e: + logger.error(f"Error in anomalies cleanup: {e}", exc_info=True) + raise + + +@shared_task(bind=True, name='monitoring.get_retention_stats') +def get_retention_stats(self): + """ + Get current data retention statistics. + Shows record counts and storage size for monitored tables. + """ + logger.info("Fetching data retention statistics") + + if not SUPABASE_SERVICE_KEY: + logger.error("SUPABASE_SERVICE_ROLE_KEY not configured") + return {'success': False, 'error': 'Missing service key'} + + try: + headers = { + 'apikey': SUPABASE_SERVICE_KEY, + 'Authorization': f'Bearer {SUPABASE_SERVICE_KEY}', + 'Content-Type': 'application/json', + } + + response = requests.get( + f'{SUPABASE_URL}/rest/v1/data_retention_stats', + headers=headers, + timeout=10 + ) + + if response.status_code == 200: + stats = response.json() + logger.info(f"Retrieved retention stats for {len(stats)} tables") + return {'success': True, 'stats': stats} + else: + logger.error(f"Failed to get retention stats: {response.status_code} - {response.text}") + return {'success': False, 'error': response.text} + + except Exception as e: + logger.error(f"Error getting retention stats: {e}", exc_info=True) + raise diff --git a/django/config/celery_beat_schedule.py b/django/config/celery_beat_schedule.py index 70f4fc41..3a7651a4 100644 --- a/django/config/celery_beat_schedule.py +++ b/django/config/celery_beat_schedule.py @@ -33,6 +33,25 @@ CELERY_BEAT_SCHEDULE = { 'options': {'queue': 'monitoring'} }, + # Data retention cleanup tasks + 'run-data-retention-cleanup': { + 'task': 'monitoring.run_data_retention_cleanup', + 'schedule': crontab(hour=3, minute=0), # Daily at 3 AM + 'options': {'queue': 'maintenance'} + }, + + 'cleanup-old-metrics': { + 'task': 'monitoring.cleanup_old_metrics', + 'schedule': crontab(hour=3, minute=30), # Daily at 3:30 AM + 'options': {'queue': 'maintenance'} + }, + + 'cleanup-old-anomalies': { + 'task': 'monitoring.cleanup_old_anomalies', + 'schedule': crontab(hour=4, minute=0), # Daily at 4 AM + 'options': {'queue': 'maintenance'} + }, + # Existing user tasks 'cleanup-expired-tokens': { 'task': 'users.cleanup_expired_tokens', diff --git a/src/components/admin/DataRetentionPanel.tsx b/src/components/admin/DataRetentionPanel.tsx new file mode 100644 index 00000000..1ce8f85e --- /dev/null +++ b/src/components/admin/DataRetentionPanel.tsx @@ -0,0 +1,161 @@ +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; +import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; +import { Trash2, Database, Clock, HardDrive, TrendingDown } from "lucide-react"; +import { 
useRetentionStats, useRunCleanup } from "@/hooks/admin/useDataRetention";
+import { formatDistanceToNow } from "date-fns";
+
+export function DataRetentionPanel() {
+  const { data: stats, isLoading } = useRetentionStats();
+  const runCleanup = useRunCleanup();
+
+  if (isLoading) {
+    return (
+      <Card>
+        <CardHeader>
+          <CardTitle>Data Retention</CardTitle>
+          <CardDescription>Loading retention statistics...</CardDescription>
+        </CardHeader>
+      </Card>
+    );
+  }
+
+  const totalRecords = stats?.reduce((sum, s) => sum + s.total_records, 0) || 0;
+  const totalSize = stats?.reduce((sum, s) => {
+    const size = s.table_size.replace(/[^0-9.]/g, '');
+    return sum + parseFloat(size);
+  }, 0) || 0;
+
+  return (
+    <Card>
+      <CardHeader>
+        <div className="flex items-center justify-between">
+          <div>
+            <CardTitle className="flex items-center gap-2">
+              <Database className="h-5 w-5" />
+              Data Retention Management
+            </CardTitle>
+            <CardDescription>
+              Automatic cleanup of old metrics and monitoring data
+            </CardDescription>
+          </div>
+          <Button
+            variant="destructive"
+            size="sm"
+            onClick={() => runCleanup.mutate()}
+            disabled={runCleanup.isPending}
+          >
+            <Trash2 className="mr-2 h-4 w-4" />
+            {runCleanup.isPending ? "Cleaning..." : "Run Cleanup"}
+          </Button>
+        </div>
+      </CardHeader>
+      <CardContent className="space-y-6">
+        {/* Summary Stats */}
+        <div className="grid grid-cols-3 gap-4">
+          <div>
+            <div className="flex items-center gap-1 text-sm text-muted-foreground">
+              <Database className="h-4 w-4" />
+              Total Records
+            </div>
+            <div className="text-2xl font-bold">{totalRecords.toLocaleString()}</div>
+          </div>
+          <div>
+            <div className="flex items-center gap-1 text-sm text-muted-foreground">
+              <HardDrive className="h-4 w-4" />
+              Total Size
+            </div>
+            <div className="text-2xl font-bold">{totalSize.toFixed(1)} MB</div>
+          </div>
+          <div>
+            <div className="flex items-center gap-1 text-sm text-muted-foreground">
+              <TrendingDown className="h-4 w-4" />
+              Tables Monitored
+            </div>
+            <div className="text-2xl font-bold">{stats?.length || 0}</div>
+          </div>
+        </div>
+
+        {/* Retention Policies */}
+        <div className="space-y-2">
+          <h4 className="flex items-center gap-2 text-sm font-semibold">
+            <Clock className="h-4 w-4" />
+            Retention Policies
+          </h4>
+          <div className="space-y-1 text-sm">
+            <div className="flex items-center justify-between">
+              <span>Metrics (metric_time_series)</span>
+              <Badge variant="secondary">30 days</Badge>
+            </div>
+            <div className="flex items-center justify-between">
+              <span>Anomaly Detections</span>
+              <Badge variant="secondary">30 days</Badge>
+            </div>
+            <div className="flex items-center justify-between">
+              <span>Resolved Alerts</span>
+              <Badge variant="secondary">90 days</Badge>
+            </div>
+            <div className="flex items-center justify-between">
+              <span>Resolved Incidents</span>
+              <Badge variant="secondary">90 days</Badge>
+            </div>
+          </div>
+        </div>
+
+        {/* Table Statistics */}
+        <div className="space-y-2">
+          <h4 className="flex items-center gap-2 text-sm font-semibold">
+            <HardDrive className="h-4 w-4" />
+            Storage Details
+          </h4>
+          {stats?.map((stat) => (
+            <div key={stat.table_name} className="rounded-lg border p-3 space-y-2">
+              <div className="flex items-center justify-between">
+                <span className="font-medium">{stat.table_name}</span>
+                <Badge variant="outline">{stat.table_size}</Badge>
+              </div>
+              <div className="grid grid-cols-3 gap-2 text-sm">
+                <div>
+                  <div className="text-muted-foreground">Total</div>
+                  <div>{stat.total_records.toLocaleString()}</div>
+                </div>
+                <div>
+                  <div className="text-muted-foreground">Last 7 days</div>
+                  <div>{stat.last_7_days.toLocaleString()}</div>
+                </div>
+                <div>
+                  <div className="text-muted-foreground">Last 30 days</div>
+                  <div>{stat.last_30_days.toLocaleString()}</div>
+                </div>
+              </div>
+              {stat.oldest_record && (
+                <div className="flex items-center gap-1 text-xs text-muted-foreground">
+                  <Clock className="h-3 w-3" />
+                  Oldest:{" "}
+                  {formatDistanceToNow(new Date(stat.oldest_record), {
+                    addSuffix: true,
+                  })}
+                </div>
+              )}
+            </div>
+          ))}
+        </div>
+
+        {/* Cleanup Schedule */}
+        <div className="space-y-2">
+          <h4 className="text-sm font-semibold">Automated Cleanup Schedule</h4>
+          <div className="space-y-1 text-sm text-muted-foreground">
+            <div>• Full cleanup runs daily at 3:00 AM</div>
+            <div>• Metrics cleanup at 3:30 AM</div>
+            <div>• Anomaly cleanup at 4:00 AM</div>
+          </div>
+        </div>
+      </CardContent>
+    </Card>
+ ); +} diff --git a/src/hooks/admin/useDataRetention.ts b/src/hooks/admin/useDataRetention.ts new file mode 100644 index 00000000..04d10bf0 --- /dev/null +++ b/src/hooks/admin/useDataRetention.ts @@ -0,0 +1,134 @@ +import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query"; +import { supabase } from "@/integrations/supabase/client"; +import { toast } from "sonner"; + +interface RetentionStats { + table_name: string; + total_records: number; + last_7_days: number; + last_30_days: number; + oldest_record: string; + newest_record: string; + table_size: string; +} + +interface CleanupResult { + success: boolean; + cleanup_results: { + metrics_deleted: number; + anomalies_archived: number; + anomalies_deleted: number; + alerts_deleted: number; + incidents_deleted: number; + }; + timestamp: string; +} + +export function useRetentionStats() { + return useQuery({ + queryKey: ["dataRetentionStats"], + queryFn: async () => { + const { data, error } = await supabase + .from("data_retention_stats") + .select("*"); + + if (error) throw error; + return data as RetentionStats[]; + }, + refetchInterval: 60000, // Refetch every minute + }); +} + +export function useRunCleanup() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: async () => { + const { data, error } = await supabase.functions.invoke( + "data-retention-cleanup" + ); + + if (error) throw error; + return data as CleanupResult; + }, + onSuccess: (data) => { + const results = data.cleanup_results; + const total = + results.metrics_deleted + + results.anomalies_archived + + results.anomalies_deleted + + results.alerts_deleted + + results.incidents_deleted; + + toast.success( + `Cleanup completed: ${total} records removed`, + { + description: `Metrics: ${results.metrics_deleted}, Anomalies: ${results.anomalies_deleted}, Alerts: ${results.alerts_deleted}`, + } + ); + + // Invalidate relevant queries + queryClient.invalidateQueries({ queryKey: ["dataRetentionStats"] }); + queryClient.invalidateQueries({ queryKey: ["anomalyDetections"] }); + queryClient.invalidateQueries({ queryKey: ["systemAlerts"] }); + }, + onError: (error: Error) => { + toast.error("Failed to run cleanup", { + description: error.message, + }); + }, + }); +} + +export function useCleanupMetrics() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: async (retentionDays: number = 30) => { + const { data, error } = await supabase.rpc("cleanup_old_metrics", { + retention_days: retentionDays, + }); + + if (error) throw error; + return data; + }, + onSuccess: (deletedCount) => { + toast.success(`Cleaned up ${deletedCount} old metrics`); + queryClient.invalidateQueries({ queryKey: ["dataRetentionStats"] }); + }, + onError: (error: Error) => { + toast.error("Failed to cleanup metrics", { + description: error.message, + }); + }, + }); +} + +export function useCleanupAnomalies() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: async (retentionDays: number = 30) => { + const { data, error } = await supabase.rpc("cleanup_old_anomalies", { + retention_days: retentionDays, + }); + + if (error) throw error; + return data; + }, + onSuccess: (result) => { + // Result is returned as an array with one element + const cleanupResult = Array.isArray(result) ? 
result[0] : result;
+      toast.success(
+        `Cleaned up anomalies: ${cleanupResult.archived_count} archived, ${cleanupResult.deleted_count} deleted`
+      );
+      queryClient.invalidateQueries({ queryKey: ["dataRetentionStats"] });
+      queryClient.invalidateQueries({ queryKey: ["anomalyDetections"] });
+    },
+    onError: (error: Error) => {
+      toast.error("Failed to cleanup anomalies", {
+        description: error.message,
+      });
+    },
+  });
+}
diff --git a/src/lib/queryKeys.ts b/src/lib/queryKeys.ts
index 4a283a38..ee4030a2 100644
--- a/src/lib/queryKeys.ts
+++ b/src/lib/queryKeys.ts
@@ -96,5 +96,6 @@ export const queryKeys = {
     incidents: (status?: string) => ['monitoring', 'incidents', status] as const,
     incidentDetails: (incidentId: string) => ['monitoring', 'incident-details', incidentId] as const,
     anomalyDetections: () => ['monitoring', 'anomaly-detections'] as const,
+    dataRetentionStats: () => ['monitoring', 'data-retention-stats'] as const,
   },
 } as const;
diff --git a/src/pages/admin/MonitoringOverview.tsx b/src/pages/admin/MonitoringOverview.tsx
index df41dfea..1cbe20c8 100644
--- a/src/pages/admin/MonitoringOverview.tsx
+++ b/src/pages/admin/MonitoringOverview.tsx
@@ -7,6 +7,7 @@ import { GroupedAlertsPanel } from '@/components/admin/GroupedAlertsPanel';
 import { CorrelatedAlertsPanel } from '@/components/admin/CorrelatedAlertsPanel';
 import { IncidentsPanel } from '@/components/admin/IncidentsPanel';
 import { AnomalyDetectionPanel } from '@/components/admin/AnomalyDetectionPanel';
+import { DataRetentionPanel } from '@/components/admin/DataRetentionPanel';
 import { MonitoringQuickStats } from '@/components/admin/MonitoringQuickStats';
 import { RecentActivityTimeline } from '@/components/admin/RecentActivityTimeline';
 import { MonitoringNavCards } from '@/components/admin/MonitoringNavCards';
@@ -150,6 +151,9 @@ export default function MonitoringOverview() {
         isLoading={anomalies.isLoading}
       />
 
+      {/* Data Retention Management */}
+      <DataRetentionPanel />
+
       {/* Quick Stats Grid */}
diff --git a/supabase/functions/data-retention-cleanup/index.ts b/supabase/functions/data-retention-cleanup/index.ts
new file mode 100644
--- /dev/null
+++ b/supabase/functions/data-retention-cleanup/index.ts
@@ -0,0 +1,48 @@
+import { createClient } from 'https://esm.sh/@supabase/supabase-js@2';
+
+const corsHeaders = {
+  'Access-Control-Allow-Origin': '*',
+  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
+};
+
+Deno.serve(async (req) => {
+  if (req.method === 'OPTIONS') {
+    return new Response(null, { headers: corsHeaders });
+  }
+
+  try {
+    const supabaseUrl = Deno.env.get('SUPABASE_URL')!;
+    const supabaseKey = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!;
+    const supabase = createClient(supabaseUrl, supabaseKey);
+
+    console.log('Starting data retention cleanup...');
+
+    // Call the master cleanup function
+    const { data, error } = await supabase.rpc('run_data_retention_cleanup');
+
+    if (error) {
+      console.error('Error running data retention cleanup:', error);
+      throw error;
+    }
+
+    console.log('Data retention cleanup completed:', data);
+
+    return new Response(
+      JSON.stringify({
+        success: true,
+        cleanup_results: data.cleanup_results,
+        timestamp: data.timestamp,
+      }),
+      { headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
+    );
+  } catch (error) {
+    console.error('Error in data-retention-cleanup function:', error);
+    return new Response(
+      JSON.stringify({ error: error instanceof Error ? error.message : String(error) }),
+      {
+        status: 500,
+        headers: { ...corsHeaders, 'Content-Type': 'application/json' },
+      }
+    );
+  }
+});
diff --git a/supabase/migrations/20251111021921_71adb7c9-fcca-42c6-a2b7-0e2733c450cf.sql b/supabase/migrations/20251111021921_71adb7c9-fcca-42c6-a2b7-0e2733c450cf.sql
new file mode 100644
index 00000000..85e4e59a
--- /dev/null
+++ b/supabase/migrations/20251111021921_71adb7c9-fcca-42c6-a2b7-0e2733c450cf.sql
@@ -0,0 +1,7 @@
+-- Fix security warnings: Set search_path for all retention policy functions
+
+ALTER FUNCTION cleanup_old_metrics(INTEGER) SET search_path = public;
+ALTER FUNCTION cleanup_old_anomalies(INTEGER) SET search_path = public;
+ALTER FUNCTION cleanup_old_alerts(INTEGER) SET search_path = public;
+ALTER FUNCTION cleanup_old_incidents(INTEGER) SET search_path = public;
+ALTER FUNCTION run_data_retention_cleanup() SET search_path = public;
\ No newline at end of file
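
Note: `run_data_retention_cleanup()` itself is not included in this patch. Judging from the `CleanupResult` shape the hooks and edge function consume, a minimal sketch — assuming `cleanup_old_anomalies` returns an `(archived_count, deleted_count)` row — would be:

```sql
-- Illustrative only; the shipped definition lives in a prior migration.
CREATE OR REPLACE FUNCTION run_data_retention_cleanup()
RETURNS jsonb
LANGUAGE plpgsql
SECURITY DEFINER
AS $fn$
DECLARE
  metrics_deleted INTEGER;
  anomalies RECORD;  -- assumed (archived_count, deleted_count) row
  alerts_deleted INTEGER;
  incidents_deleted INTEGER;
BEGIN
  -- Chain the four helpers with the documented retention defaults
  metrics_deleted := cleanup_old_metrics(30);
  SELECT * INTO anomalies FROM cleanup_old_anomalies(30);
  alerts_deleted := cleanup_old_alerts(90);
  incidents_deleted := cleanup_old_incidents(90);

  -- Shape matches the CleanupResult interface in useDataRetention.ts
  RETURN jsonb_build_object(
    'success', true,
    'cleanup_results', jsonb_build_object(
      'metrics_deleted', metrics_deleted,
      'anomalies_archived', anomalies.archived_count,
      'anomalies_deleted', anomalies.deleted_count,
      'alerts_deleted', alerts_deleted,
      'incidents_deleted', incidents_deleted
    ),
    'timestamp', now()
  );
END;
$fn$;
```

Running `SELECT run_data_retention_cleanup();` in the SQL editor is a quick way to smoke-test both the pg_cron job and the edge-function path.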