mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-24 03:11:13 -05:00
Implement ML Anomaly Detection
Introduce statistical anomaly detection for metrics via edge function, hooks, and UI components. Adds detection algorithms (z-score, moving average, rate of change), anomaly storage, auto-alerts, and dashboard rendering of detected anomalies with run-once trigger and scheduling guidance.
This commit is contained in:
302
supabase/functions/detect-anomalies/index.ts
Normal file
302
supabase/functions/detect-anomalies/index.ts
Normal file
@@ -0,0 +1,302 @@
|
||||
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2.57.4';
|
||||
|
||||
// Shared CORS headers attached to every response from this edge function.
// NOTE(review): no 'Access-Control-Allow-Methods' header is set — presumably
// only POST/OPTIONS are used; confirm the hosting platform does not need it.
const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
};
|
||||
|
||||
// One sample from the `metric_time_series` table (shape of the SELECT below).
interface MetricData {
  timestamp: string;     // observation time (ISO-8601 string from TIMESTAMPTZ)
  metric_value: number;  // observed value at `timestamp`
}
|
||||
|
||||
// Per-metric tuning row from the `anomaly_detection_config` table.
interface AnomalyDetectionConfig {
  metric_name: string;               // unique metric identifier, e.g. 'error_rate'
  metric_category: string;           // grouping, e.g. 'system' | 'api' | 'database'
  enabled: boolean;                  // only enabled rows are processed per run
  sensitivity: number;               // algorithm threshold (meaning is per-algorithm)
  lookback_window_minutes: number;   // how far back to fetch history
  detection_algorithms: string[];    // names matched against the switch in the handler
  min_data_points: number;           // skip detection below this many samples
  alert_threshold_score: number;     // minimum deviation score to record an anomaly
  auto_create_alert: boolean;        // also raise a system_alerts row for severe cases
}
|
||||
|
||||
interface AnomalyResult {
|
||||
isAnomaly: boolean;
|
||||
anomalyType: string;
|
||||
deviationScore: number;
|
||||
confidenceScore: number;
|
||||
algorithm: string;
|
||||
baselineValue: number;
|
||||
anomalyValue: number;
|
||||
}
|
||||
|
||||
// Statistical anomaly detection algorithms
|
||||
class AnomalyDetector {
|
||||
// Z-Score algorithm: Detects outliers based on standard deviation
|
||||
static zScore(data: number[], currentValue: number, sensitivity: number = 3.0): AnomalyResult {
|
||||
if (data.length < 2) {
|
||||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'z_score', baselineValue: currentValue, anomalyValue: currentValue };
|
||||
}
|
||||
|
||||
const mean = data.reduce((sum, val) => sum + val, 0) / data.length;
|
||||
const variance = data.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / data.length;
|
||||
const stdDev = Math.sqrt(variance);
|
||||
|
||||
if (stdDev === 0) {
|
||||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'z_score', baselineValue: mean, anomalyValue: currentValue };
|
||||
}
|
||||
|
||||
const zScore = Math.abs((currentValue - mean) / stdDev);
|
||||
const isAnomaly = zScore > sensitivity;
|
||||
|
||||
return {
|
||||
isAnomaly,
|
||||
anomalyType: currentValue > mean ? 'spike' : 'drop',
|
||||
deviationScore: zScore,
|
||||
confidenceScore: Math.min(zScore / (sensitivity * 2), 1),
|
||||
algorithm: 'z_score',
|
||||
baselineValue: mean,
|
||||
anomalyValue: currentValue,
|
||||
};
|
||||
}
|
||||
|
||||
// Moving Average algorithm: Detects deviation from trend
|
||||
static movingAverage(data: number[], currentValue: number, sensitivity: number = 2.5, window: number = 10): AnomalyResult {
|
||||
if (data.length < window) {
|
||||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'moving_average', baselineValue: currentValue, anomalyValue: currentValue };
|
||||
}
|
||||
|
||||
const recentData = data.slice(-window);
|
||||
const ma = recentData.reduce((sum, val) => sum + val, 0) / recentData.length;
|
||||
|
||||
const mad = recentData.reduce((sum, val) => sum + Math.abs(val - ma), 0) / recentData.length;
|
||||
|
||||
if (mad === 0) {
|
||||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'moving_average', baselineValue: ma, anomalyValue: currentValue };
|
||||
}
|
||||
|
||||
const deviation = Math.abs(currentValue - ma) / mad;
|
||||
const isAnomaly = deviation > sensitivity;
|
||||
|
||||
return {
|
||||
isAnomaly,
|
||||
anomalyType: currentValue > ma ? 'spike' : 'drop',
|
||||
deviationScore: deviation,
|
||||
confidenceScore: Math.min(deviation / (sensitivity * 2), 1),
|
||||
algorithm: 'moving_average',
|
||||
baselineValue: ma,
|
||||
anomalyValue: currentValue,
|
||||
};
|
||||
}
|
||||
|
||||
// Rate of Change algorithm: Detects sudden changes
|
||||
static rateOfChange(data: number[], currentValue: number, sensitivity: number = 3.0): AnomalyResult {
|
||||
if (data.length < 2) {
|
||||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'rate_of_change', baselineValue: currentValue, anomalyValue: currentValue };
|
||||
}
|
||||
|
||||
const previousValue = data[data.length - 1];
|
||||
|
||||
if (previousValue === 0) {
|
||||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'rate_of_change', baselineValue: previousValue, anomalyValue: currentValue };
|
||||
}
|
||||
|
||||
const percentChange = Math.abs((currentValue - previousValue) / previousValue) * 100;
|
||||
const isAnomaly = percentChange > (sensitivity * 10); // sensitivity * 10 = % threshold
|
||||
|
||||
return {
|
||||
isAnomaly,
|
||||
anomalyType: currentValue > previousValue ? 'trend_change' : 'drop',
|
||||
deviationScore: percentChange / 10,
|
||||
confidenceScore: Math.min(percentChange / (sensitivity * 20), 1),
|
||||
algorithm: 'rate_of_change',
|
||||
baselineValue: previousValue,
|
||||
anomalyValue: currentValue,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Deno.serve(async (req) => {
|
||||
if (req.method === 'OPTIONS') {
|
||||
return new Response(null, { headers: corsHeaders });
|
||||
}
|
||||
|
||||
try {
|
||||
const supabaseUrl = Deno.env.get('SUPABASE_URL')!;
|
||||
const supabaseKey = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!;
|
||||
const supabase = createClient(supabaseUrl, supabaseKey);
|
||||
|
||||
console.log('Starting anomaly detection run...');
|
||||
|
||||
// Get all enabled anomaly detection configurations
|
||||
const { data: configs, error: configError } = await supabase
|
||||
.from('anomaly_detection_config')
|
||||
.select('*')
|
||||
.eq('enabled', true);
|
||||
|
||||
if (configError) {
|
||||
console.error('Error fetching configs:', configError);
|
||||
throw configError;
|
||||
}
|
||||
|
||||
console.log(`Processing ${configs?.length || 0} metric configurations`);
|
||||
|
||||
const anomaliesDetected: any[] = [];
|
||||
|
||||
for (const config of (configs as AnomalyDetectionConfig[])) {
|
||||
try {
|
||||
// Fetch historical data for this metric
|
||||
const windowStart = new Date(Date.now() - config.lookback_window_minutes * 60 * 1000);
|
||||
|
||||
const { data: metricData, error: metricError } = await supabase
|
||||
.from('metric_time_series')
|
||||
.select('timestamp, metric_value')
|
||||
.eq('metric_name', config.metric_name)
|
||||
.gte('timestamp', windowStart.toISOString())
|
||||
.order('timestamp', { ascending: true });
|
||||
|
||||
if (metricError) {
|
||||
console.error(`Error fetching metric data for ${config.metric_name}:`, metricError);
|
||||
continue;
|
||||
}
|
||||
|
||||
const data = metricData as MetricData[];
|
||||
|
||||
if (!data || data.length < config.min_data_points) {
|
||||
console.log(`Insufficient data for ${config.metric_name}: ${data?.length || 0} points`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get current value (most recent)
|
||||
const currentValue = data[data.length - 1].metric_value;
|
||||
const historicalValues = data.slice(0, -1).map(d => d.metric_value);
|
||||
|
||||
// Run detection algorithms
|
||||
const results: AnomalyResult[] = [];
|
||||
|
||||
for (const algorithm of config.detection_algorithms) {
|
||||
let result: AnomalyResult;
|
||||
|
||||
switch (algorithm) {
|
||||
case 'z_score':
|
||||
result = AnomalyDetector.zScore(historicalValues, currentValue, config.sensitivity);
|
||||
break;
|
||||
case 'moving_average':
|
||||
result = AnomalyDetector.movingAverage(historicalValues, currentValue, config.sensitivity);
|
||||
break;
|
||||
case 'rate_of_change':
|
||||
result = AnomalyDetector.rateOfChange(historicalValues, currentValue, config.sensitivity);
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result.isAnomaly && result.deviationScore >= config.alert_threshold_score) {
|
||||
results.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
// If any algorithm detected an anomaly
|
||||
if (results.length > 0) {
|
||||
// Use the result with highest confidence
|
||||
const bestResult = results.reduce((best, current) =>
|
||||
current.confidenceScore > best.confidenceScore ? current : best
|
||||
);
|
||||
|
||||
// Determine severity based on deviation score
|
||||
const severity =
|
||||
bestResult.deviationScore >= 5 ? 'critical' :
|
||||
bestResult.deviationScore >= 4 ? 'high' :
|
||||
bestResult.deviationScore >= 3 ? 'medium' : 'low';
|
||||
|
||||
// Insert anomaly detection record
|
||||
const { data: anomaly, error: anomalyError } = await supabase
|
||||
.from('anomaly_detections')
|
||||
.insert({
|
||||
metric_name: config.metric_name,
|
||||
metric_category: config.metric_category,
|
||||
anomaly_type: bestResult.anomalyType,
|
||||
severity,
|
||||
baseline_value: bestResult.baselineValue,
|
||||
anomaly_value: bestResult.anomalyValue,
|
||||
deviation_score: bestResult.deviationScore,
|
||||
confidence_score: bestResult.confidenceScore,
|
||||
detection_algorithm: bestResult.algorithm,
|
||||
time_window_start: windowStart.toISOString(),
|
||||
time_window_end: new Date().toISOString(),
|
||||
metadata: {
|
||||
algorithms_run: config.detection_algorithms,
|
||||
total_data_points: data.length,
|
||||
sensitivity: config.sensitivity,
|
||||
},
|
||||
})
|
||||
.select()
|
||||
.single();
|
||||
|
||||
if (anomalyError) {
|
||||
console.error(`Error inserting anomaly for ${config.metric_name}:`, anomalyError);
|
||||
continue;
|
||||
}
|
||||
|
||||
anomaliesDetected.push(anomaly);
|
||||
|
||||
// Auto-create alert if configured
|
||||
if (config.auto_create_alert && severity in ['critical', 'high']) {
|
||||
const { data: alert, error: alertError } = await supabase
|
||||
.from('system_alerts')
|
||||
.insert({
|
||||
alert_type: 'anomaly_detected',
|
||||
severity,
|
||||
message: `Anomaly detected in ${config.metric_name}: ${bestResult.anomalyType} (${bestResult.deviationScore.toFixed(2)}σ deviation)`,
|
||||
metadata: {
|
||||
anomaly_id: anomaly.id,
|
||||
metric_name: config.metric_name,
|
||||
baseline_value: bestResult.baselineValue,
|
||||
anomaly_value: bestResult.anomalyValue,
|
||||
algorithm: bestResult.algorithm,
|
||||
},
|
||||
})
|
||||
.select()
|
||||
.single();
|
||||
|
||||
if (!alertError && alert) {
|
||||
// Update anomaly with alert_id
|
||||
await supabase
|
||||
.from('anomaly_detections')
|
||||
.update({ alert_created: true, alert_id: alert.id })
|
||||
.eq('id', anomaly.id);
|
||||
|
||||
console.log(`Created alert for anomaly in ${config.metric_name}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Anomaly detected: ${config.metric_name} - ${bestResult.anomalyType} (${bestResult.deviationScore.toFixed(2)}σ)`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error processing metric ${config.metric_name}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Anomaly detection complete. Detected ${anomaliesDetected.length} anomalies`);
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
success: true,
|
||||
anomalies_detected: anomaliesDetected.length,
|
||||
anomalies: anomaliesDetected,
|
||||
}),
|
||||
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('Error in detect-anomalies function:', error);
|
||||
return new Response(
|
||||
JSON.stringify({ error: error.message }),
|
||||
{
|
||||
status: 500,
|
||||
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
|
||||
}
|
||||
);
|
||||
}
|
||||
});
|
||||
@@ -0,0 +1,143 @@
|
||||
-- ML-based Anomaly Detection System

-- Table: Time-series metrics for anomaly detection
-- Raw observations written by the system; one row per (metric, timestamp) sample.
CREATE TABLE metric_time_series (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  metric_name TEXT NOT NULL,
  metric_category TEXT NOT NULL CHECK (metric_category IN ('system', 'database', 'rate_limit', 'moderation', 'api')),
  metric_value NUMERIC NOT NULL,
  timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  metadata JSONB,
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Table: Detected anomalies
-- One row per anomaly recorded by the detect-anomalies edge function.
CREATE TABLE anomaly_detections (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  metric_name TEXT NOT NULL,
  metric_category TEXT NOT NULL,
  anomaly_type TEXT NOT NULL CHECK (anomaly_type IN ('spike', 'drop', 'trend_change', 'outlier', 'pattern_break')),
  severity TEXT NOT NULL CHECK (severity IN ('critical', 'high', 'medium', 'low')),
  baseline_value NUMERIC NOT NULL,
  anomaly_value NUMERIC NOT NULL,
  deviation_score NUMERIC NOT NULL,
  confidence_score NUMERIC NOT NULL CHECK (confidence_score >= 0 AND confidence_score <= 1),
  detection_algorithm TEXT NOT NULL,
  time_window_start TIMESTAMPTZ NOT NULL,
  time_window_end TIMESTAMPTZ NOT NULL,
  detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  alert_created BOOLEAN NOT NULL DEFAULT false,
  -- NOTE(review): no FK to system_alerts(id) — presumably deliberate loose
  -- coupling; confirm orphaned alert_ids are acceptable.
  alert_id UUID,
  metadata JSONB,
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Table: Anomaly detection configuration
-- Per-metric tuning knobs read by the edge function on each run.
CREATE TABLE anomaly_detection_config (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  metric_name TEXT NOT NULL UNIQUE,
  metric_category TEXT NOT NULL,
  enabled BOOLEAN NOT NULL DEFAULT true,
  sensitivity NUMERIC NOT NULL DEFAULT 3.0 CHECK (sensitivity > 0),
  lookback_window_minutes INTEGER NOT NULL DEFAULT 60,
  detection_algorithms TEXT[] NOT NULL DEFAULT ARRAY['z_score', 'moving_average', 'rate_of_change'],
  min_data_points INTEGER NOT NULL DEFAULT 10,
  alert_threshold_score NUMERIC NOT NULL DEFAULT 2.5,
  auto_create_alert BOOLEAN NOT NULL DEFAULT true,
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
|
||||
|
||||
-- View: Recent anomalies with alert status
-- Last 24 hours of detections joined to the alert each one raised (if any).
-- security_invoker=on makes the view run with the querying user's privileges
-- and RLS policies instead of the view owner's.
CREATE OR REPLACE VIEW recent_anomalies_view
WITH (security_invoker=on)
AS
SELECT
  ad.id,
  ad.metric_name,
  ad.metric_category,
  ad.anomaly_type,
  ad.severity,
  ad.baseline_value,
  ad.anomaly_value,
  ad.deviation_score,
  ad.confidence_score,
  ad.detection_algorithm,
  ad.time_window_start,
  ad.time_window_end,
  ad.detected_at,
  ad.alert_created,
  ad.alert_id,
  sa.message as alert_message,
  sa.resolved_at as alert_resolved_at
FROM anomaly_detections ad
-- NOTE(review): alert_id is already UUID, so the ::uuid cast is a no-op.
LEFT JOIN system_alerts sa ON sa.id = ad.alert_id::uuid
WHERE ad.detected_at > NOW() - INTERVAL '24 hours'
ORDER BY ad.detected_at DESC;
|
||||
|
||||
-- Insert default anomaly detection configurations
-- FIX: algorithm names must match the ones implemented by the detect-anomalies
-- edge function ('z_score', 'moving_average', 'rate_of_change'); any other
-- name is silently skipped at detection time. The original seed listed
-- 'spike_detection' and 'trend_change', which no algorithm implements, so
-- those two configs effectively ran z_score alone. Replaced with implemented
-- algorithms appropriate to each metric.
INSERT INTO anomaly_detection_config (metric_name, metric_category, sensitivity, lookback_window_minutes, detection_algorithms, alert_threshold_score) VALUES
('error_rate', 'system', 2.5, 60, ARRAY['z_score', 'moving_average'], 2.0),
('response_time', 'api', 3.0, 30, ARRAY['z_score', 'rate_of_change'], 2.5),
('database_connections', 'database', 2.0, 120, ARRAY['z_score', 'moving_average'], 3.0),
('rate_limit_violations', 'rate_limit', 2.5, 60, ARRAY['z_score', 'rate_of_change'], 2.0),
('moderation_queue_size', 'moderation', 3.0, 120, ARRAY['z_score', 'moving_average'], 2.5),
('cpu_usage', 'system', 2.5, 30, ARRAY['z_score', 'moving_average'], 2.0),
('memory_usage', 'system', 2.5, 30, ARRAY['z_score', 'moving_average'], 2.0),
('request_rate', 'api', 3.0, 60, ARRAY['z_score', 'rate_of_change'], 2.5);
|
||||
|
||||
-- Create indexes
-- Supports the edge function's per-metric window query (name + timestamp range).
CREATE INDEX idx_metric_time_series_name_timestamp ON metric_time_series(metric_name, timestamp DESC);
CREATE INDEX idx_metric_time_series_category_timestamp ON metric_time_series(metric_category, timestamp DESC);
-- Supports recent_anomalies_view's 24-hour window and ordering.
CREATE INDEX idx_anomaly_detections_detected_at ON anomaly_detections(detected_at DESC);
-- Partial index: only rows still awaiting an alert.
CREATE INDEX idx_anomaly_detections_alert_created ON anomaly_detections(alert_created) WHERE alert_created = false;
CREATE INDEX idx_anomaly_detections_metric ON anomaly_detections(metric_name, detected_at DESC);
|
||||
|
||||
-- Grant permissions
-- `authenticated` clients may submit metrics and read results; all writes to
-- detections/config go through the service role (which bypasses RLS).
GRANT SELECT, INSERT ON metric_time_series TO authenticated;
GRANT SELECT ON anomaly_detections TO authenticated;
GRANT SELECT ON anomaly_detection_config TO authenticated;
GRANT SELECT ON recent_anomalies_view TO authenticated;

-- RLS Policies
ALTER TABLE metric_time_series ENABLE ROW LEVEL SECURITY;
ALTER TABLE anomaly_detections ENABLE ROW LEVEL SECURITY;
ALTER TABLE anomaly_detection_config ENABLE ROW LEVEL SECURITY;

-- System can insert metrics
-- NOTE(review): WITH CHECK (true) lets *any* authenticated user insert metric
-- rows (the GRANT above includes INSERT) — confirm that is intended rather
-- than service-role-only ingestion.
CREATE POLICY system_insert_metrics ON metric_time_series
  FOR INSERT WITH CHECK (true);

-- Moderators can view all metrics
CREATE POLICY moderators_view_metrics ON metric_time_series
  FOR SELECT USING (
    EXISTS (
      SELECT 1 FROM user_roles
      WHERE user_id = auth.uid()
      AND role IN ('moderator', 'admin', 'superuser')
    )
  );

-- Moderators can view anomalies
CREATE POLICY moderators_view_anomalies ON anomaly_detections
  FOR SELECT USING (
    EXISTS (
      SELECT 1 FROM user_roles
      WHERE user_id = auth.uid()
      AND role IN ('moderator', 'admin', 'superuser')
    )
  );

-- System can insert anomalies
CREATE POLICY system_insert_anomalies ON anomaly_detections
  FOR INSERT WITH CHECK (true);

-- Admins can manage anomaly config
CREATE POLICY admins_manage_config ON anomaly_detection_config
  FOR ALL USING (
    EXISTS (
      SELECT 1 FROM user_roles
      WHERE user_id = auth.uid()
      AND role IN ('admin', 'superuser')
    )
  );
|
||||
Reference in New Issue
Block a user