mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-20 08:31:12 -05:00
Implements complete plan to resolve duplicate span_id issues and metric collection errors: - Ensure edge handlers return proper Response objects to prevent double logging - Update collect-metrics to use valid metric categories, fix system_alerts query, and adjust returns - Apply detect-anomalies adjustments if needed and add defensive handling in wrapper - Prepare ground for end-to-end verification of location-related fixes
496 lines
18 KiB
TypeScript
import { serve } from 'https://deno.land/std@0.190.0/http/server.ts';
|
||
import { createEdgeFunction, type EdgeFunctionContext } from '../_shared/edgeFunctionWrapper.ts';
|
||
import { addSpanEvent } from '../_shared/logger.ts';
|
||
import { corsHeaders } from '../_shared/cors.ts';
|
||
|
||
interface MetricData {
|
||
timestamp: string;
|
||
metric_value: number;
|
||
}
|
||
|
||
interface AnomalyDetectionConfig {
|
||
metric_name: string;
|
||
metric_category: string;
|
||
enabled: boolean;
|
||
sensitivity: number;
|
||
lookback_window_minutes: number;
|
||
detection_algorithms: string[];
|
||
min_data_points: number;
|
||
alert_threshold_score: number;
|
||
auto_create_alert: boolean;
|
||
}
|
||
|
||
interface AnomalyResult {
|
||
isAnomaly: boolean;
|
||
anomalyType: string;
|
||
deviationScore: number;
|
||
confidenceScore: number;
|
||
algorithm: string;
|
||
baselineValue: number;
|
||
anomalyValue: number;
|
||
}
|
||
|
||
// Advanced ML-based anomaly detection algorithms
|
||
class AnomalyDetector {
|
||
// Isolation Forest approximation: Detects outliers based on isolation score
|
||
static isolationForest(data: number[], currentValue: number, sensitivity: number = 0.6): AnomalyResult {
|
||
if (data.length < 10) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'isolation_forest', baselineValue: currentValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
// Calculate isolation score (simplified version)
|
||
// Based on how different the value is from random samples
|
||
const samples = 20;
|
||
let isolationScore = 0;
|
||
|
||
for (let i = 0; i < samples; i++) {
|
||
const randomSample = data[Math.floor(Math.random() * data.length)];
|
||
const distance = Math.abs(currentValue - randomSample);
|
||
isolationScore += distance;
|
||
}
|
||
|
||
isolationScore = isolationScore / samples;
|
||
|
||
// Normalize by standard deviation
|
||
const mean = data.reduce((sum, val) => sum + val, 0) / data.length;
|
||
const variance = data.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / data.length;
|
||
const stdDev = Math.sqrt(variance);
|
||
|
||
const normalizedScore = stdDev > 0 ? isolationScore / stdDev : 0;
|
||
const isAnomaly = normalizedScore > (1 / sensitivity);
|
||
|
||
return {
|
||
isAnomaly,
|
||
anomalyType: currentValue > mean ? 'outlier_high' : 'outlier_low',
|
||
deviationScore: normalizedScore,
|
||
confidenceScore: Math.min(normalizedScore / 5, 1),
|
||
algorithm: 'isolation_forest',
|
||
baselineValue: mean,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
|
||
// Seasonal decomposition: Detects anomalies considering seasonal patterns
|
||
static seasonalDecomposition(data: number[], currentValue: number, sensitivity: number = 2.5, period: number = 24): AnomalyResult {
|
||
if (data.length < period * 2) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'seasonal', baselineValue: currentValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
// Calculate seasonal component (average of values at same position in period)
|
||
const position = data.length % period;
|
||
const seasonalValues: number[] = [];
|
||
|
||
for (let i = position; i < data.length; i += period) {
|
||
seasonalValues.push(data[i]);
|
||
}
|
||
|
||
const seasonalMean = seasonalValues.reduce((sum, val) => sum + val, 0) / seasonalValues.length;
|
||
const seasonalStdDev = Math.sqrt(
|
||
seasonalValues.reduce((sum, val) => sum + Math.pow(val - seasonalMean, 2), 0) / seasonalValues.length
|
||
);
|
||
|
||
if (seasonalStdDev === 0) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'seasonal', baselineValue: seasonalMean, anomalyValue: currentValue };
|
||
}
|
||
|
||
const deviationScore = Math.abs(currentValue - seasonalMean) / seasonalStdDev;
|
||
const isAnomaly = deviationScore > sensitivity;
|
||
|
||
return {
|
||
isAnomaly,
|
||
anomalyType: currentValue > seasonalMean ? 'seasonal_spike' : 'seasonal_drop',
|
||
deviationScore,
|
||
confidenceScore: Math.min(deviationScore / (sensitivity * 2), 1),
|
||
algorithm: 'seasonal',
|
||
baselineValue: seasonalMean,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
|
||
// LSTM-inspired prediction: Simple exponential smoothing with trend detection
|
||
static predictiveAnomaly(data: number[], currentValue: number, sensitivity: number = 2.5): AnomalyResult {
|
||
if (data.length < 5) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'predictive', baselineValue: currentValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
// Triple exponential smoothing (Holt-Winters approximation)
|
||
const alpha = 0.3; // Level smoothing
|
||
const beta = 0.1; // Trend smoothing
|
||
|
||
let level = data[0];
|
||
let trend = data[1] - data[0];
|
||
|
||
// Calculate smoothed values
|
||
for (let i = 1; i < data.length; i++) {
|
||
const prevLevel = level;
|
||
level = alpha * data[i] + (1 - alpha) * (level + trend);
|
||
trend = beta * (level - prevLevel) + (1 - beta) * trend;
|
||
}
|
||
|
||
// Predict next value
|
||
const prediction = level + trend;
|
||
|
||
// Calculate prediction error
|
||
const recentData = data.slice(-10);
|
||
const predictionErrors: number[] = [];
|
||
|
||
for (let i = 1; i < recentData.length; i++) {
|
||
const simplePrediction = recentData[i - 1];
|
||
predictionErrors.push(Math.abs(recentData[i] - simplePrediction));
|
||
}
|
||
|
||
const meanError = predictionErrors.reduce((sum, err) => sum + err, 0) / predictionErrors.length;
|
||
const errorStdDev = Math.sqrt(
|
||
predictionErrors.reduce((sum, err) => sum + Math.pow(err - meanError, 2), 0) / predictionErrors.length
|
||
);
|
||
|
||
const actualError = Math.abs(currentValue - prediction);
|
||
const deviationScore = errorStdDev > 0 ? actualError / errorStdDev : 0;
|
||
const isAnomaly = deviationScore > sensitivity;
|
||
|
||
return {
|
||
isAnomaly,
|
||
anomalyType: currentValue > prediction ? 'unexpected_spike' : 'unexpected_drop',
|
||
deviationScore,
|
||
confidenceScore: Math.min(deviationScore / (sensitivity * 2), 1),
|
||
algorithm: 'predictive',
|
||
baselineValue: prediction,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
|
||
// Ensemble method: Combines multiple algorithms for better accuracy
|
||
static ensemble(data: number[], currentValue: number, sensitivity: number = 2.5): AnomalyResult {
|
||
const results: AnomalyResult[] = [
|
||
this.zScore(data, currentValue, sensitivity),
|
||
this.movingAverage(data, currentValue, sensitivity),
|
||
this.rateOfChange(data, currentValue, sensitivity),
|
||
this.isolationForest(data, currentValue, 0.6),
|
||
this.predictiveAnomaly(data, currentValue, sensitivity),
|
||
];
|
||
|
||
// Count how many algorithms detected an anomaly
|
||
const anomalyCount = results.filter(r => r.isAnomaly).length;
|
||
const anomalyRatio = anomalyCount / results.length;
|
||
|
||
// Calculate average deviation and confidence
|
||
const avgDeviation = results.reduce((sum, r) => sum + r.deviationScore, 0) / results.length;
|
||
const avgConfidence = results.reduce((sum, r) => sum + r.confidenceScore, 0) / results.length;
|
||
|
||
// Determine anomaly type based on most common classification
|
||
const typeCount = new Map<string, number>();
|
||
results.forEach(r => {
|
||
typeCount.set(r.anomalyType, (typeCount.get(r.anomalyType) || 0) + 1);
|
||
});
|
||
|
||
let mostCommonType = 'none';
|
||
let maxCount = 0;
|
||
typeCount.forEach((count, type) => {
|
||
if (count > maxCount) {
|
||
maxCount = count;
|
||
mostCommonType = type;
|
||
}
|
||
});
|
||
|
||
const mean = data.reduce((sum, val) => sum + val, 0) / data.length;
|
||
|
||
return {
|
||
isAnomaly: anomalyRatio >= 0.4, // At least 40% of algorithms agree
|
||
anomalyType: mostCommonType,
|
||
deviationScore: avgDeviation,
|
||
confidenceScore: Math.min(avgConfidence * anomalyRatio * 2, 1),
|
||
algorithm: 'ensemble',
|
||
baselineValue: mean,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
|
||
// Z-Score algorithm: Detects outliers based on standard deviation
|
||
static zScore(data: number[], currentValue: number, sensitivity: number = 3.0): AnomalyResult {
|
||
if (data.length < 2) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'z_score', baselineValue: currentValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
const mean = data.reduce((sum, val) => sum + val, 0) / data.length;
|
||
const variance = data.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / data.length;
|
||
const stdDev = Math.sqrt(variance);
|
||
|
||
if (stdDev === 0) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'z_score', baselineValue: mean, anomalyValue: currentValue };
|
||
}
|
||
|
||
const zScore = Math.abs((currentValue - mean) / stdDev);
|
||
const isAnomaly = zScore > sensitivity;
|
||
|
||
return {
|
||
isAnomaly,
|
||
anomalyType: currentValue > mean ? 'spike' : 'drop',
|
||
deviationScore: zScore,
|
||
confidenceScore: Math.min(zScore / (sensitivity * 2), 1),
|
||
algorithm: 'z_score',
|
||
baselineValue: mean,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
|
||
// Moving Average algorithm: Detects deviation from trend
|
||
static movingAverage(data: number[], currentValue: number, sensitivity: number = 2.5, window: number = 10): AnomalyResult {
|
||
if (data.length < window) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'moving_average', baselineValue: currentValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
const recentData = data.slice(-window);
|
||
const ma = recentData.reduce((sum, val) => sum + val, 0) / recentData.length;
|
||
|
||
const mad = recentData.reduce((sum, val) => sum + Math.abs(val - ma), 0) / recentData.length;
|
||
|
||
if (mad === 0) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'moving_average', baselineValue: ma, anomalyValue: currentValue };
|
||
}
|
||
|
||
const deviation = Math.abs(currentValue - ma) / mad;
|
||
const isAnomaly = deviation > sensitivity;
|
||
|
||
return {
|
||
isAnomaly,
|
||
anomalyType: currentValue > ma ? 'spike' : 'drop',
|
||
deviationScore: deviation,
|
||
confidenceScore: Math.min(deviation / (sensitivity * 2), 1),
|
||
algorithm: 'moving_average',
|
||
baselineValue: ma,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
|
||
// Rate of Change algorithm: Detects sudden changes
|
||
static rateOfChange(data: number[], currentValue: number, sensitivity: number = 3.0): AnomalyResult {
|
||
if (data.length < 2) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'rate_of_change', baselineValue: currentValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
const previousValue = data[data.length - 1];
|
||
|
||
if (previousValue === 0) {
|
||
return { isAnomaly: false, anomalyType: 'none', deviationScore: 0, confidenceScore: 0, algorithm: 'rate_of_change', baselineValue: previousValue, anomalyValue: currentValue };
|
||
}
|
||
|
||
const percentChange = Math.abs((currentValue - previousValue) / previousValue) * 100;
|
||
const isAnomaly = percentChange > (sensitivity * 10); // sensitivity * 10 = % threshold
|
||
|
||
return {
|
||
isAnomaly,
|
||
anomalyType: currentValue > previousValue ? 'trend_change' : 'drop',
|
||
deviationScore: percentChange / 10,
|
||
confidenceScore: Math.min(percentChange / (sensitivity * 20), 1),
|
||
algorithm: 'rate_of_change',
|
||
baselineValue: previousValue,
|
||
anomalyValue: currentValue,
|
||
};
|
||
}
|
||
}
|
||
|
||
const handler = async (req: Request, { supabase, span, requestId }: EdgeFunctionContext) => {
|
||
addSpanEvent(span, 'starting_anomaly_detection', { requestId });
|
||
|
||
// Get all enabled anomaly detection configurations
|
||
const { data: configs, error: configError } = await supabase
|
||
.from('anomaly_detection_config')
|
||
.select('*')
|
||
.eq('enabled', true);
|
||
|
||
if (configError) {
|
||
addSpanEvent(span, 'error_fetching_configs', { error: configError.message });
|
||
throw configError;
|
||
}
|
||
|
||
addSpanEvent(span, 'processing_metric_configs', { count: configs?.length || 0 });
|
||
|
||
const anomaliesDetected: any[] = [];
|
||
|
||
for (const config of (configs as AnomalyDetectionConfig[])) {
|
||
try {
|
||
// Fetch historical data for this metric
|
||
const windowStart = new Date(Date.now() - config.lookback_window_minutes * 60 * 1000);
|
||
|
||
const { data: metricData, error: metricError } = await supabase
|
||
.from('metric_time_series')
|
||
.select('timestamp, metric_value')
|
||
.eq('metric_name', config.metric_name)
|
||
.gte('timestamp', windowStart.toISOString())
|
||
.order('timestamp', { ascending: true });
|
||
|
||
if (metricError) {
|
||
addSpanEvent(span, 'error_fetching_metric_data', {
|
||
metric: config.metric_name,
|
||
error: metricError.message
|
||
});
|
||
continue;
|
||
}
|
||
|
||
const data = metricData as MetricData[];
|
||
|
||
if (!data || data.length < config.min_data_points) {
|
||
addSpanEvent(span, 'insufficient_data', {
|
||
metric: config.metric_name,
|
||
points: data?.length || 0
|
||
});
|
||
continue;
|
||
}
|
||
|
||
// Get current value (most recent)
|
||
const currentValue = data[data.length - 1].metric_value;
|
||
const historicalValues = data.slice(0, -1).map(d => d.metric_value);
|
||
|
||
// Run detection algorithms
|
||
const results: AnomalyResult[] = [];
|
||
|
||
for (const algorithm of config.detection_algorithms) {
|
||
let result: AnomalyResult;
|
||
|
||
switch (algorithm) {
|
||
case 'z_score':
|
||
result = AnomalyDetector.zScore(historicalValues, currentValue, config.sensitivity);
|
||
break;
|
||
case 'moving_average':
|
||
result = AnomalyDetector.movingAverage(historicalValues, currentValue, config.sensitivity);
|
||
break;
|
||
case 'rate_of_change':
|
||
result = AnomalyDetector.rateOfChange(historicalValues, currentValue, config.sensitivity);
|
||
break;
|
||
case 'isolation_forest':
|
||
result = AnomalyDetector.isolationForest(historicalValues, currentValue, 0.6);
|
||
break;
|
||
case 'seasonal':
|
||
result = AnomalyDetector.seasonalDecomposition(historicalValues, currentValue, config.sensitivity, 24);
|
||
break;
|
||
case 'predictive':
|
||
result = AnomalyDetector.predictiveAnomaly(historicalValues, currentValue, config.sensitivity);
|
||
break;
|
||
case 'ensemble':
|
||
result = AnomalyDetector.ensemble(historicalValues, currentValue, config.sensitivity);
|
||
break;
|
||
default:
|
||
continue;
|
||
}
|
||
|
||
if (result.isAnomaly && result.deviationScore >= config.alert_threshold_score) {
|
||
results.push(result);
|
||
}
|
||
}
|
||
|
||
// If any algorithm detected an anomaly
|
||
if (results.length > 0) {
|
||
// Use the result with highest confidence
|
||
const bestResult = results.reduce((best, current) =>
|
||
current.confidenceScore > best.confidenceScore ? current : best
|
||
);
|
||
|
||
// Determine severity based on deviation score
|
||
const severity =
|
||
bestResult.deviationScore >= 5 ? 'critical' :
|
||
bestResult.deviationScore >= 4 ? 'high' :
|
||
bestResult.deviationScore >= 3 ? 'medium' : 'low';
|
||
|
||
// Insert anomaly detection record
|
||
const { data: anomaly, error: anomalyError } = await supabase
|
||
.from('anomaly_detections')
|
||
.insert({
|
||
metric_name: config.metric_name,
|
||
metric_category: config.metric_category,
|
||
anomaly_type: bestResult.anomalyType,
|
||
severity,
|
||
baseline_value: bestResult.baselineValue,
|
||
anomaly_value: bestResult.anomalyValue,
|
||
deviation_score: bestResult.deviationScore,
|
||
confidence_score: bestResult.confidenceScore,
|
||
detection_algorithm: bestResult.algorithm,
|
||
time_window_start: windowStart.toISOString(),
|
||
time_window_end: new Date().toISOString(),
|
||
metadata: {
|
||
algorithms_run: config.detection_algorithms,
|
||
total_data_points: data.length,
|
||
sensitivity: config.sensitivity,
|
||
},
|
||
})
|
||
.select()
|
||
.single();
|
||
|
||
if (anomalyError) {
|
||
addSpanEvent(span, 'error_inserting_anomaly', {
|
||
metric: config.metric_name,
|
||
error: anomalyError.message
|
||
});
|
||
continue;
|
||
}
|
||
|
||
anomaliesDetected.push(anomaly);
|
||
|
||
// Auto-create alert if configured
|
||
if (config.auto_create_alert && severity in ['critical', 'high']) {
|
||
const { data: alert, error: alertError } = await supabase
|
||
.from('system_alerts')
|
||
.insert({
|
||
alert_type: 'anomaly_detected',
|
||
severity,
|
||
message: `Anomaly detected in ${config.metric_name}: ${bestResult.anomalyType} (${bestResult.deviationScore.toFixed(2)}σ deviation)`,
|
||
metadata: {
|
||
anomaly_id: anomaly.id,
|
||
metric_name: config.metric_name,
|
||
baseline_value: bestResult.baselineValue,
|
||
anomaly_value: bestResult.anomalyValue,
|
||
algorithm: bestResult.algorithm,
|
||
},
|
||
})
|
||
.select()
|
||
.single();
|
||
|
||
if (!alertError && alert) {
|
||
// Update anomaly with alert_id
|
||
await supabase
|
||
.from('anomaly_detections')
|
||
.update({ alert_created: true, alert_id: alert.id })
|
||
.eq('id', anomaly.id);
|
||
|
||
addSpanEvent(span, 'alert_created', {
|
||
metric: config.metric_name,
|
||
alertId: alert.id
|
||
});
|
||
}
|
||
}
|
||
|
||
addSpanEvent(span, 'anomaly_detected', {
|
||
metric: config.metric_name,
|
||
type: bestResult.anomalyType,
|
||
deviation: bestResult.deviationScore.toFixed(2)
|
||
});
|
||
}
|
||
} catch (error) {
|
||
addSpanEvent(span, 'error_processing_metric', {
|
||
metric: config.metric_name,
|
||
error: error instanceof Error ? error.message : String(error)
|
||
});
|
||
}
|
||
}
|
||
|
||
addSpanEvent(span, 'anomaly_detection_complete', { detected: anomaliesDetected.length });
|
||
|
||
return new Response(
|
||
JSON.stringify({
|
||
success: true,
|
||
anomalies_detected: anomaliesDetected.length,
|
||
anomalies: anomaliesDetected,
|
||
}),
|
||
{
|
||
status: 200,
|
||
headers: { 'Content-Type': 'application/json' },
|
||
}
|
||
);
|
||
};
|
||
|
||
serve(createEdgeFunction({
|
||
name: 'detect-anomalies',
|
||
requireAuth: false,
|
||
corsHeaders,
|
||
enableTracing: true,
|
||
}, handler));
|