-- Alert Correlation System: Detect related issues and create incidents -- Table: Alert correlation rules (defines which alert patterns indicate incidents) CREATE TABLE alert_correlation_rules ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), rule_name TEXT NOT NULL, description TEXT, alert_patterns JSONB NOT NULL, time_window_minutes INTEGER NOT NULL DEFAULT 15, min_alerts_required INTEGER NOT NULL DEFAULT 2, incident_severity TEXT NOT NULL CHECK (incident_severity IN ('critical', 'high', 'medium', 'low')), incident_title_template TEXT NOT NULL, incident_description_template TEXT, auto_create_incident BOOLEAN NOT NULL DEFAULT true, enabled BOOLEAN NOT NULL DEFAULT true, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), created_by UUID REFERENCES auth.users(id) ); -- Table: Incidents (auto-generated or manual from correlated alerts) CREATE TABLE incidents ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), incident_number TEXT UNIQUE NOT NULL, title TEXT NOT NULL, description TEXT, severity TEXT NOT NULL CHECK (severity IN ('critical', 'high', 'medium', 'low')), status TEXT NOT NULL DEFAULT 'open' CHECK (status IN ('open', 'investigating', 'resolved', 'closed')), correlation_rule_id UUID REFERENCES alert_correlation_rules(id), detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), acknowledged_at TIMESTAMPTZ, acknowledged_by UUID REFERENCES auth.users(id), resolved_at TIMESTAMPTZ, resolved_by UUID REFERENCES auth.users(id), resolution_notes TEXT, alert_count INTEGER NOT NULL DEFAULT 0, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- Table: Links alerts to incidents CREATE TABLE incident_alerts ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), incident_id UUID NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, alert_source TEXT NOT NULL CHECK (alert_source IN ('system', 'rate_limit')), alert_id UUID NOT NULL, added_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), UNIQUE(incident_id, alert_source, alert_id) ); -- Generate incident numbers sequentially CREATE SEQUENCE incident_number_seq START 1000; -- Function to generate incident numbers CREATE OR REPLACE FUNCTION generate_incident_number() RETURNS TEXT AS $$ BEGIN RETURN 'INC-' || LPAD(nextval('incident_number_seq')::TEXT, 6, '0'); END; $$ LANGUAGE plpgsql; -- Trigger to auto-generate incident numbers CREATE OR REPLACE FUNCTION set_incident_number() RETURNS TRIGGER AS $$ BEGIN IF NEW.incident_number IS NULL THEN NEW.incident_number := generate_incident_number(); END IF; RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER trigger_set_incident_number BEFORE INSERT ON incidents FOR EACH ROW EXECUTE FUNCTION set_incident_number(); -- Trigger to update incident alert count CREATE OR REPLACE FUNCTION update_incident_alert_count() RETURNS TRIGGER AS $$ BEGIN IF TG_OP = 'INSERT' THEN UPDATE incidents SET alert_count = alert_count + 1, updated_at = NOW() WHERE id = NEW.incident_id; ELSIF TG_OP = 'DELETE' THEN UPDATE incidents SET alert_count = alert_count - 1, updated_at = NOW() WHERE id = OLD.incident_id; END IF; RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER trigger_update_incident_alert_count AFTER INSERT OR DELETE ON incident_alerts FOR EACH ROW EXECUTE FUNCTION update_incident_alert_count(); -- View: Active correlations (alerts matching correlation patterns) CREATE OR REPLACE VIEW alert_correlations_view WITH (security_invoker=on) AS WITH recent_alerts AS ( SELECT 'system'::TEXT AS source, id::TEXT AS alert_id, alert_type, NULL::TEXT AS metric_type, severity, created_at, message AS alert_message FROM system_alerts WHERE created_at > NOW() - INTERVAL '1 hour' AND resolved_at IS NULL UNION ALL SELECT 'rate_limit'::TEXT AS source, id::TEXT AS alert_id, NULL::TEXT AS alert_type, metric_type, 'high'::TEXT AS severity, created_at, alert_message FROM rate_limit_alerts WHERE created_at > NOW() - INTERVAL '1 hour' AND resolved_at IS NULL ), matched_alerts AS ( SELECT acr.id AS rule_id, acr.rule_name, acr.description AS rule_description, acr.incident_severity, acr.incident_title_template, acr.time_window_minutes, acr.min_alerts_required, ra.alert_id, ra.source, ra.alert_message, ra.created_at FROM alert_correlation_rules acr CROSS JOIN LATERAL ( SELECT ra.* FROM recent_alerts ra, jsonb_array_elements(acr.alert_patterns) AS pattern WHERE ra.created_at > NOW() - (acr.time_window_minutes || ' minutes')::INTERVAL AND ( (pattern->>'source' = ra.source) AND ( (pattern->>'alert_type' IS NULL OR pattern->>'alert_type' = ra.alert_type) OR (pattern->>'metric_type' IS NULL OR pattern->>'metric_type' = ra.metric_type) ) AND (pattern->>'severity' IS NULL OR pattern->>'severity' = ra.severity) ) ) ra WHERE acr.enabled = true ) SELECT rule_id, rule_name, rule_description, incident_severity, incident_title_template, time_window_minutes, min_alerts_required, COUNT(DISTINCT alert_id) AS matching_alerts_count, ARRAY_AGG(DISTINCT alert_id) AS alert_ids, ARRAY_AGG(DISTINCT source) AS alert_sources, (ARRAY_AGG(alert_message ORDER BY created_at DESC))[1:5] AS alert_messages, MIN(created_at) AS first_alert_at, MAX(created_at) AS last_alert_at, NOT EXISTS ( SELECT 1 FROM incidents i WHERE i.correlation_rule_id = matched_alerts.rule_id AND i.status IN ('open', 'investigating') AND i.detected_at > NOW() - (matched_alerts.time_window_minutes || ' minutes')::INTERVAL ) AS can_create_incident FROM matched_alerts GROUP BY rule_id, rule_name, rule_description, incident_severity, incident_title_template, time_window_minutes, min_alerts_required HAVING COUNT(DISTINCT alert_id) >= min_alerts_required; -- Grant permissions GRANT SELECT ON alert_correlation_rules TO authenticated; GRANT SELECT ON incidents TO authenticated; GRANT SELECT ON incident_alerts TO authenticated; GRANT SELECT ON alert_correlations_view TO authenticated; -- RLS Policies ALTER TABLE alert_correlation_rules ENABLE ROW LEVEL SECURITY; ALTER TABLE incidents ENABLE ROW LEVEL SECURITY; ALTER TABLE incident_alerts ENABLE ROW LEVEL SECURITY; CREATE POLICY moderators_manage_correlation_rules ON alert_correlation_rules FOR ALL USING ( EXISTS ( SELECT 1 FROM user_roles WHERE user_id = auth.uid() AND role IN ('moderator', 'admin', 'superuser') ) ); CREATE POLICY moderators_view_incidents ON incidents FOR SELECT USING ( EXISTS ( SELECT 1 FROM user_roles WHERE user_id = auth.uid() AND role IN ('moderator', 'admin', 'superuser') ) ); CREATE POLICY moderators_manage_incidents ON incidents FOR ALL USING ( EXISTS ( SELECT 1 FROM user_roles WHERE user_id = auth.uid() AND role IN ('moderator', 'admin', 'superuser') ) ); CREATE POLICY moderators_view_incident_alerts ON incident_alerts FOR SELECT USING ( EXISTS ( SELECT 1 FROM user_roles WHERE user_id = auth.uid() AND role IN ('moderator', 'admin', 'superuser') ) ); -- Insert default correlation rules INSERT INTO alert_correlation_rules (rule_name, description, alert_patterns, time_window_minutes, min_alerts_required, incident_severity, incident_title_template, incident_description_template, enabled) VALUES ('Database Performance Degradation', 'Database issues causing application errors', '[ {"source": "system", "alert_type": "database_connection_pool_exhausted"}, {"source": "system", "alert_type": "high_error_rate"}, {"source": "rate_limit", "metric_type": "rate_limit_violation"} ]'::jsonb, 15, 2, 'critical', 'Database Performance Incident', 'Multiple database-related alerts detected indicating potential database performance degradation affecting application availability.', true), ('Authentication System Issues', 'Auth failures causing user lockouts', '[ {"source": "system", "alert_type": "auth_failure_spike"}, {"source": "system", "alert_type": "high_error_rate"}, {"source": "rate_limit", "metric_type": "rate_limit_violation"} ]'::jsonb, 10, 2, 'high', 'Authentication System Incident', 'Authentication-related alerts detected indicating potential issues with user login and authentication services.', true), ('Rate Limiting Cascade', 'Rate limit violations causing service degradation', '[ {"source": "rate_limit", "metric_type": "rate_limit_violation"}, {"source": "system", "alert_type": "high_error_rate"} ]'::jsonb, 20, 2, 'high', 'Rate Limiting Cascade Incident', 'Multiple rate limit violations detected potentially causing cascading service issues.', true), ('Storage System Issues', 'File upload/storage problems', '[ {"source": "system", "alert_type": "upload_timeout"}, {"source": "system", "alert_type": "storage_quota_exceeded"}, {"source": "system", "alert_type": "high_error_rate"} ]'::jsonb, 15, 2, 'medium', 'Storage System Incident', 'Storage-related alerts detected indicating potential issues with file uploads or storage capacity.', true); -- Create indexes for performance CREATE INDEX idx_incidents_status ON incidents(status) WHERE status IN ('open', 'investigating'); CREATE INDEX idx_incidents_severity ON incidents(severity, detected_at DESC); CREATE INDEX idx_incident_alerts_incident_id ON incident_alerts(incident_id); CREATE INDEX idx_alert_correlation_rules_enabled ON alert_correlation_rules(enabled) WHERE enabled = true;