-- Pipeline Monitoring Alert System Migration -- Adds comprehensive monitoring for critical pipeline metrics -- 1. Expand alert types to include pipeline-specific alerts ALTER TABLE system_alerts DROP CONSTRAINT IF EXISTS system_alerts_alert_type_check; ALTER TABLE system_alerts ADD CONSTRAINT system_alerts_alert_type_check CHECK (alert_type IN ( 'orphaned_images', 'stale_submissions', 'circular_dependency', 'validation_error', 'ban_attempt', 'upload_timeout', 'high_error_rate', 'failed_submissions', 'temp_ref_error', 'submission_queue_backlog', 'slow_approval', 'high_ban_rate' )); -- 2. Monitor Failed Submissions CREATE OR REPLACE FUNCTION monitor_failed_submissions() RETURNS void LANGUAGE plpgsql SECURITY DEFINER SET search_path = public AS $$ DECLARE v_total_last_hour INTEGER; v_failed_last_hour INTEGER; v_failure_rate NUMERIC; v_consecutive_failures INTEGER; BEGIN SELECT COUNT(*), COUNT(*) FILTER (WHERE success = false) INTO v_total_last_hour, v_failed_last_hour FROM approval_transaction_metrics WHERE created_at > now() - interval '1 hour'; IF v_total_last_hour > 0 THEN v_failure_rate := (v_failed_last_hour::NUMERIC / v_total_last_hour::NUMERIC) * 100; IF v_failure_rate > 10 AND v_failed_last_hour >= 3 THEN PERFORM create_system_alert( 'failed_submissions', CASE WHEN v_failure_rate > 50 THEN 'critical' WHEN v_failure_rate > 25 THEN 'high' ELSE 'medium' END, format('High approval failure rate: %.1f%% (%s/%s in last hour)', v_failure_rate, v_failed_last_hour, v_total_last_hour), jsonb_build_object( 'failure_rate', v_failure_rate, 'failed_count', v_failed_last_hour, 'total_count', v_total_last_hour, 'checked_at', now() ) ); END IF; END IF; SELECT COUNT(*) INTO v_consecutive_failures FROM ( SELECT success FROM approval_transaction_metrics ORDER BY created_at DESC LIMIT 5 ) recent WHERE success = false; IF v_consecutive_failures >= 5 THEN PERFORM create_system_alert( 'failed_submissions', 'critical', format('System failure: %s consecutive approval failures', v_consecutive_failures), jsonb_build_object( 'consecutive_failures', v_consecutive_failures, 'checked_at', now() ) ); END IF; END; $$; -- 3. Monitor Ban Attempt Patterns CREATE OR REPLACE FUNCTION monitor_ban_attempts() RETURNS void LANGUAGE plpgsql SECURITY DEFINER SET search_path = public AS $$ DECLARE v_attempts_last_hour INTEGER; v_unique_users INTEGER; BEGIN SELECT COUNT(*), COUNT(DISTINCT (metadata->>'user_id')::UUID) INTO v_attempts_last_hour, v_unique_users FROM system_alerts WHERE alert_type = 'ban_attempt' AND created_at > now() - interval '1 hour'; IF v_attempts_last_hour >= 5 THEN PERFORM create_system_alert( 'high_ban_rate', CASE WHEN v_attempts_last_hour > 20 THEN 'critical' WHEN v_attempts_last_hour > 10 THEN 'high' ELSE 'medium' END, format('High ban attempt rate: %s attempts from %s users in last hour', v_attempts_last_hour, v_unique_users), jsonb_build_object( 'attempt_count', v_attempts_last_hour, 'unique_users', v_unique_users, 'checked_at', now() ) ); END IF; END; $$; -- 4. Monitor Slow Approvals CREATE OR REPLACE FUNCTION monitor_slow_approvals() RETURNS void LANGUAGE plpgsql SECURITY DEFINER SET search_path = public AS $$ DECLARE v_slow_count INTEGER; v_avg_duration NUMERIC; v_max_duration NUMERIC; BEGIN SELECT COUNT(*), AVG(duration_ms), MAX(duration_ms) INTO v_slow_count, v_avg_duration, v_max_duration FROM approval_transaction_metrics WHERE created_at > now() - interval '1 hour' AND duration_ms > 30000; IF v_slow_count >= 3 THEN PERFORM create_system_alert( 'slow_approval', CASE WHEN v_max_duration > 60000 THEN 'high' ELSE 'medium' END, format('Slow approval transactions detected: %s approvals >30s (avg: %sms, max: %sms)', v_slow_count, ROUND(v_avg_duration), ROUND(v_max_duration)), jsonb_build_object( 'slow_count', v_slow_count, 'avg_duration_ms', ROUND(v_avg_duration), 'max_duration_ms', ROUND(v_max_duration), 'checked_at', now() ) ); END IF; END; $$; -- 5. Drop and recreate mark_orphaned_images with escalating alerts DROP FUNCTION IF EXISTS mark_orphaned_images(); CREATE OR REPLACE FUNCTION mark_orphaned_images() RETURNS TABLE(task TEXT, status TEXT, details JSONB) LANGUAGE plpgsql SECURITY DEFINER SET search_path = public AS $$ DECLARE v_orphaned_count INTEGER; BEGIN UPDATE orphaned_images SET marked_for_deletion_at = now() WHERE marked_for_deletion_at IS NULL AND uploaded_at < now() - interval '24 hours' AND NOT EXISTS ( SELECT 1 FROM parks WHERE image_id = orphaned_images.image_id UNION ALL SELECT 1 FROM rides WHERE image_id = orphaned_images.image_id ); GET DIAGNOSTICS v_orphaned_count = ROW_COUNT; SELECT COUNT(*) INTO v_orphaned_count FROM orphaned_images WHERE marked_for_deletion_at IS NOT NULL; RETURN QUERY SELECT 'mark_orphaned_images'::TEXT, 'success'::TEXT, jsonb_build_object('count', v_orphaned_count); IF v_orphaned_count >= 500 THEN PERFORM create_system_alert( 'orphaned_images', 'critical', format('CRITICAL: %s orphaned images require cleanup', v_orphaned_count), jsonb_build_object('count', v_orphaned_count) ); ELSIF v_orphaned_count >= 100 THEN PERFORM create_system_alert( 'orphaned_images', 'high', format('High number of orphaned images: %s', v_orphaned_count), jsonb_build_object('count', v_orphaned_count) ); ELSIF v_orphaned_count >= 50 THEN PERFORM create_system_alert( 'orphaned_images', 'medium', format('Moderate orphaned images detected: %s', v_orphaned_count), jsonb_build_object('count', v_orphaned_count) ); END IF; EXCEPTION WHEN OTHERS THEN RETURN QUERY SELECT 'mark_orphaned_images'::TEXT, 'error'::TEXT, jsonb_build_object('error', SQLERRM); END; $$; -- 6. Master Monitoring Function CREATE OR REPLACE FUNCTION run_pipeline_monitoring() RETURNS TABLE(check_name TEXT, status TEXT, details JSONB) LANGUAGE plpgsql SECURITY DEFINER SET search_path = public AS $$ BEGIN BEGIN PERFORM monitor_failed_submissions(); RETURN QUERY SELECT 'monitor_failed_submissions'::TEXT, 'success'::TEXT, '{}'::JSONB; EXCEPTION WHEN OTHERS THEN RETURN QUERY SELECT 'monitor_failed_submissions'::TEXT, 'error'::TEXT, jsonb_build_object('error', SQLERRM); END; BEGIN PERFORM monitor_ban_attempts(); RETURN QUERY SELECT 'monitor_ban_attempts'::TEXT, 'success'::TEXT, '{}'::JSONB; EXCEPTION WHEN OTHERS THEN RETURN QUERY SELECT 'monitor_ban_attempts'::TEXT, 'error'::TEXT, jsonb_build_object('error', SQLERRM); END; BEGIN PERFORM monitor_slow_approvals(); RETURN QUERY SELECT 'monitor_slow_approvals'::TEXT, 'success'::TEXT, '{}'::JSONB; EXCEPTION WHEN OTHERS THEN RETURN QUERY SELECT 'monitor_slow_approvals'::TEXT, 'error'::TEXT, jsonb_build_object('error', SQLERRM); END; RETURN; END; $$; GRANT EXECUTE ON FUNCTION run_pipeline_monitoring() TO authenticated; GRANT EXECUTE ON FUNCTION monitor_failed_submissions() TO authenticated; GRANT EXECUTE ON FUNCTION monitor_ban_attempts() TO authenticated; GRANT EXECUTE ON FUNCTION monitor_slow_approvals() TO authenticated;