mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-20 07:11:12 -05:00
Approve and implement the Supabase migration for the pipeline monitoring alert system. This includes expanding alert types, adding new monitoring functions, and updating existing ones with escalating thresholds.
288 lines
7.6 KiB
PL/PgSQL
288 lines
7.6 KiB
PL/PgSQL
-- Pipeline Monitoring Alert System Migration
|
|
-- Adds comprehensive monitoring for critical pipeline metrics
|
|
|
|
-- 1. Expand alert types to include pipeline-specific alerts
|
|
ALTER TABLE system_alerts
|
|
DROP CONSTRAINT IF EXISTS system_alerts_alert_type_check;
|
|
|
|
ALTER TABLE system_alerts
|
|
ADD CONSTRAINT system_alerts_alert_type_check CHECK (alert_type IN (
|
|
'orphaned_images',
|
|
'stale_submissions',
|
|
'circular_dependency',
|
|
'validation_error',
|
|
'ban_attempt',
|
|
'upload_timeout',
|
|
'high_error_rate',
|
|
'failed_submissions',
|
|
'temp_ref_error',
|
|
'submission_queue_backlog',
|
|
'slow_approval',
|
|
'high_ban_rate'
|
|
));
|
|
|
|
-- 2. Monitor Failed Submissions
|
|
CREATE OR REPLACE FUNCTION monitor_failed_submissions()
|
|
RETURNS void
|
|
LANGUAGE plpgsql
|
|
SECURITY DEFINER
|
|
SET search_path = public
|
|
AS $$
|
|
DECLARE
|
|
v_total_last_hour INTEGER;
|
|
v_failed_last_hour INTEGER;
|
|
v_failure_rate NUMERIC;
|
|
v_consecutive_failures INTEGER;
|
|
BEGIN
|
|
SELECT
|
|
COUNT(*),
|
|
COUNT(*) FILTER (WHERE success = false)
|
|
INTO v_total_last_hour, v_failed_last_hour
|
|
FROM approval_transaction_metrics
|
|
WHERE created_at > now() - interval '1 hour';
|
|
|
|
IF v_total_last_hour > 0 THEN
|
|
v_failure_rate := (v_failed_last_hour::NUMERIC / v_total_last_hour::NUMERIC) * 100;
|
|
|
|
IF v_failure_rate > 10 AND v_failed_last_hour >= 3 THEN
|
|
PERFORM create_system_alert(
|
|
'failed_submissions',
|
|
CASE
|
|
WHEN v_failure_rate > 50 THEN 'critical'
|
|
WHEN v_failure_rate > 25 THEN 'high'
|
|
ELSE 'medium'
|
|
END,
|
|
format('High approval failure rate: %.1f%% (%s/%s in last hour)',
|
|
v_failure_rate, v_failed_last_hour, v_total_last_hour),
|
|
jsonb_build_object(
|
|
'failure_rate', v_failure_rate,
|
|
'failed_count', v_failed_last_hour,
|
|
'total_count', v_total_last_hour,
|
|
'checked_at', now()
|
|
)
|
|
);
|
|
END IF;
|
|
END IF;
|
|
|
|
SELECT COUNT(*) INTO v_consecutive_failures
|
|
FROM (
|
|
SELECT success
|
|
FROM approval_transaction_metrics
|
|
ORDER BY created_at DESC
|
|
LIMIT 5
|
|
) recent
|
|
WHERE success = false;
|
|
|
|
IF v_consecutive_failures >= 5 THEN
|
|
PERFORM create_system_alert(
|
|
'failed_submissions',
|
|
'critical',
|
|
format('System failure: %s consecutive approval failures', v_consecutive_failures),
|
|
jsonb_build_object(
|
|
'consecutive_failures', v_consecutive_failures,
|
|
'checked_at', now()
|
|
)
|
|
);
|
|
END IF;
|
|
END;
|
|
$$;
|
|
|
|
-- 3. Monitor Ban Attempt Patterns
|
|
CREATE OR REPLACE FUNCTION monitor_ban_attempts()
|
|
RETURNS void
|
|
LANGUAGE plpgsql
|
|
SECURITY DEFINER
|
|
SET search_path = public
|
|
AS $$
|
|
DECLARE
|
|
v_attempts_last_hour INTEGER;
|
|
v_unique_users INTEGER;
|
|
BEGIN
|
|
SELECT
|
|
COUNT(*),
|
|
COUNT(DISTINCT (metadata->>'user_id')::UUID)
|
|
INTO v_attempts_last_hour, v_unique_users
|
|
FROM system_alerts
|
|
WHERE alert_type = 'ban_attempt'
|
|
AND created_at > now() - interval '1 hour';
|
|
|
|
IF v_attempts_last_hour >= 5 THEN
|
|
PERFORM create_system_alert(
|
|
'high_ban_rate',
|
|
CASE
|
|
WHEN v_attempts_last_hour > 20 THEN 'critical'
|
|
WHEN v_attempts_last_hour > 10 THEN 'high'
|
|
ELSE 'medium'
|
|
END,
|
|
format('High ban attempt rate: %s attempts from %s users in last hour',
|
|
v_attempts_last_hour, v_unique_users),
|
|
jsonb_build_object(
|
|
'attempt_count', v_attempts_last_hour,
|
|
'unique_users', v_unique_users,
|
|
'checked_at', now()
|
|
)
|
|
);
|
|
END IF;
|
|
END;
|
|
$$;
|
|
|
|
-- 4. Monitor Slow Approvals
|
|
CREATE OR REPLACE FUNCTION monitor_slow_approvals()
|
|
RETURNS void
|
|
LANGUAGE plpgsql
|
|
SECURITY DEFINER
|
|
SET search_path = public
|
|
AS $$
|
|
DECLARE
|
|
v_slow_count INTEGER;
|
|
v_avg_duration NUMERIC;
|
|
v_max_duration NUMERIC;
|
|
BEGIN
|
|
SELECT
|
|
COUNT(*),
|
|
AVG(duration_ms),
|
|
MAX(duration_ms)
|
|
INTO v_slow_count, v_avg_duration, v_max_duration
|
|
FROM approval_transaction_metrics
|
|
WHERE created_at > now() - interval '1 hour'
|
|
AND duration_ms > 30000;
|
|
|
|
IF v_slow_count >= 3 THEN
|
|
PERFORM create_system_alert(
|
|
'slow_approval',
|
|
CASE
|
|
WHEN v_max_duration > 60000 THEN 'high'
|
|
ELSE 'medium'
|
|
END,
|
|
format('Slow approval transactions detected: %s approvals >30s (avg: %sms, max: %sms)',
|
|
v_slow_count, ROUND(v_avg_duration), ROUND(v_max_duration)),
|
|
jsonb_build_object(
|
|
'slow_count', v_slow_count,
|
|
'avg_duration_ms', ROUND(v_avg_duration),
|
|
'max_duration_ms', ROUND(v_max_duration),
|
|
'checked_at', now()
|
|
)
|
|
);
|
|
END IF;
|
|
END;
|
|
$$;
|
|
|
|
-- 5. Drop and recreate mark_orphaned_images with escalating alerts
|
|
DROP FUNCTION IF EXISTS mark_orphaned_images();
|
|
|
|
CREATE OR REPLACE FUNCTION mark_orphaned_images()
|
|
RETURNS TABLE(task TEXT, status TEXT, details JSONB)
|
|
LANGUAGE plpgsql
|
|
SECURITY DEFINER
|
|
SET search_path = public
|
|
AS $$
|
|
DECLARE
|
|
v_orphaned_count INTEGER;
|
|
BEGIN
|
|
UPDATE orphaned_images
|
|
SET marked_for_deletion_at = now()
|
|
WHERE marked_for_deletion_at IS NULL
|
|
AND uploaded_at < now() - interval '24 hours'
|
|
AND NOT EXISTS (
|
|
SELECT 1 FROM parks WHERE image_id = orphaned_images.image_id
|
|
UNION ALL
|
|
SELECT 1 FROM rides WHERE image_id = orphaned_images.image_id
|
|
);
|
|
|
|
GET DIAGNOSTICS v_orphaned_count = ROW_COUNT;
|
|
|
|
SELECT COUNT(*) INTO v_orphaned_count
|
|
FROM orphaned_images
|
|
WHERE marked_for_deletion_at IS NOT NULL;
|
|
|
|
RETURN QUERY SELECT
|
|
'mark_orphaned_images'::TEXT,
|
|
'success'::TEXT,
|
|
jsonb_build_object('count', v_orphaned_count);
|
|
|
|
IF v_orphaned_count >= 500 THEN
|
|
PERFORM create_system_alert(
|
|
'orphaned_images',
|
|
'critical',
|
|
format('CRITICAL: %s orphaned images require cleanup', v_orphaned_count),
|
|
jsonb_build_object('count', v_orphaned_count)
|
|
);
|
|
ELSIF v_orphaned_count >= 100 THEN
|
|
PERFORM create_system_alert(
|
|
'orphaned_images',
|
|
'high',
|
|
format('High number of orphaned images: %s', v_orphaned_count),
|
|
jsonb_build_object('count', v_orphaned_count)
|
|
);
|
|
ELSIF v_orphaned_count >= 50 THEN
|
|
PERFORM create_system_alert(
|
|
'orphaned_images',
|
|
'medium',
|
|
format('Moderate orphaned images detected: %s', v_orphaned_count),
|
|
jsonb_build_object('count', v_orphaned_count)
|
|
);
|
|
END IF;
|
|
|
|
EXCEPTION WHEN OTHERS THEN
|
|
RETURN QUERY SELECT
|
|
'mark_orphaned_images'::TEXT,
|
|
'error'::TEXT,
|
|
jsonb_build_object('error', SQLERRM);
|
|
END;
|
|
$$;
|
|
|
|
-- 6. Master Monitoring Function
|
|
CREATE OR REPLACE FUNCTION run_pipeline_monitoring()
|
|
RETURNS TABLE(check_name TEXT, status TEXT, details JSONB)
|
|
LANGUAGE plpgsql
|
|
SECURITY DEFINER
|
|
SET search_path = public
|
|
AS $$
|
|
BEGIN
|
|
BEGIN
|
|
PERFORM monitor_failed_submissions();
|
|
RETURN QUERY SELECT
|
|
'monitor_failed_submissions'::TEXT,
|
|
'success'::TEXT,
|
|
'{}'::JSONB;
|
|
EXCEPTION WHEN OTHERS THEN
|
|
RETURN QUERY SELECT
|
|
'monitor_failed_submissions'::TEXT,
|
|
'error'::TEXT,
|
|
jsonb_build_object('error', SQLERRM);
|
|
END;
|
|
|
|
BEGIN
|
|
PERFORM monitor_ban_attempts();
|
|
RETURN QUERY SELECT
|
|
'monitor_ban_attempts'::TEXT,
|
|
'success'::TEXT,
|
|
'{}'::JSONB;
|
|
EXCEPTION WHEN OTHERS THEN
|
|
RETURN QUERY SELECT
|
|
'monitor_ban_attempts'::TEXT,
|
|
'error'::TEXT,
|
|
jsonb_build_object('error', SQLERRM);
|
|
END;
|
|
|
|
BEGIN
|
|
PERFORM monitor_slow_approvals();
|
|
RETURN QUERY SELECT
|
|
'monitor_slow_approvals'::TEXT,
|
|
'success'::TEXT,
|
|
'{}'::JSONB;
|
|
EXCEPTION WHEN OTHERS THEN
|
|
RETURN QUERY SELECT
|
|
'monitor_slow_approvals'::TEXT,
|
|
'error'::TEXT,
|
|
jsonb_build_object('error', SQLERRM);
|
|
END;
|
|
|
|
RETURN;
|
|
END;
|
|
$$;
|
|
|
|
GRANT EXECUTE ON FUNCTION run_pipeline_monitoring() TO authenticated;
|
|
GRANT EXECUTE ON FUNCTION monitor_failed_submissions() TO authenticated;
|
|
GRANT EXECUTE ON FUNCTION monitor_ban_attempts() TO authenticated;
|
|
GRANT EXECUTE ON FUNCTION monitor_slow_approvals() TO authenticated; |