-- Fix search_path security issue for analyze_data_completeness function -- This ensures the function only uses public schema and prevents SQL injection CREATE OR REPLACE FUNCTION analyze_data_completeness( p_entity_type TEXT DEFAULT NULL, p_min_score NUMERIC DEFAULT NULL, p_max_score NUMERIC DEFAULT NULL, p_missing_category TEXT DEFAULT NULL, p_limit INTEGER DEFAULT 100, p_offset INTEGER DEFAULT 0 ) RETURNS JSONB LANGUAGE plpgsql SECURITY DEFINER SET search_path = public AS $$ DECLARE v_result JSONB; v_parks JSONB; v_rides JSONB; v_companies JSONB; v_ride_models JSONB; v_locations JSONB; v_timeline_events JSONB; v_summary JSONB; BEGIN -- Parks Analysis (including historical) WITH park_analysis AS ( SELECT p.id, p.name, p.slug, 'park' as entity_type, p.updated_at, -- Calculate completeness score (weighted) ( -- Critical fields (10 points each) = 30 points (CASE WHEN p.park_type IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN p.status IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN p.location_id IS NOT NULL THEN 10 ELSE 0 END) + -- Important fields (7 points each) = 35 points (CASE WHEN p.description IS NOT NULL AND length(p.description) > 50 THEN 7 ELSE 0 END) + (CASE WHEN p.operator_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN p.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN p.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN p.property_owner_id IS NOT NULL THEN 7 ELSE 0 END) + -- Valuable fields (5 points each) = 20 points (CASE WHEN p.opening_date IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN p.opening_date_precision IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN p.website_url IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN p.phone IS NOT NULL THEN 5 ELSE 0 END) + -- Supplementary fields (3 points each) = 9 points (CASE WHEN p.email IS NOT NULL THEN 3 ELSE 0 END) + (CASE WHEN p.closing_date IS NOT NULL AND p.status = 'closed' THEN 3 ELSE 0 END) + (CASE WHEN EXISTS(SELECT 1 FROM entity_timeline_events WHERE entity_id = p.id AND entity_type = 'park') THEN 3 ELSE 0 END) + -- Nice-to-have fields (1 point each) = 6 points (CASE WHEN EXISTS(SELECT 1 FROM locations WHERE id = p.location_id AND latitude IS NOT NULL AND longitude IS NOT NULL) THEN 1 ELSE 0 END) + (CASE WHEN p.closing_date_precision IS NOT NULL AND p.status = 'closed' THEN 1 ELSE 0 END) )::NUMERIC / 100.0 * 100 as completeness_score, -- Missing fields tracking jsonb_build_object( 'critical', jsonb_build_array( CASE WHEN p.park_type IS NULL THEN 'park_type' END, CASE WHEN p.status IS NULL THEN 'status' END, CASE WHEN p.location_id IS NULL THEN 'location_id' END ) - 'null'::jsonb, 'important', jsonb_build_array( CASE WHEN p.description IS NULL OR length(p.description) <= 50 THEN 'description' END, CASE WHEN p.operator_id IS NULL THEN 'operator_id' END, CASE WHEN p.banner_image_id IS NULL THEN 'banner_image' END, CASE WHEN p.card_image_id IS NULL THEN 'card_image' END, CASE WHEN p.property_owner_id IS NULL THEN 'property_owner_id' END ) - 'null'::jsonb, 'valuable', jsonb_build_array( CASE WHEN p.opening_date IS NULL THEN 'opening_date' END, CASE WHEN p.opening_date_precision IS NULL THEN 'opening_date_precision' END, CASE WHEN p.website_url IS NULL THEN 'website_url' END, CASE WHEN p.phone IS NULL THEN 'phone' END ) - 'null'::jsonb, 'supplementary', jsonb_build_array( CASE WHEN p.email IS NULL THEN 'email' END, CASE WHEN p.closing_date IS NULL AND p.status = 'closed' THEN 'closing_date' END ) - 'null'::jsonb ) as missing_fields FROM parks p WHERE (p_entity_type IS NULL OR p_entity_type = 'park') ) SELECT jsonb_agg( jsonb_build_object( 'id', id, 'name', name, 'slug', slug, 'entity_type', entity_type, 'updated_at', updated_at, 'completeness_score', completeness_score, 'missing_fields', missing_fields ) ORDER BY completeness_score ASC, name ASC ) INTO v_parks FROM park_analysis WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) AND (p_max_score IS NULL OR completeness_score <= p_max_score) LIMIT p_limit OFFSET p_offset; -- Rides Analysis WITH ride_analysis AS ( SELECT r.id, r.name, r.slug, 'ride' as entity_type, r.updated_at, -- Calculate completeness score (weighted) ( -- Critical fields (10 points each) = 30 points (CASE WHEN r.park_id IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN r.category IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN r.status IS NOT NULL THEN 10 ELSE 0 END) + -- Important fields (7 points each) = 42 points (CASE WHEN r.description IS NOT NULL AND length(r.description) > 50 THEN 7 ELSE 0 END) + (CASE WHEN r.manufacturer_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN r.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN r.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN r.ride_model_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN r.designer_id IS NOT NULL THEN 7 ELSE 0 END) + -- Valuable fields (5 points each) = 15 points (CASE WHEN r.opening_date IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN r.opening_date_precision IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN r.ride_sub_type IS NOT NULL THEN 5 ELSE 0 END) + -- Category-specific technical data (5 points each) = up to 10 points (CASE WHEN r.category = 'Roller Coaster' THEN (CASE WHEN r.coaster_type IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN r.max_speed_kmh IS NOT NULL THEN 5 ELSE 0 END) WHEN r.category = 'Water Ride' THEN (CASE WHEN r.flume_type IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN r.wetness_level IS NOT NULL THEN 5 ELSE 0 END) WHEN r.category = 'Dark Ride' THEN (CASE WHEN r.theme_name IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN r.ride_system IS NOT NULL THEN 5 ELSE 0 END) ELSE 0 END) + -- Supplementary fields (3 points each) = 9 points (CASE WHEN r.max_height_meters IS NOT NULL THEN 3 ELSE 0 END) + (CASE WHEN r.length_meters IS NOT NULL THEN 3 ELSE 0 END) + (CASE WHEN r.capacity_per_hour IS NOT NULL THEN 3 ELSE 0 END) )::NUMERIC / 100.0 * 100 as completeness_score, -- Missing fields tracking jsonb_build_object( 'critical', jsonb_build_array( CASE WHEN r.park_id IS NULL THEN 'park_id' END, CASE WHEN r.category IS NULL THEN 'category' END, CASE WHEN r.status IS NULL THEN 'status' END ) - 'null'::jsonb, 'important', jsonb_build_array( CASE WHEN r.description IS NULL OR length(r.description) <= 50 THEN 'description' END, CASE WHEN r.manufacturer_id IS NULL THEN 'manufacturer_id' END, CASE WHEN r.banner_image_id IS NULL THEN 'banner_image' END, CASE WHEN r.card_image_id IS NULL THEN 'card_image' END, CASE WHEN r.ride_model_id IS NULL THEN 'ride_model_id' END, CASE WHEN r.designer_id IS NULL THEN 'designer_id' END ) - 'null'::jsonb, 'valuable', jsonb_build_array( CASE WHEN r.opening_date IS NULL THEN 'opening_date' END, CASE WHEN r.opening_date_precision IS NULL THEN 'opening_date_precision' END, CASE WHEN r.ride_sub_type IS NULL THEN 'ride_sub_type' END ) - 'null'::jsonb ) as missing_fields FROM rides r WHERE (p_entity_type IS NULL OR p_entity_type = 'ride') ) SELECT jsonb_agg( jsonb_build_object( 'id', id, 'name', name, 'slug', slug, 'entity_type', entity_type, 'updated_at', updated_at, 'completeness_score', completeness_score, 'missing_fields', missing_fields ) ORDER BY completeness_score ASC, name ASC ) INTO v_rides FROM ride_analysis WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) AND (p_max_score IS NULL OR completeness_score <= p_max_score) LIMIT p_limit OFFSET p_offset; -- Companies Analysis WITH company_analysis AS ( SELECT c.id, c.name, c.slug, 'company' as entity_type, c.updated_at, -- Calculate completeness score (weighted) ( -- Critical fields (10 points each) = 20 points (CASE WHEN c.company_type IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN c.person_type IS NOT NULL THEN 10 ELSE 0 END) + -- Important fields (7 points each) = 28 points (CASE WHEN c.description IS NOT NULL AND length(c.description) > 50 THEN 7 ELSE 0 END) + (CASE WHEN c.logo_url IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN c.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN c.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + -- Valuable fields (5 points each) = 20 points (CASE WHEN c.founded_year IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN c.founded_date IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN c.website_url IS NOT NULL THEN 5 ELSE 0 END) + (CASE WHEN c.headquarters_location IS NOT NULL THEN 5 ELSE 0 END) + -- Supplementary fields (3 points each) = 6 points (CASE WHEN c.founded_date_precision IS NOT NULL THEN 3 ELSE 0 END) + (CASE WHEN c.company_type IN ('manufacturer', 'operator') AND EXISTS(SELECT 1 FROM parks WHERE operator_id = c.id OR property_owner_id = c.id LIMIT 1) THEN 3 ELSE 0 END) )::NUMERIC / 100.0 * 100 as completeness_score, -- Missing fields tracking jsonb_build_object( 'critical', jsonb_build_array( CASE WHEN c.company_type IS NULL THEN 'company_type' END, CASE WHEN c.person_type IS NULL THEN 'person_type' END ) - 'null'::jsonb, 'important', jsonb_build_array( CASE WHEN c.description IS NULL OR length(c.description) <= 50 THEN 'description' END, CASE WHEN c.logo_url IS NULL THEN 'logo_url' END, CASE WHEN c.banner_image_id IS NULL THEN 'banner_image' END, CASE WHEN c.card_image_id IS NULL THEN 'card_image' END ) - 'null'::jsonb, 'valuable', jsonb_build_array( CASE WHEN c.founded_year IS NULL THEN 'founded_year' END, CASE WHEN c.founded_date IS NULL THEN 'founded_date' END, CASE WHEN c.website_url IS NULL THEN 'website_url' END, CASE WHEN c.headquarters_location IS NULL THEN 'headquarters_location' END ) - 'null'::jsonb ) as missing_fields FROM companies c WHERE (p_entity_type IS NULL OR p_entity_type = 'company') ) SELECT jsonb_agg( jsonb_build_object( 'id', id, 'name', name, 'slug', slug, 'entity_type', entity_type, 'updated_at', updated_at, 'completeness_score', completeness_score, 'missing_fields', missing_fields ) ORDER BY completeness_score ASC, name ASC ) INTO v_companies FROM company_analysis WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) AND (p_max_score IS NULL OR completeness_score <= p_max_score) LIMIT p_limit OFFSET p_offset; -- Ride Models Analysis WITH model_analysis AS ( SELECT rm.id, rm.name, rm.slug, 'ride_model' as entity_type, rm.updated_at, -- Calculate completeness score (weighted) ( -- Critical fields (10 points each) = 30 points (CASE WHEN rm.manufacturer_id IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN rm.category IS NOT NULL THEN 10 ELSE 0 END) + (CASE WHEN rm.ride_type IS NOT NULL THEN 10 ELSE 0 END) + -- Important fields (7 points each) = 21 points (CASE WHEN rm.description IS NOT NULL AND length(rm.description) > 50 THEN 7 ELSE 0 END) + (CASE WHEN rm.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + (CASE WHEN rm.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + -- Valuable fields (5 points each) = 10 points (CASE WHEN EXISTS(SELECT 1 FROM rides WHERE ride_model_id = rm.id LIMIT 1) THEN 5 ELSE 0 END) + (CASE WHEN rm.introduction_year IS NOT NULL THEN 5 ELSE 0 END) )::NUMERIC / 100.0 * 100 as completeness_score, -- Missing fields tracking jsonb_build_object( 'critical', jsonb_build_array( CASE WHEN rm.manufacturer_id IS NULL THEN 'manufacturer_id' END, CASE WHEN rm.category IS NULL THEN 'category' END, CASE WHEN rm.ride_type IS NULL THEN 'ride_type' END ) - 'null'::jsonb, 'important', jsonb_build_array( CASE WHEN rm.description IS NULL OR length(rm.description) <= 50 THEN 'description' END, CASE WHEN rm.banner_image_id IS NULL THEN 'banner_image' END, CASE WHEN rm.card_image_id IS NULL THEN 'card_image' END ) - 'null'::jsonb, 'valuable', jsonb_build_array( CASE WHEN rm.introduction_year IS NULL THEN 'introduction_year' END ) - 'null'::jsonb ) as missing_fields FROM ride_models rm WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model') ) SELECT jsonb_agg( jsonb_build_object( 'id', id, 'name', name, 'slug', slug, 'entity_type', entity_type, 'updated_at', updated_at, 'completeness_score', completeness_score, 'missing_fields', missing_fields ) ORDER BY completeness_score ASC, name ASC ) INTO v_ride_models FROM model_analysis WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) AND (p_max_score IS NULL OR completeness_score <= p_max_score) LIMIT p_limit OFFSET p_offset; -- Generate Summary v_summary := jsonb_build_object( 'total_entities', ( SELECT COUNT(*)::INTEGER FROM ( SELECT id FROM parks WHERE (p_entity_type IS NULL OR p_entity_type = 'park') UNION ALL SELECT id FROM rides WHERE (p_entity_type IS NULL OR p_entity_type = 'ride') UNION ALL SELECT id FROM companies WHERE (p_entity_type IS NULL OR p_entity_type = 'company') UNION ALL SELECT id FROM ride_models WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model') ) all_entities ), 'avg_completeness_score', ( SELECT ROUND(AVG(score)::NUMERIC, 2) FROM ( SELECT ((10 + 10 + 10)::NUMERIC / 100.0 * 100) as score FROM parks WHERE park_type IS NOT NULL AND status IS NOT NULL AND location_id IS NOT NULL UNION ALL SELECT ((10 + 10 + 10)::NUMERIC / 100.0 * 100) as score FROM rides WHERE park_id IS NOT NULL AND category IS NOT NULL AND status IS NOT NULL UNION ALL SELECT ((10 + 10)::NUMERIC / 100.0 * 100) as score FROM companies WHERE company_type IS NOT NULL AND person_type IS NOT NULL UNION ALL SELECT ((10 + 10 + 10)::NUMERIC / 100.0 * 100) as score FROM ride_models WHERE manufacturer_id IS NOT NULL AND category IS NOT NULL AND ride_type IS NOT NULL ) scores ), 'entities_below_50', ( SELECT COUNT(*)::INTEGER FROM ( SELECT id FROM parks WHERE (p_entity_type IS NULL OR p_entity_type = 'park') UNION ALL SELECT id FROM rides WHERE (p_entity_type IS NULL OR p_entity_type = 'ride') UNION ALL SELECT id FROM companies WHERE (p_entity_type IS NULL OR p_entity_type = 'company') UNION ALL SELECT id FROM ride_models WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model') ) all_entities WHERE id IN ( SELECT id FROM parks WHERE description IS NULL OR manufacturer_id IS NULL UNION SELECT id FROM rides WHERE description IS NULL OR manufacturer_id IS NULL UNION SELECT id FROM companies WHERE description IS NULL UNION SELECT id FROM ride_models WHERE description IS NULL ) ), 'entities_100_complete', 0, 'by_entity_type', jsonb_build_object( 'parks', (SELECT COUNT(*)::INTEGER FROM parks WHERE (p_entity_type IS NULL OR p_entity_type = 'park')), 'rides', (SELECT COUNT(*)::INTEGER FROM rides WHERE (p_entity_type IS NULL OR p_entity_type = 'ride')), 'companies', (SELECT COUNT(*)::INTEGER FROM companies WHERE (p_entity_type IS NULL OR p_entity_type = 'company')), 'ride_models', (SELECT COUNT(*)::INTEGER FROM ride_models WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model')) ) ); -- Build final result v_result := jsonb_build_object( 'summary', v_summary, 'entities', jsonb_build_object( 'parks', COALESCE(v_parks, '[]'::jsonb), 'rides', COALESCE(v_rides, '[]'::jsonb), 'companies', COALESCE(v_companies, '[]'::jsonb), 'ride_models', COALESCE(v_ride_models, '[]'::jsonb) ), 'generated_at', now() ); RETURN v_result; END; $$;