-- Fix analyze_data_completeness function to check appropriate fields per entity type -- Parks don't have manufacturer_id, so we check relevant fields like description, park_type, status, location_id -- Rides and ride_models DO have manufacturer_id, so we check that -- Companies don't have manufacturer_id, so we check description and company_type CREATE OR REPLACE FUNCTION analyze_data_completeness() RETURNS jsonb LANGUAGE plpgsql SECURITY DEFINER AS $$ DECLARE result jsonb; park_data jsonb; ride_data jsonb; company_data jsonb; ride_model_data jsonb; total_entities integer; avg_score numeric; below_50_count integer; complete_count integer; BEGIN -- Analyze Parks WITH park_completeness AS ( SELECT id, name, slug, 'park'::text as entity_type, updated_at, CASE WHEN description IS NULL OR length(description) <= 50 THEN 0 ELSE 20 END + CASE WHEN park_type IS NULL THEN 0 ELSE 15 END + CASE WHEN status IS NULL THEN 0 ELSE 10 END + CASE WHEN location_id IS NULL THEN 0 ELSE 15 END + CASE WHEN opening_date IS NULL THEN 0 ELSE 10 END + CASE WHEN website_url IS NULL THEN 0 ELSE 5 END + CASE WHEN card_image_url IS NULL THEN 0 ELSE 10 END + CASE WHEN banner_image_url IS NULL THEN 0 ELSE 5 END + CASE WHEN phone IS NULL THEN 0 ELSE 5 END + CASE WHEN email IS NULL THEN 0 ELSE 5 END as completeness_score FROM parks ), park_missing AS ( SELECT id, jsonb_build_object( 'critical', ARRAY_REMOVE(ARRAY[ CASE WHEN description IS NULL OR length(description) <= 50 THEN 'description' END, CASE WHEN park_type IS NULL THEN 'park_type' END, CASE WHEN status IS NULL THEN 'status' END ], NULL), 'important', ARRAY_REMOVE(ARRAY[ CASE WHEN location_id IS NULL THEN 'location_id' END, CASE WHEN opening_date IS NULL THEN 'opening_date' END ], NULL), 'valuable', ARRAY_REMOVE(ARRAY[ CASE WHEN website_url IS NULL THEN 'website_url' END, CASE WHEN card_image_url IS NULL THEN 'card_image_url' END ], NULL), 'supplementary', ARRAY_REMOVE(ARRAY[ CASE WHEN banner_image_url IS NULL THEN 'banner_image_url' END, CASE WHEN phone IS NULL THEN 'phone' END, CASE WHEN email IS NULL THEN 'email' END ], NULL) ) as missing_fields FROM parks ) SELECT jsonb_agg( jsonb_build_object( 'id', pc.id, 'name', pc.name, 'slug', pc.slug, 'entity_type', pc.entity_type, 'updated_at', pc.updated_at, 'completeness_score', pc.completeness_score, 'missing_fields', pm.missing_fields ) ) INTO park_data FROM park_completeness pc JOIN park_missing pm ON pc.id = pm.id; -- Analyze Rides WITH ride_completeness AS ( SELECT id, name, slug, 'ride'::text as entity_type, updated_at, CASE WHEN description IS NULL OR length(description) <= 50 THEN 0 ELSE 20 END + CASE WHEN manufacturer_id IS NULL THEN 0 ELSE 15 END + CASE WHEN category IS NULL THEN 0 ELSE 15 END + CASE WHEN status IS NULL THEN 0 ELSE 10 END + CASE WHEN opening_date IS NULL THEN 0 ELSE 10 END + CASE WHEN ride_model_id IS NULL THEN 0 ELSE 10 END + CASE WHEN card_image_url IS NULL THEN 0 ELSE 10 END + CASE WHEN banner_image_url IS NULL THEN 0 ELSE 5 END + CASE WHEN height_restriction IS NULL THEN 0 ELSE 3 END + CASE WHEN max_speed IS NULL THEN 0 ELSE 2 END as completeness_score FROM rides ), ride_missing AS ( SELECT id, jsonb_build_object( 'critical', ARRAY_REMOVE(ARRAY[ CASE WHEN description IS NULL OR length(description) <= 50 THEN 'description' END, CASE WHEN manufacturer_id IS NULL THEN 'manufacturer_id' END, CASE WHEN category IS NULL THEN 'category' END, CASE WHEN status IS NULL THEN 'status' END ], NULL), 'important', ARRAY_REMOVE(ARRAY[ CASE WHEN opening_date IS NULL THEN 'opening_date' END, CASE WHEN ride_model_id IS NULL THEN 'ride_model_id' END ], NULL), 'valuable', ARRAY_REMOVE(ARRAY[ CASE WHEN card_image_url IS NULL THEN 'card_image_url' END, CASE WHEN banner_image_url IS NULL THEN 'banner_image_url' END ], NULL), 'supplementary', ARRAY_REMOVE(ARRAY[ CASE WHEN height_restriction IS NULL THEN 'height_restriction' END, CASE WHEN max_speed IS NULL THEN 'max_speed' END ], NULL) ) as missing_fields FROM rides ) SELECT jsonb_agg( jsonb_build_object( 'id', rc.id, 'name', rc.name, 'slug', rc.slug, 'entity_type', rc.entity_type, 'updated_at', rc.updated_at, 'completeness_score', rc.completeness_score, 'missing_fields', rm.missing_fields ) ) INTO ride_data FROM ride_completeness rc JOIN ride_missing rm ON rc.id = rm.id; -- Analyze Companies WITH company_completeness AS ( SELECT id, name, slug, 'company'::text as entity_type, updated_at, CASE WHEN description IS NULL OR length(description) <= 50 THEN 0 ELSE 25 END + CASE WHEN company_type IS NULL THEN 0 ELSE 20 END + CASE WHEN headquarters_location IS NULL THEN 0 ELSE 15 END + CASE WHEN founded_year IS NULL THEN 0 ELSE 10 END + CASE WHEN website_url IS NULL THEN 0 ELSE 10 END + CASE WHEN logo_url IS NULL THEN 0 ELSE 10 END + CASE WHEN card_image_url IS NULL THEN 0 ELSE 5 END + CASE WHEN banner_image_url IS NULL THEN 0 ELSE 5 END as completeness_score FROM companies ), company_missing AS ( SELECT id, jsonb_build_object( 'critical', ARRAY_REMOVE(ARRAY[ CASE WHEN description IS NULL OR length(description) <= 50 THEN 'description' END, CASE WHEN company_type IS NULL THEN 'company_type' END ], NULL), 'important', ARRAY_REMOVE(ARRAY[ CASE WHEN headquarters_location IS NULL THEN 'headquarters_location' END, CASE WHEN founded_year IS NULL THEN 'founded_year' END ], NULL), 'valuable', ARRAY_REMOVE(ARRAY[ CASE WHEN website_url IS NULL THEN 'website_url' END, CASE WHEN logo_url IS NULL THEN 'logo_url' END ], NULL), 'supplementary', ARRAY_REMOVE(ARRAY[ CASE WHEN card_image_url IS NULL THEN 'card_image_url' END, CASE WHEN banner_image_url IS NULL THEN 'banner_image_url' END ], NULL) ) as missing_fields FROM companies ) SELECT jsonb_agg( jsonb_build_object( 'id', cc.id, 'name', cc.name, 'slug', cc.slug, 'entity_type', cc.entity_type, 'updated_at', cc.updated_at, 'completeness_score', cc.completeness_score, 'missing_fields', cm.missing_fields ) ) INTO company_data FROM company_completeness cc JOIN company_missing cm ON cc.id = cm.id; -- Analyze Ride Models WITH ride_model_completeness AS ( SELECT id, name, slug, 'ride_model'::text as entity_type, updated_at, CASE WHEN description IS NULL OR length(description) <= 50 THEN 0 ELSE 25 END + CASE WHEN manufacturer_id IS NULL THEN 0 ELSE 20 END + CASE WHEN category IS NULL THEN 0 ELSE 20 END + CASE WHEN card_image_url IS NULL THEN 0 ELSE 15 END + CASE WHEN banner_image_url IS NULL THEN 0 ELSE 10 END + CASE WHEN max_speed IS NULL THEN 0 ELSE 5 END + CASE WHEN height IS NULL THEN 0 ELSE 5 END as completeness_score FROM ride_models ), ride_model_missing AS ( SELECT id, jsonb_build_object( 'critical', ARRAY_REMOVE(ARRAY[ CASE WHEN description IS NULL OR length(description) <= 50 THEN 'description' END, CASE WHEN manufacturer_id IS NULL THEN 'manufacturer_id' END, CASE WHEN category IS NULL THEN 'category' END ], NULL), 'important', ARRAY_REMOVE(ARRAY[ CASE WHEN card_image_url IS NULL THEN 'card_image_url' END ], NULL), 'valuable', ARRAY_REMOVE(ARRAY[ CASE WHEN banner_image_url IS NULL THEN 'banner_image_url' END ], NULL), 'supplementary', ARRAY_REMOVE(ARRAY[ CASE WHEN max_speed IS NULL THEN 'max_speed' END, CASE WHEN height IS NULL THEN 'height' END ], NULL) ) as missing_fields FROM ride_models ) SELECT jsonb_agg( jsonb_build_object( 'id', rmc.id, 'name', rmc.name, 'slug', rmc.slug, 'entity_type', rmc.entity_type, 'updated_at', rmc.updated_at, 'completeness_score', rmc.completeness_score, 'missing_fields', rmm.missing_fields ) ) INTO ride_model_data FROM ride_model_completeness rmc JOIN ride_model_missing rmm ON rmc.id = rmm.id; -- Calculate summary statistics WITH all_scores AS ( SELECT (value->>'completeness_score')::numeric as score FROM jsonb_array_elements(COALESCE(park_data, '[]'::jsonb)) UNION ALL SELECT (value->>'completeness_score')::numeric FROM jsonb_array_elements(COALESCE(ride_data, '[]'::jsonb)) UNION ALL SELECT (value->>'completeness_score')::numeric FROM jsonb_array_elements(COALESCE(company_data, '[]'::jsonb)) UNION ALL SELECT (value->>'completeness_score')::numeric FROM jsonb_array_elements(COALESCE(ride_model_data, '[]'::jsonb)) ) SELECT COUNT(*)::integer, ROUND(AVG(score), 2), COUNT(CASE WHEN score < 50 THEN 1 END)::integer, COUNT(CASE WHEN score = 100 THEN 1 END)::integer INTO total_entities, avg_score, below_50_count, complete_count FROM all_scores; -- Build final result result := jsonb_build_object( 'summary', jsonb_build_object( 'total_entities', COALESCE(total_entities, 0), 'avg_completeness_score', COALESCE(avg_score, 0), 'entities_below_50', COALESCE(below_50_count, 0), 'entities_100_complete', COALESCE(complete_count, 0), 'by_entity_type', jsonb_build_object( 'parks', COALESCE(jsonb_array_length(park_data), 0), 'rides', COALESCE(jsonb_array_length(ride_data), 0), 'companies', COALESCE(jsonb_array_length(company_data), 0), 'ride_models', COALESCE(jsonb_array_length(ride_model_data), 0) ) ), 'entities', jsonb_build_object( 'parks', COALESCE(park_data, '[]'::jsonb), 'rides', COALESCE(ride_data, '[]'::jsonb), 'companies', COALESCE(company_data, '[]'::jsonb), 'ride_models', COALESCE(ride_model_data, '[]'::jsonb) ), 'generated_at', to_jsonb(now()) ); RETURN result; END; $$;