diff --git a/supabase/migrations/20251112015238_631cd0f1-06a6-4aeb-bc05-b88e8e15ca4c.sql b/supabase/migrations/20251112015238_631cd0f1-06a6-4aeb-bc05-b88e8e15ca4c.sql new file mode 100644 index 00000000..8c5e9d05 --- /dev/null +++ b/supabase/migrations/20251112015238_631cd0f1-06a6-4aeb-bc05-b88e8e15ca4c.sql @@ -0,0 +1,398 @@ +-- Fix analyze_data_completeness: Remove non-existent introduction_year column reference +-- The ride_models table doesn't have an introduction_year column + +CREATE OR REPLACE FUNCTION analyze_data_completeness( + p_entity_type TEXT DEFAULT NULL, + p_min_score NUMERIC DEFAULT NULL, + p_max_score NUMERIC DEFAULT NULL, + p_missing_category TEXT DEFAULT NULL, + p_limit INTEGER DEFAULT 100, + p_offset INTEGER DEFAULT 0 +) +RETURNS JSONB +LANGUAGE plpgsql +SECURITY DEFINER +SET search_path = public +AS $$ +DECLARE + v_result JSONB; + v_parks JSONB; + v_rides JSONB; + v_companies JSONB; + v_ride_models JSONB; + v_locations JSONB; + v_timeline_events JSONB; + v_summary JSONB; +BEGIN + -- Parks Analysis (including historical) + WITH park_analysis AS ( + SELECT + p.id, + p.name, + p.slug, + 'park' as entity_type, + p.updated_at, + -- Calculate completeness score (weighted) + ( + -- Critical fields (10 points each) = 30 points + (CASE WHEN p.park_type IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN p.status IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN p.location_id IS NOT NULL THEN 10 ELSE 0 END) + + + -- Important fields (7 points each) = 35 points + (CASE WHEN p.description IS NOT NULL AND length(p.description) > 50 THEN 7 ELSE 0 END) + + (CASE WHEN p.operator_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN p.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN p.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN p.property_owner_id IS NOT NULL THEN 7 ELSE 0 END) + + + -- Valuable fields (5 points each) = 20 points + (CASE WHEN p.opening_date IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN p.opening_date_precision IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN p.website_url IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN p.phone IS NOT NULL THEN 5 ELSE 0 END) + + + -- Supplementary fields (3 points each) = 9 points + (CASE WHEN p.email IS NOT NULL THEN 3 ELSE 0 END) + + (CASE WHEN p.closing_date IS NOT NULL AND p.status = 'closed' THEN 3 ELSE 0 END) + + (CASE WHEN EXISTS(SELECT 1 FROM entity_timeline_events WHERE entity_id = p.id AND entity_type = 'park') THEN 3 ELSE 0 END) + + + -- Nice-to-have fields (1 point each) = 6 points + (CASE WHEN EXISTS(SELECT 1 FROM locations WHERE id = p.location_id AND latitude IS NOT NULL AND longitude IS NOT NULL) THEN 1 ELSE 0 END) + + (CASE WHEN p.closing_date_precision IS NOT NULL AND p.status = 'closed' THEN 1 ELSE 0 END) + )::NUMERIC / 100.0 * 100 as completeness_score, + + -- Missing fields tracking (using helper function) + jsonb_build_object( + 'critical', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN p.park_type IS NULL THEN 'park_type' END, + CASE WHEN p.status IS NULL THEN 'status' END, + CASE WHEN p.location_id IS NULL THEN 'location_id' END + )), + 'important', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN p.description IS NULL OR length(p.description) <= 50 THEN 'description' END, + CASE WHEN p.operator_id IS NULL THEN 'operator_id' END, + CASE WHEN p.banner_image_id IS NULL THEN 'banner_image' END, + CASE WHEN p.card_image_id IS NULL THEN 'card_image' END, + CASE WHEN p.property_owner_id IS NULL THEN 'property_owner_id' END + )), + 'valuable', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN p.opening_date IS NULL THEN 'opening_date' END, + CASE WHEN p.opening_date_precision IS NULL THEN 'opening_date_precision' END, + CASE WHEN p.website_url IS NULL THEN 'website_url' END, + CASE WHEN p.phone IS NULL THEN 'phone' END + )), + 'supplementary', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN p.email IS NULL THEN 'email' END, + CASE WHEN p.closing_date IS NULL AND p.status = 'closed' THEN 'closing_date' END + )) + ) as missing_fields + FROM parks p + WHERE (p_entity_type IS NULL OR p_entity_type = 'park') + ) + SELECT jsonb_agg( + jsonb_build_object( + 'id', id, + 'name', name, + 'slug', slug, + 'entity_type', entity_type, + 'updated_at', updated_at, + 'completeness_score', completeness_score, + 'missing_fields', missing_fields + ) ORDER BY completeness_score ASC, name ASC + ) + INTO v_parks + FROM park_analysis + WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) + AND (p_max_score IS NULL OR completeness_score <= p_max_score) + LIMIT p_limit OFFSET p_offset; + + -- Rides Analysis + WITH ride_analysis AS ( + SELECT + r.id, + r.name, + r.slug, + 'ride' as entity_type, + r.updated_at, + -- Calculate completeness score (weighted) + ( + -- Critical fields (10 points each) = 30 points + (CASE WHEN r.park_id IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN r.category IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN r.status IS NOT NULL THEN 10 ELSE 0 END) + + + -- Important fields (7 points each) = 42 points + (CASE WHEN r.description IS NOT NULL AND length(r.description) > 50 THEN 7 ELSE 0 END) + + (CASE WHEN r.manufacturer_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN r.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN r.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN r.ride_model_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN r.designer_id IS NOT NULL THEN 7 ELSE 0 END) + + + -- Valuable fields (5 points each) = 15 points + (CASE WHEN r.opening_date IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN r.opening_date_precision IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN r.ride_sub_type IS NOT NULL THEN 5 ELSE 0 END) + + + -- Category-specific technical data (5 points each) = up to 10 points + (CASE + WHEN r.category = 'Roller Coaster' THEN + (CASE WHEN r.coaster_type IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN r.max_speed_kmh IS NOT NULL THEN 5 ELSE 0 END) + WHEN r.category = 'Water Ride' THEN + (CASE WHEN r.flume_type IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN r.wetness_level IS NOT NULL THEN 5 ELSE 0 END) + WHEN r.category = 'Dark Ride' THEN + (CASE WHEN r.theme_name IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN r.ride_system IS NOT NULL THEN 5 ELSE 0 END) + ELSE 0 + END) + + + -- Supplementary fields (3 points each) = 9 points + (CASE WHEN r.max_height_meters IS NOT NULL THEN 3 ELSE 0 END) + + (CASE WHEN r.length_meters IS NOT NULL THEN 3 ELSE 0 END) + + (CASE WHEN r.capacity_per_hour IS NOT NULL THEN 3 ELSE 0 END) + )::NUMERIC / 100.0 * 100 as completeness_score, + + -- Missing fields tracking (using helper function) + jsonb_build_object( + 'critical', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN r.park_id IS NULL THEN 'park_id' END, + CASE WHEN r.category IS NULL THEN 'category' END, + CASE WHEN r.status IS NULL THEN 'status' END + )), + 'important', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN r.description IS NULL OR length(r.description) <= 50 THEN 'description' END, + CASE WHEN r.manufacturer_id IS NULL THEN 'manufacturer_id' END, + CASE WHEN r.banner_image_id IS NULL THEN 'banner_image' END, + CASE WHEN r.card_image_id IS NULL THEN 'card_image' END, + CASE WHEN r.ride_model_id IS NULL THEN 'ride_model_id' END, + CASE WHEN r.designer_id IS NULL THEN 'designer_id' END + )), + 'valuable', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN r.opening_date IS NULL THEN 'opening_date' END, + CASE WHEN r.opening_date_precision IS NULL THEN 'opening_date_precision' END, + CASE WHEN r.ride_sub_type IS NULL THEN 'ride_sub_type' END + )) + ) as missing_fields + FROM rides r + WHERE (p_entity_type IS NULL OR p_entity_type = 'ride') + ) + SELECT jsonb_agg( + jsonb_build_object( + 'id', id, + 'name', name, + 'slug', slug, + 'entity_type', entity_type, + 'updated_at', updated_at, + 'completeness_score', completeness_score, + 'missing_fields', missing_fields + ) ORDER BY completeness_score ASC, name ASC + ) + INTO v_rides + FROM ride_analysis + WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) + AND (p_max_score IS NULL OR completeness_score <= p_max_score) + LIMIT p_limit OFFSET p_offset; + + -- Companies Analysis + WITH company_analysis AS ( + SELECT + c.id, + c.name, + c.slug, + 'company' as entity_type, + c.updated_at, + -- Calculate completeness score (weighted) + ( + -- Critical fields (10 points each) = 20 points + (CASE WHEN c.company_type IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN c.person_type IS NOT NULL THEN 10 ELSE 0 END) + + + -- Important fields (7 points each) = 28 points + (CASE WHEN c.description IS NOT NULL AND length(c.description) > 50 THEN 7 ELSE 0 END) + + (CASE WHEN c.logo_url IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN c.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN c.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + + + -- Valuable fields (5 points each) = 20 points + (CASE WHEN c.founded_year IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN c.founded_date IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN c.website_url IS NOT NULL THEN 5 ELSE 0 END) + + (CASE WHEN c.headquarters_location IS NOT NULL THEN 5 ELSE 0 END) + + + -- Supplementary fields (3 points each) = 6 points + (CASE WHEN c.founded_date_precision IS NOT NULL THEN 3 ELSE 0 END) + + (CASE WHEN c.company_type IN ('manufacturer', 'operator') AND EXISTS(SELECT 1 FROM parks WHERE operator_id = c.id OR property_owner_id = c.id LIMIT 1) THEN 3 ELSE 0 END) + )::NUMERIC / 100.0 * 100 as completeness_score, + + -- Missing fields tracking (using helper function) + jsonb_build_object( + 'critical', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN c.company_type IS NULL THEN 'company_type' END, + CASE WHEN c.person_type IS NULL THEN 'person_type' END + )), + 'important', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN c.description IS NULL OR length(c.description) <= 50 THEN 'description' END, + CASE WHEN c.logo_url IS NULL THEN 'logo_url' END, + CASE WHEN c.banner_image_id IS NULL THEN 'banner_image' END, + CASE WHEN c.card_image_id IS NULL THEN 'card_image' END + )), + 'valuable', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN c.founded_year IS NULL THEN 'founded_year' END, + CASE WHEN c.founded_date IS NULL THEN 'founded_date' END, + CASE WHEN c.website_url IS NULL THEN 'website_url' END, + CASE WHEN c.headquarters_location IS NULL THEN 'headquarters_location' END + )) + ) as missing_fields + FROM companies c + WHERE (p_entity_type IS NULL OR p_entity_type = 'company') + ) + SELECT jsonb_agg( + jsonb_build_object( + 'id', id, + 'name', name, + 'slug', slug, + 'entity_type', entity_type, + 'updated_at', updated_at, + 'completeness_score', completeness_score, + 'missing_fields', missing_fields + ) ORDER BY completeness_score ASC, name ASC + ) + INTO v_companies + FROM company_analysis + WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) + AND (p_max_score IS NULL OR completeness_score <= p_max_score) + LIMIT p_limit OFFSET p_offset; + + -- Ride Models Analysis - FIXED: Removed introduction_year references (lines 306, 322) + -- Total points reduced from 70 to 65 (removed 5 points from introduction_year) + WITH model_analysis AS ( + SELECT + rm.id, + rm.name, + rm.slug, + 'ride_model' as entity_type, + rm.updated_at, + -- Calculate completeness score (weighted) + ( + -- Critical fields (10 points each) = 30 points + (CASE WHEN rm.manufacturer_id IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN rm.category IS NOT NULL THEN 10 ELSE 0 END) + + (CASE WHEN rm.ride_type IS NOT NULL THEN 10 ELSE 0 END) + + + -- Important fields (7 points each) = 21 points + (CASE WHEN rm.description IS NOT NULL AND length(rm.description) > 50 THEN 7 ELSE 0 END) + + (CASE WHEN rm.banner_image_id IS NOT NULL THEN 7 ELSE 0 END) + + (CASE WHEN rm.card_image_id IS NOT NULL THEN 7 ELSE 0 END) + + + -- Valuable fields (5 points each) = 5 points + -- REMOVED: introduction_year check (was 5 points) + (CASE WHEN EXISTS(SELECT 1 FROM rides WHERE ride_model_id = rm.id LIMIT 1) THEN 5 ELSE 0 END) + )::NUMERIC / 100.0 * 100 as completeness_score, + + -- Missing fields tracking (using helper function) + jsonb_build_object( + 'critical', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN rm.manufacturer_id IS NULL THEN 'manufacturer_id' END, + CASE WHEN rm.category IS NULL THEN 'category' END, + CASE WHEN rm.ride_type IS NULL THEN 'ride_type' END + )), + 'important', filter_jsonb_array_nulls(jsonb_build_array( + CASE WHEN rm.description IS NULL OR length(rm.description) <= 50 THEN 'description' END, + CASE WHEN rm.banner_image_id IS NULL THEN 'banner_image' END, + CASE WHEN rm.card_image_id IS NULL THEN 'card_image' END + )), + 'valuable', filter_jsonb_array_nulls(jsonb_build_array( + -- REMOVED: introduction_year from missing fields tracking + )) + ) as missing_fields + FROM ride_models rm + WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model') + ) + SELECT jsonb_agg( + jsonb_build_object( + 'id', id, + 'name', name, + 'slug', slug, + 'entity_type', entity_type, + 'updated_at', updated_at, + 'completeness_score', completeness_score, + 'missing_fields', missing_fields + ) ORDER BY completeness_score ASC, name ASC + ) + INTO v_ride_models + FROM model_analysis + WHERE (p_min_score IS NULL OR completeness_score >= p_min_score) + AND (p_max_score IS NULL OR completeness_score <= p_max_score) + LIMIT p_limit OFFSET p_offset; + + -- Generate Summary + v_summary := jsonb_build_object( + 'total_entities', ( + SELECT COUNT(*)::INTEGER FROM ( + SELECT id FROM parks WHERE (p_entity_type IS NULL OR p_entity_type = 'park') + UNION ALL + SELECT id FROM rides WHERE (p_entity_type IS NULL OR p_entity_type = 'ride') + UNION ALL + SELECT id FROM companies WHERE (p_entity_type IS NULL OR p_entity_type = 'company') + UNION ALL + SELECT id FROM ride_models WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model') + ) all_entities + ), + 'avg_completeness_score', ( + SELECT ROUND(AVG(score)::NUMERIC, 2) FROM ( + SELECT ((10 + 10 + 10)::NUMERIC / 100.0 * 100) as score FROM parks WHERE park_type IS NOT NULL AND status IS NOT NULL AND location_id IS NOT NULL + UNION ALL + SELECT ((10 + 10 + 10)::NUMERIC / 100.0 * 100) as score FROM rides WHERE park_id IS NOT NULL AND category IS NOT NULL AND status IS NOT NULL + UNION ALL + SELECT ((10 + 10)::NUMERIC / 100.0 * 100) as score FROM companies WHERE company_type IS NOT NULL AND person_type IS NOT NULL + UNION ALL + SELECT ((10 + 10 + 10)::NUMERIC / 100.0 * 100) as score FROM ride_models WHERE manufacturer_id IS NOT NULL AND category IS NOT NULL AND ride_type IS NOT NULL + ) scores + ), + 'entities_below_50', ( + SELECT COUNT(*)::INTEGER FROM ( + SELECT id FROM parks WHERE (p_entity_type IS NULL OR p_entity_type = 'park') + UNION ALL + SELECT id FROM rides WHERE (p_entity_type IS NULL OR p_entity_type = 'ride') + UNION ALL + SELECT id FROM companies WHERE (p_entity_type IS NULL OR p_entity_type = 'company') + UNION ALL + SELECT id FROM ride_models WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model') + ) all_entities + WHERE id IN ( + SELECT id FROM parks WHERE description IS NULL OR manufacturer_id IS NULL + UNION + SELECT id FROM rides WHERE description IS NULL OR manufacturer_id IS NULL + UNION + SELECT id FROM companies WHERE description IS NULL + UNION + SELECT id FROM ride_models WHERE description IS NULL + ) + ), + 'entities_100_complete', 0, + 'by_entity_type', jsonb_build_object( + 'parks', (SELECT COUNT(*)::INTEGER FROM parks WHERE (p_entity_type IS NULL OR p_entity_type = 'park')), + 'rides', (SELECT COUNT(*)::INTEGER FROM rides WHERE (p_entity_type IS NULL OR p_entity_type = 'ride')), + 'companies', (SELECT COUNT(*)::INTEGER FROM companies WHERE (p_entity_type IS NULL OR p_entity_type = 'company')), + 'ride_models', (SELECT COUNT(*)::INTEGER FROM ride_models WHERE (p_entity_type IS NULL OR p_entity_type = 'ride_model')) + ) + ); + + -- Build final result + v_result := jsonb_build_object( + 'summary', v_summary, + 'entities', jsonb_build_object( + 'parks', COALESCE(v_parks, '[]'::jsonb), + 'rides', COALESCE(v_rides, '[]'::jsonb), + 'companies', COALESCE(v_companies, '[]'::jsonb), + 'ride_models', COALESCE(v_ride_models, '[]'::jsonb) + ), + 'generated_at', now() + ); + + RETURN v_result; +END; +$$; \ No newline at end of file