-- Migration: Clean HTML entities from all text columns -- This removes HTML tags and decodes common HTML entities across all tables -- Created: 2025-08-11 -- Author: Claude Code Assistant -- Function to clean HTML tags and entities from text CREATE OR REPLACE FUNCTION clean_html_entities(input_text TEXT) RETURNS TEXT AS $$ BEGIN -- Return NULL if input is NULL IF input_text IS NULL THEN RETURN NULL; END IF; -- Remove HTML tags using regex -- Clean common HTML entities RETURN TRIM( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( input_text, '<[^>]*>', '', 'g' -- Remove HTML tags ), ' ', ' ', 'g' -- Non-breaking space ), '&', '&', 'g' -- Ampersand ), '<', '<', 'g' -- Less than ), '>', '>', 'g' -- Greater than ), '"', '"', 'g' -- Double quote ), ''', '''', 'g' -- Single quote/apostrophe ) ); END; $$ LANGUAGE plpgsql; -- Start transaction BEGIN; -- Log the start of migration DO $$ BEGIN RAISE NOTICE 'Starting HTML entity cleanup migration at %', NOW(); END $$; -- Clean bulletins table DO $$ DECLARE updated_count INTEGER; BEGIN UPDATE bulletins SET title = clean_html_entities(title), sabbath_school = clean_html_entities(sabbath_school), divine_worship = clean_html_entities(divine_worship), scripture_reading = clean_html_entities(scripture_reading), sunset = clean_html_entities(sunset), updated_at = NOW() WHERE title != clean_html_entities(title) OR sabbath_school != clean_html_entities(sabbath_school) OR divine_worship != clean_html_entities(divine_worship) OR scripture_reading != clean_html_entities(scripture_reading) OR sunset != clean_html_entities(sunset) OR title ~ '<[^>]*>' OR sabbath_school ~ '<[^>]*>' OR divine_worship ~ '<[^>]*>' OR scripture_reading ~ '<[^>]*>' OR sunset ~ '<[^>]*>' OR title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR sabbath_school ~ '&(nbsp|amp|lt|gt|quot|#39);' OR divine_worship ~ '&(nbsp|amp|lt|gt|quot|#39);' OR scripture_reading ~ '&(nbsp|amp|lt|gt|quot|#39);' OR sunset ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in bulletins table', updated_count; END $$; -- Clean events table DO $$ DECLARE updated_count INTEGER; BEGIN UPDATE events SET title = clean_html_entities(title), description = clean_html_entities(description), location = clean_html_entities(location), location_url = clean_html_entities(location_url), approved_from = clean_html_entities(approved_from), updated_at = NOW() WHERE title != clean_html_entities(title) OR description != clean_html_entities(description) OR location != clean_html_entities(location) OR location_url != clean_html_entities(location_url) OR approved_from != clean_html_entities(approved_from) OR title ~ '<[^>]*>' OR description ~ '<[^>]*>' OR location ~ '<[^>]*>' OR location_url ~ '<[^>]*>' OR approved_from ~ '<[^>]*>' OR title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR description ~ '&(nbsp|amp|lt|gt|quot|#39);' OR location ~ '&(nbsp|amp|lt|gt|quot|#39);' OR location_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR approved_from ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in events table', updated_count; END $$; -- Clean pending_events table DO $$ DECLARE updated_count INTEGER; BEGIN UPDATE pending_events SET title = clean_html_entities(title), description = clean_html_entities(description), location = clean_html_entities(location), location_url = clean_html_entities(location_url), admin_notes = clean_html_entities(admin_notes), submitter_email = clean_html_entities(submitter_email), bulletin_week = clean_html_entities(bulletin_week), updated_at = NOW() WHERE title != clean_html_entities(title) OR description != clean_html_entities(description) OR location != clean_html_entities(location) OR location_url != clean_html_entities(location_url) OR admin_notes != clean_html_entities(admin_notes) OR submitter_email != clean_html_entities(submitter_email) OR bulletin_week != clean_html_entities(bulletin_week) OR title ~ '<[^>]*>' OR description ~ '<[^>]*>' OR location ~ '<[^>]*>' OR location_url ~ '<[^>]*>' OR admin_notes ~ '<[^>]*>' OR submitter_email ~ '<[^>]*>' OR bulletin_week ~ '<[^>]*>' OR title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR description ~ '&(nbsp|amp|lt|gt|quot|#39);' OR location ~ '&(nbsp|amp|lt|gt|quot|#39);' OR location_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR admin_notes ~ '&(nbsp|amp|lt|gt|quot|#39);' OR submitter_email ~ '&(nbsp|amp|lt|gt|quot|#39);' OR bulletin_week ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in pending_events table', updated_count; END $$; -- Clean members table DO $$ DECLARE updated_count INTEGER; BEGIN UPDATE members SET first_name = clean_html_entities(first_name), last_name = clean_html_entities(last_name), address = clean_html_entities(address), notes = clean_html_entities(notes), emergency_contact_name = clean_html_entities(emergency_contact_name), membership_status = clean_html_entities(membership_status), updated_at = NOW() WHERE first_name != clean_html_entities(first_name) OR last_name != clean_html_entities(last_name) OR address != clean_html_entities(address) OR notes != clean_html_entities(notes) OR emergency_contact_name != clean_html_entities(emergency_contact_name) OR membership_status != clean_html_entities(membership_status) OR first_name ~ '<[^>]*>' OR last_name ~ '<[^>]*>' OR address ~ '<[^>]*>' OR notes ~ '<[^>]*>' OR emergency_contact_name ~ '<[^>]*>' OR membership_status ~ '<[^>]*>' OR first_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR last_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR address ~ '&(nbsp|amp|lt|gt|quot|#39);' OR notes ~ '&(nbsp|amp|lt|gt|quot|#39);' OR emergency_contact_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR membership_status ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in members table', updated_count; END $$; -- Clean church_config table DO $$ DECLARE updated_count INTEGER; BEGIN UPDATE church_config SET church_name = clean_html_entities(church_name), contact_email = clean_html_entities(contact_email), church_address = clean_html_entities(church_address), po_box = clean_html_entities(po_box), google_maps_url = clean_html_entities(google_maps_url), about_text = clean_html_entities(about_text), updated_at = NOW() WHERE church_name != clean_html_entities(church_name) OR contact_email != clean_html_entities(contact_email) OR church_address != clean_html_entities(church_address) OR po_box != clean_html_entities(po_box) OR google_maps_url != clean_html_entities(google_maps_url) OR about_text != clean_html_entities(about_text) OR church_name ~ '<[^>]*>' OR contact_email ~ '<[^>]*>' OR church_address ~ '<[^>]*>' OR po_box ~ '<[^>]*>' OR google_maps_url ~ '<[^>]*>' OR about_text ~ '<[^>]*>' OR church_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR contact_email ~ '&(nbsp|amp|lt|gt|quot|#39);' OR church_address ~ '&(nbsp|amp|lt|gt|quot|#39);' OR po_box ~ '&(nbsp|amp|lt|gt|quot|#39);' OR google_maps_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR about_text ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in church_config table', updated_count; END $$; -- Clean media_items table (if exists) DO $$ DECLARE updated_count INTEGER; BEGIN -- Check if table exists IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'media_items') THEN UPDATE media_items SET title = clean_html_entities(title), speaker = clean_html_entities(speaker), description = clean_html_entities(description), scripture_reading = clean_html_entities(scripture_reading), updated_at = NOW() WHERE title != clean_html_entities(title) OR speaker != clean_html_entities(speaker) OR description != clean_html_entities(description) OR scripture_reading != clean_html_entities(scripture_reading) OR title ~ '<[^>]*>' OR speaker ~ '<[^>]*>' OR description ~ '<[^>]*>' OR scripture_reading ~ '<[^>]*>' OR title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR speaker ~ '&(nbsp|amp|lt|gt|quot|#39);' OR description ~ '&(nbsp|amp|lt|gt|quot|#39);' OR scripture_reading ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in media_items table', updated_count; ELSE RAISE NOTICE 'media_items table does not exist, skipping'; END IF; END $$; -- Clean transcoded_media table (if exists) DO $$ DECLARE updated_count INTEGER; BEGIN -- Check if table exists IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'transcoded_media') THEN UPDATE transcoded_media SET error_message = clean_html_entities(error_message), transcoding_method = clean_html_entities(transcoding_method), updated_at = NOW() WHERE error_message != clean_html_entities(error_message) OR transcoding_method != clean_html_entities(transcoding_method) OR error_message ~ '<[^>]*>' OR transcoding_method ~ '<[^>]*>' OR error_message ~ '&(nbsp|amp|lt|gt|quot|#39);' OR transcoding_method ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in transcoded_media table', updated_count; ELSE RAISE NOTICE 'transcoded_media table does not exist, skipping'; END IF; END $$; -- Clean users table DO $$ DECLARE updated_count INTEGER; BEGIN UPDATE users SET username = clean_html_entities(username), email = clean_html_entities(email), name = clean_html_entities(name), avatar_url = clean_html_entities(avatar_url), role = clean_html_entities(role), updated_at = NOW() WHERE username != clean_html_entities(username) OR email != clean_html_entities(email) OR name != clean_html_entities(name) OR avatar_url != clean_html_entities(avatar_url) OR role != clean_html_entities(role) OR username ~ '<[^>]*>' OR email ~ '<[^>]*>' OR name ~ '<[^>]*>' OR avatar_url ~ '<[^>]*>' OR role ~ '<[^>]*>' OR username ~ '&(nbsp|amp|lt|gt|quot|#39);' OR email ~ '&(nbsp|amp|lt|gt|quot|#39);' OR name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR avatar_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR role ~ '&(nbsp|amp|lt|gt|quot|#39);'; GET DIAGNOSTICS updated_count = ROW_COUNT; RAISE NOTICE 'Cleaned % rows in users table', updated_count; END $$; -- Log completion DO $$ BEGIN RAISE NOTICE 'HTML entity cleanup migration completed at %', NOW(); END $$; -- Clean up the helper function (optional - comment out if you want to keep it for future use) -- DROP FUNCTION clean_html_entities(TEXT); COMMIT;