church-api/migrations/20250811000001_clean_html_entities.sql
Benjamin Slingo 0c06e159bb Initial commit: Church API Rust implementation
Complete church management system with bulletin management, media processing, live streaming integration, and web interface. Includes authentication, email notifications, database migrations, and comprehensive test suite.
2025-08-19 20:56:41 -04:00

302 lines
12 KiB
PL/PgSQL

-- Migration: Clean HTML entities from all text columns
-- This removes HTML tags and decodes common HTML entities across all tables
-- Created: 2025-08-11
-- Author: Claude Code Assistant
-- Function to clean HTML tags and entities from text
CREATE OR REPLACE FUNCTION clean_html_entities(input_text TEXT)
RETURNS TEXT AS $$
BEGIN
-- Return NULL if input is NULL
IF input_text IS NULL THEN
RETURN NULL;
END IF;
-- Remove HTML tags using regex
-- Clean common HTML entities
RETURN TRIM(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
input_text,
'<[^>]*>', '', 'g' -- Remove HTML tags
),
'&nbsp;', ' ', 'g' -- Non-breaking space
),
'&amp;', '&', 'g' -- Ampersand
),
'&lt;', '<', 'g' -- Less than
),
'&gt;', '>', 'g' -- Greater than
),
'&quot;', '"', 'g' -- Double quote
),
'&#39;', '''', 'g' -- Single quote/apostrophe
)
);
END;
$$ LANGUAGE plpgsql;
-- Start transaction
BEGIN;
-- Log the start of migration
DO $$
BEGIN
RAISE NOTICE 'Starting HTML entity cleanup migration at %', NOW();
END $$;
-- Clean bulletins table
DO $$
DECLARE
updated_count INTEGER;
BEGIN
UPDATE bulletins
SET
title = clean_html_entities(title),
sabbath_school = clean_html_entities(sabbath_school),
divine_worship = clean_html_entities(divine_worship),
scripture_reading = clean_html_entities(scripture_reading),
sunset = clean_html_entities(sunset),
updated_at = NOW()
WHERE
title != clean_html_entities(title) OR
sabbath_school != clean_html_entities(sabbath_school) OR
divine_worship != clean_html_entities(divine_worship) OR
scripture_reading != clean_html_entities(scripture_reading) OR
sunset != clean_html_entities(sunset) OR
title ~ '<[^>]*>' OR sabbath_school ~ '<[^>]*>' OR divine_worship ~ '<[^>]*>' OR
scripture_reading ~ '<[^>]*>' OR sunset ~ '<[^>]*>' OR
title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR sabbath_school ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
divine_worship ~ '&(nbsp|amp|lt|gt|quot|#39);' OR scripture_reading ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
sunset ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in bulletins table', updated_count;
END $$;
-- Clean events table
DO $$
DECLARE
updated_count INTEGER;
BEGIN
UPDATE events
SET
title = clean_html_entities(title),
description = clean_html_entities(description),
location = clean_html_entities(location),
location_url = clean_html_entities(location_url),
approved_from = clean_html_entities(approved_from),
updated_at = NOW()
WHERE
title != clean_html_entities(title) OR
description != clean_html_entities(description) OR
location != clean_html_entities(location) OR
location_url != clean_html_entities(location_url) OR
approved_from != clean_html_entities(approved_from) OR
title ~ '<[^>]*>' OR description ~ '<[^>]*>' OR location ~ '<[^>]*>' OR
location_url ~ '<[^>]*>' OR approved_from ~ '<[^>]*>' OR
title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR description ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
location ~ '&(nbsp|amp|lt|gt|quot|#39);' OR location_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
approved_from ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in events table', updated_count;
END $$;
-- Clean pending_events table
DO $$
DECLARE
updated_count INTEGER;
BEGIN
UPDATE pending_events
SET
title = clean_html_entities(title),
description = clean_html_entities(description),
location = clean_html_entities(location),
location_url = clean_html_entities(location_url),
admin_notes = clean_html_entities(admin_notes),
submitter_email = clean_html_entities(submitter_email),
bulletin_week = clean_html_entities(bulletin_week),
updated_at = NOW()
WHERE
title != clean_html_entities(title) OR
description != clean_html_entities(description) OR
location != clean_html_entities(location) OR
location_url != clean_html_entities(location_url) OR
admin_notes != clean_html_entities(admin_notes) OR
submitter_email != clean_html_entities(submitter_email) OR
bulletin_week != clean_html_entities(bulletin_week) OR
title ~ '<[^>]*>' OR description ~ '<[^>]*>' OR location ~ '<[^>]*>' OR
location_url ~ '<[^>]*>' OR admin_notes ~ '<[^>]*>' OR submitter_email ~ '<[^>]*>' OR
bulletin_week ~ '<[^>]*>' OR
title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR description ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
location ~ '&(nbsp|amp|lt|gt|quot|#39);' OR location_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
admin_notes ~ '&(nbsp|amp|lt|gt|quot|#39);' OR submitter_email ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
bulletin_week ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in pending_events table', updated_count;
END $$;
-- Clean members table
DO $$
DECLARE
updated_count INTEGER;
BEGIN
UPDATE members
SET
first_name = clean_html_entities(first_name),
last_name = clean_html_entities(last_name),
address = clean_html_entities(address),
notes = clean_html_entities(notes),
emergency_contact_name = clean_html_entities(emergency_contact_name),
membership_status = clean_html_entities(membership_status),
updated_at = NOW()
WHERE
first_name != clean_html_entities(first_name) OR
last_name != clean_html_entities(last_name) OR
address != clean_html_entities(address) OR
notes != clean_html_entities(notes) OR
emergency_contact_name != clean_html_entities(emergency_contact_name) OR
membership_status != clean_html_entities(membership_status) OR
first_name ~ '<[^>]*>' OR last_name ~ '<[^>]*>' OR address ~ '<[^>]*>' OR
notes ~ '<[^>]*>' OR emergency_contact_name ~ '<[^>]*>' OR membership_status ~ '<[^>]*>' OR
first_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR last_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
address ~ '&(nbsp|amp|lt|gt|quot|#39);' OR notes ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
emergency_contact_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR membership_status ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in members table', updated_count;
END $$;
-- Clean church_config table
DO $$
DECLARE
updated_count INTEGER;
BEGIN
UPDATE church_config
SET
church_name = clean_html_entities(church_name),
contact_email = clean_html_entities(contact_email),
church_address = clean_html_entities(church_address),
po_box = clean_html_entities(po_box),
google_maps_url = clean_html_entities(google_maps_url),
about_text = clean_html_entities(about_text),
updated_at = NOW()
WHERE
church_name != clean_html_entities(church_name) OR
contact_email != clean_html_entities(contact_email) OR
church_address != clean_html_entities(church_address) OR
po_box != clean_html_entities(po_box) OR
google_maps_url != clean_html_entities(google_maps_url) OR
about_text != clean_html_entities(about_text) OR
church_name ~ '<[^>]*>' OR contact_email ~ '<[^>]*>' OR church_address ~ '<[^>]*>' OR
po_box ~ '<[^>]*>' OR google_maps_url ~ '<[^>]*>' OR about_text ~ '<[^>]*>' OR
church_name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR contact_email ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
church_address ~ '&(nbsp|amp|lt|gt|quot|#39);' OR po_box ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
google_maps_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR about_text ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in church_config table', updated_count;
END $$;
-- Clean media_items table (if exists)
DO $$
DECLARE
updated_count INTEGER;
BEGIN
-- Check if table exists
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'media_items') THEN
UPDATE media_items
SET
title = clean_html_entities(title),
speaker = clean_html_entities(speaker),
description = clean_html_entities(description),
scripture_reading = clean_html_entities(scripture_reading),
updated_at = NOW()
WHERE
title != clean_html_entities(title) OR
speaker != clean_html_entities(speaker) OR
description != clean_html_entities(description) OR
scripture_reading != clean_html_entities(scripture_reading) OR
title ~ '<[^>]*>' OR speaker ~ '<[^>]*>' OR description ~ '<[^>]*>' OR
scripture_reading ~ '<[^>]*>' OR
title ~ '&(nbsp|amp|lt|gt|quot|#39);' OR speaker ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
description ~ '&(nbsp|amp|lt|gt|quot|#39);' OR scripture_reading ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in media_items table', updated_count;
ELSE
RAISE NOTICE 'media_items table does not exist, skipping';
END IF;
END $$;
-- Clean transcoded_media table (if exists)
DO $$
DECLARE
updated_count INTEGER;
BEGIN
-- Check if table exists
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'transcoded_media') THEN
UPDATE transcoded_media
SET
error_message = clean_html_entities(error_message),
transcoding_method = clean_html_entities(transcoding_method),
updated_at = NOW()
WHERE
error_message != clean_html_entities(error_message) OR
transcoding_method != clean_html_entities(transcoding_method) OR
error_message ~ '<[^>]*>' OR transcoding_method ~ '<[^>]*>' OR
error_message ~ '&(nbsp|amp|lt|gt|quot|#39);' OR transcoding_method ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in transcoded_media table', updated_count;
ELSE
RAISE NOTICE 'transcoded_media table does not exist, skipping';
END IF;
END $$;
-- Clean users table
DO $$
DECLARE
updated_count INTEGER;
BEGIN
UPDATE users
SET
username = clean_html_entities(username),
email = clean_html_entities(email),
name = clean_html_entities(name),
avatar_url = clean_html_entities(avatar_url),
role = clean_html_entities(role),
updated_at = NOW()
WHERE
username != clean_html_entities(username) OR
email != clean_html_entities(email) OR
name != clean_html_entities(name) OR
avatar_url != clean_html_entities(avatar_url) OR
role != clean_html_entities(role) OR
username ~ '<[^>]*>' OR email ~ '<[^>]*>' OR name ~ '<[^>]*>' OR
avatar_url ~ '<[^>]*>' OR role ~ '<[^>]*>' OR
username ~ '&(nbsp|amp|lt|gt|quot|#39);' OR email ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
name ~ '&(nbsp|amp|lt|gt|quot|#39);' OR avatar_url ~ '&(nbsp|amp|lt|gt|quot|#39);' OR
role ~ '&(nbsp|amp|lt|gt|quot|#39);';
GET DIAGNOSTICS updated_count = ROW_COUNT;
RAISE NOTICE 'Cleaned % rows in users table', updated_count;
END $$;
-- Log completion
DO $$
BEGIN
RAISE NOTICE 'HTML entity cleanup migration completed at %', NOW();
END $$;
-- Clean up the helper function (optional - comment out if you want to keep it for future use)
-- DROP FUNCTION clean_html_entities(TEXT);
COMMIT;