refactor: consolidate 3 migrations into single 01-schema.sql, remove legacy files

This commit is contained in:
2026-06-17 13:42:14 +02:00
parent c4745579ed
commit 46c1b01e98
4 changed files with 48 additions and 131 deletions
+1 -3
View File
@@ -13,9 +13,7 @@ services:
- ./data/db:/var/lib/postgresql/data - ./data/db:/var/lib/postgresql/data
- ./supabase/pg_hba.conf:/etc/postgresql/pg_hba.conf:ro - ./supabase/pg_hba.conf:/etc/postgresql/pg_hba.conf:ro
- ./supabase/migrations/00-run-init.sh:/docker-entrypoint-initdb.d/00-run-init.sh:ro - ./supabase/migrations/00-run-init.sh:/docker-entrypoint-initdb.d/00-run-init.sh:ro
- ./supabase/migrations/01-init.sql:/docker-entrypoint-initdb.d/01-init.sql:ro - ./supabase/migrations/01-schema.sql:/docker-entrypoint-initdb.d/01-schema.sql:ro
- ./supabase/migrations/02-image-and-pricing.sql:/docker-entrypoint-initdb.d/02-image-and-pricing.sql:ro
- ./supabase/migrations/03-global-keywords.sql:/docker-entrypoint-initdb.d/03-global-keywords.sql:ro
- ./supabase/migrations/post-boot.sql:/docker-entrypoint-initdb.d/post-boot.sql:ro - ./supabase/migrations/post-boot.sql:/docker-entrypoint-initdb.d/post-boot.sql:ro
command: > command: >
postgres postgres
@@ -1,5 +1,6 @@
-- ============================================================ -- ============================================================
-- willhaben-tracker — initial schema migration -- willhaben-tracker — consolidated schema (single source of truth)
-- Merged from: 01-init.sql, 02-image-and-pricing.sql, 03-global-keywords.sql
-- ============================================================ -- ============================================================
-- ----------------------------------------------------------- -- -----------------------------------------------------------
@@ -16,20 +17,33 @@ CREATE TABLE IF NOT EXISTS users (
); );
-- ----------------------------------------------------------- -- -----------------------------------------------------------
-- 2. search_queries (saved searches per user) -- 2. keywords (global search keywords — deduplicated across users)
-- ----------------------------------------------------------- -- -----------------------------------------------------------
CREATE TABLE IF NOT EXISTS search_queries ( CREATE TABLE IF NOT EXISTS keywords (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(), id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
user_id uuid REFERENCES users(id) ON DELETE CASCADE NOT NULL,
keyword text NOT NULL, keyword text NOT NULL,
interval_minutes int NOT NULL DEFAULT 60, interval_minutes int NOT NULL DEFAULT 60,
is_active boolean NOT NULL DEFAULT true, is_active boolean NOT NULL DEFAULT true,
initial_loaded boolean NOT NULL DEFAULT false,
last_scraped_at timestamptz, last_scraped_at timestamptz,
created_at timestamptz NOT NULL DEFAULT now() created_at timestamptz NOT NULL DEFAULT now()
); );
-- Case-insensitive uniqueness: two keywords that differ only in letter
-- case collide on this expression index.
CREATE UNIQUE INDEX IF NOT EXISTS idx_keywords_unique_lower ON keywords (LOWER(keyword));
-- ----------------------------------------------------------- -- -----------------------------------------------------------
-- 3. ads (raw ad snapshots, globally deduplicated by wh_ad_id) -- 3. keyword_subscriptions (many-to-many: user ↔ keyword)
-- -----------------------------------------------------------
-- Links a user to a global keyword; at most one row per (keyword, user) pair.
-- Rows disappear automatically when either the keyword or the user is deleted.
-- The timestamp column keeps the name subscribed_at that the merged
-- 03-global-keywords.sql migration created, so fresh installs and databases
-- that already ran that migration expose the same column.
CREATE TABLE IF NOT EXISTS keyword_subscriptions (
    keyword_id    uuid        NOT NULL REFERENCES keywords(id) ON DELETE CASCADE,
    user_id       uuid        NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    subscribed_at timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (keyword_id, user_id)
);
-- -----------------------------------------------------------
-- 4. ads (raw ad snapshots, globally deduplicated by wh_ad_id)
-- ----------------------------------------------------------- -- -----------------------------------------------------------
CREATE TABLE IF NOT EXISTS ads ( CREATE TABLE IF NOT EXISTS ads (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(), id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
@@ -40,22 +54,26 @@ CREATE TABLE IF NOT EXISTS ads (
location text, location text,
url text, url text,
published_at timestamptz, published_at timestamptz,
first_seen_at timestamptz NOT NULL DEFAULT now()
);
-- -----------------------------------------------------------
-- 4. query_ads (junction: which query found which ad)
-- -----------------------------------------------------------
CREATE TABLE IF NOT EXISTS query_ads (
search_query_id uuid REFERENCES search_queries(id) ON DELETE CASCADE NOT NULL,
ad_id uuid REFERENCES ads(id) ON DELETE CASCADE NOT NULL,
first_seen_at timestamptz NOT NULL DEFAULT now(), first_seen_at timestamptz NOT NULL DEFAULT now(),
is_notified boolean NOT NULL DEFAULT false, main_image_url text,
PRIMARY KEY (search_query_id, ad_id) postcode text,
modified_at timestamptz
); );
-- ----------------------------------------------------------- -- -----------------------------------------------------------
-- 5. notifications (audit log of sent Telegram messages) -- 5. price_history (track price changes per ad)
-- -----------------------------------------------------------
-- One row per recorded price transition of an ad. Rows are removed together
-- with their ad. The UNIQUE constraint allows a given (old_price, new_price)
-- transition to be stored only once per ad.
CREATE TABLE IF NOT EXISTS price_history (
    id         uuid        DEFAULT gen_random_uuid() PRIMARY KEY,
    ad_id      uuid        NOT NULL REFERENCES ads(id) ON DELETE CASCADE,
    old_price  numeric     NOT NULL,
    new_price  numeric     NOT NULL,
    changed_at timestamptz DEFAULT now() NOT NULL,
    UNIQUE (ad_id, old_price, new_price)
);
-- -----------------------------------------------------------
-- 6. notifications (audit log of sent Telegram messages)
-- ----------------------------------------------------------- -- -----------------------------------------------------------
CREATE TABLE IF NOT EXISTS notifications ( CREATE TABLE IF NOT EXISTS notifications (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(), id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
@@ -66,11 +84,11 @@ CREATE TABLE IF NOT EXISTS notifications (
); );
-- ----------------------------------------------------------- -- -----------------------------------------------------------
-- 6. scrape_logs (worker health / debugging) -- 7. scrape_logs (worker health / debugging)
-- ----------------------------------------------------------- -- -----------------------------------------------------------
CREATE TABLE IF NOT EXISTS scrape_logs ( CREATE TABLE IF NOT EXISTS scrape_logs (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(), id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
search_query_id uuid REFERENCES search_queries(id) ON DELETE CASCADE NOT NULL, keyword_id uuid REFERENCES keywords(id) ON DELETE CASCADE NOT NULL,
status text NOT NULL CHECK (status IN ('success', 'error', 'rate_limited')), status text NOT NULL CHECK (status IN ('success', 'error', 'rate_limited')),
ads_found int NOT NULL DEFAULT 0, ads_found int NOT NULL DEFAULT 0,
new_ads int NOT NULL DEFAULT 0, new_ads int NOT NULL DEFAULT 0,
@@ -82,23 +100,24 @@ CREATE TABLE IF NOT EXISTS scrape_logs (
-- Indexes -- Indexes
-- ============================================================ -- ============================================================
-- Users: fast lookup by telegram_id (unique constraint already implies an index) -- Keywords: fast lookup for active keywords ordered by last scrape time
-- Partial index: only rows with is_active = true are indexed, keyed by
-- (is_active, last_scraped_at).
CREATE INDEX IF NOT EXISTS idx_keywords_active_scraped ON keywords (is_active, last_scraped_at)
    WHERE is_active = true;
-- Search queries: user membership lookups -- Keyword subscriptions: find all keywords a user is subscribed to
CREATE INDEX IF NOT EXISTS idx_search_queries_user_id ON search_queries(user_id); CREATE INDEX IF NOT EXISTS idx_keyword_subscriptions_user_id
ON keyword_subscriptions(user_id);
-- Scheduler polling: active queries ordered by last scraped time -- Ads: fast lookup by willhaben ad ID (unique constraint already implies an index)
CREATE INDEX IF NOT EXISTS idx_search_queries_active_scraped
ON search_queries(is_active, last_scraped_at) WHERE is_active = true;
-- Notifier lookups: un-notified ads per query -- Price history: look up changes for a specific ad
CREATE INDEX IF NOT EXISTS idx_query_ads_notified CREATE INDEX IF NOT EXISTS idx_price_history_ad_id
ON query_ads(search_query_id, is_notified) WHERE is_notified = false; ON price_history(ad_id);
-- Notifications: recent messages per user -- Notifications: recent messages per user
CREATE INDEX IF NOT EXISTS idx_notifications_user_sent CREATE INDEX IF NOT EXISTS idx_notifications_user_sent
ON notifications(user_id, sent_at DESC); ON notifications(user_id, sent_at DESC);
-- Scrape logs: latest runs per query -- Scrape logs: latest runs per keyword
CREATE INDEX IF NOT EXISTS idx_scrape_logs_query_at CREATE INDEX IF NOT EXISTS idx_scrape_logs_keyword_at
ON scrape_logs(search_query_id, scraped_at DESC); ON scrape_logs(keyword_id, scraped_at DESC);
@@ -1,26 +0,0 @@
-- ============================================================
-- Migration 02 — image column on ads + price_history tracking
-- ============================================================
-- -----------------------------------------------------------
-- 1. Add main_image_url to ads (nullable)
-- -----------------------------------------------------------
-- Nullable image URL on ads; IF NOT EXISTS makes the statement safe to re-run.
ALTER TABLE ads ADD COLUMN IF NOT EXISTS main_image_url text;
-- -----------------------------------------------------------
-- 2. price_history — record every price change per ad
-- -----------------------------------------------------------
-- Price transitions per ad (old value, new value, time of change).
-- Deleting an ad cascades to its history rows; the same
-- (ad_id, old_price, new_price) combination cannot be inserted twice.
CREATE TABLE IF NOT EXISTS price_history (
    id         uuid        DEFAULT gen_random_uuid() PRIMARY KEY,
    ad_id      uuid        REFERENCES ads(id) ON DELETE CASCADE NOT NULL,
    old_price  numeric     NOT NULL,
    new_price  numeric     NOT NULL,
    changed_at timestamptz DEFAULT now() NOT NULL,
    UNIQUE (ad_id, old_price, new_price)
);
-- Lookup of all recorded price changes for one ad.
CREATE INDEX IF NOT EXISTS idx_price_history_ad_id ON price_history (ad_id);
-- Note: supabase_admin role creation + grants moved to post-boot.sql.
@@ -1,74 +0,0 @@
-- Global keyword table. New rows default to a 5-minute interval, active,
-- and not yet initially loaded; last_scraped_at stays NULL until set.
CREATE TABLE IF NOT EXISTS keywords (
    id               uuid        DEFAULT gen_random_uuid() PRIMARY KEY,
    keyword          text        NOT NULL,
    interval_minutes integer     DEFAULT 5 NOT NULL,
    is_active        boolean     DEFAULT true NOT NULL,
    last_scraped_at  timestamptz,
    initial_loaded   boolean     DEFAULT false NOT NULL,
    created_at       timestamptz DEFAULT now() NOT NULL
);
-- Enforces case-insensitive uniqueness of keyword text.
CREATE UNIQUE INDEX IF NOT EXISTS idx_keywords_unique_lower
    ON keywords (LOWER(keyword));
-- Junction table between keywords and users; the composite primary key
-- permits each (keyword, user) pair once. Both references cascade on delete.
CREATE TABLE IF NOT EXISTS keyword_subscriptions (
    keyword_id    uuid        REFERENCES keywords(id) ON DELETE CASCADE NOT NULL,
    user_id       uuid        REFERENCES users(id) ON DELETE CASCADE NOT NULL,
    subscribed_at timestamptz DEFAULT now() NOT NULL,
    PRIMARY KEY (keyword_id, user_id)
);
-- Lookup by user_id; the primary key leads with keyword_id, so it does not
-- serve this access path.
CREATE INDEX IF NOT EXISTS idx_keyword_subscriptions_user
    ON keyword_subscriptions (user_id);
-- Two nullable columns added to ads in one statement; each ADD COLUMN is
-- skipped when the column already exists.
ALTER TABLE ads
    ADD COLUMN IF NOT EXISTS postcode text,
    ADD COLUMN IF NOT EXISTS modified_at timestamptz;
-- Data migration: fold per-user search_queries into the global keywords
-- table and recreate each user's membership as a keyword_subscriptions row.
--
-- Several search_queries rows can share the same lowercased keyword while
-- differing in interval / active flag / last scrape time. They are collapsed
-- with explicit aggregates so the outcome does not depend on row order:
--   * interval_minutes -> smallest interval requested by any user
--   * is_active        -> true if at least one of the queries was active
--   * last_scraped_at  -> most recent scrape (NULL if never scraped)
-- NOTE(review): these merge rules are a deliberate choice; confirm they match
-- the intended scheduling behaviour.
--
-- Both statements stay inside one DO block so they execute as a single
-- statement, as the original block did.
DO $$
BEGIN
    -- One keywords row per distinct lowercased keyword. Keywords that already
    -- exist (case-insensitive unique index) are left untouched.
    INSERT INTO keywords (keyword, interval_minutes, is_active, last_scraped_at)
    SELECT
        LOWER(sq.keyword),
        MIN(sq.interval_minutes),
        BOOL_OR(sq.is_active),
        MAX(sq.last_scraped_at)
    FROM search_queries AS sq
    GROUP BY LOWER(sq.keyword)
    ON CONFLICT DO NOTHING;

    -- One subscription per (keyword, user). DISTINCT removes duplicates that
    -- arise when a user saved the same keyword in different letter case;
    -- ON CONFLICT skips pairs that are already present.
    INSERT INTO keyword_subscriptions (keyword_id, user_id)
    SELECT DISTINCT
        kw.id,
        sq.user_id
    FROM search_queries AS sq
    INNER JOIN keywords AS kw
        ON LOWER(kw.keyword) = LOWER(sq.keyword)
    ON CONFLICT DO NOTHING;
END $$;
-- Flag a keyword as initially loaded when any search query with the same
-- text (compared case-insensitively) has at least one 'success' scrape log.
-- Reads scrape_logs.search_query_id, i.e. the column name before it is renamed.
UPDATE keywords AS kw
SET initial_loaded = true
WHERE EXISTS (
    SELECT 1
    FROM search_queries AS sq
    INNER JOIN scrape_logs AS sl
        ON sl.search_query_id = sq.id
    WHERE LOWER(sq.keyword) = LOWER(kw.keyword)
      AND sl.status = 'success'
);
-- Re-point scrape_logs from search_queries to keywords.
-- Statement order matters: the old foreign key is dropped first, then the
-- column is renamed, and only then are the values rewritten.
ALTER TABLE scrape_logs
    DROP CONSTRAINT IF EXISTS scrape_logs_search_query_id_fkey;

ALTER TABLE scrape_logs
    RENAME COLUMN search_query_id TO keyword_id;

-- Right after the rename, keyword_id still holds search_queries ids; replace
-- each with the id of the keyword whose text matches case-insensitively.
UPDATE scrape_logs AS sl
SET keyword_id = kw.id
FROM search_queries AS sq
INNER JOIN keywords AS kw
    ON LOWER(kw.keyword) = LOWER(sq.keyword)
WHERE sl.keyword_id = sq.id;
-- Add the scrape_logs.keyword_id -> keywords.id foreign key unless it is
-- already there (ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form).
-- The existence check is scoped to the scrape_logs table and to foreign keys:
-- constraint names are not unique across tables, so matching on the name
-- alone could find an unrelated constraint and silently skip creating this one.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.table_constraints
        WHERE constraint_name = 'fk_scrape_logs_keyword'
          AND table_name = 'scrape_logs'
          AND constraint_type = 'FOREIGN KEY'
    ) THEN
        ALTER TABLE scrape_logs
            ADD CONSTRAINT fk_scrape_logs_keyword
            FOREIGN KEY (keyword_id) REFERENCES keywords(id) ON DELETE CASCADE;
    END IF;
END $$;
-- Remove the legacy per-user tables; CASCADE also drops objects that depend
-- on them.
DROP TABLE IF EXISTS query_ads, search_queries CASCADE;