diff --git a/docker-compose.yml b/docker-compose.yml
index 9d94bcc..1902e36 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,9 +13,7 @@ services:
       - ./data/db:/var/lib/postgresql/data
       - ./supabase/pg_hba.conf:/etc/postgresql/pg_hba.conf:ro
       - ./supabase/migrations/00-run-init.sh:/docker-entrypoint-initdb.d/00-run-init.sh:ro
-      - ./supabase/migrations/01-init.sql:/docker-entrypoint-initdb.d/01-init.sql:ro
-      - ./supabase/migrations/02-image-and-pricing.sql:/docker-entrypoint-initdb.d/02-image-and-pricing.sql:ro
-      - ./supabase/migrations/03-global-keywords.sql:/docker-entrypoint-initdb.d/03-global-keywords.sql:ro
+      - ./supabase/migrations/01-schema.sql:/docker-entrypoint-initdb.d/01-schema.sql:ro
       - ./supabase/migrations/post-boot.sql:/docker-entrypoint-initdb.d/post-boot.sql:ro
     command: >
       postgres
diff --git a/supabase/migrations/01-init.sql b/supabase/migrations/01-schema.sql
similarity index 54%
rename from supabase/migrations/01-init.sql
rename to supabase/migrations/01-schema.sql
index fe7484d..6a56885 100644
--- a/supabase/migrations/01-init.sql
+++ b/supabase/migrations/01-schema.sql
@@ -1,5 +1,6 @@
 -- ============================================================
--- willhaben-tracker — initial schema migration
+-- willhaben-tracker — consolidated schema (single source of truth)
+-- Merged from: 01-init.sql, 02-image-and-pricing.sql, 03-global-keywords.sql
 -- ============================================================
 
 -- -----------------------------------------------------------
@@ -16,20 +17,33 @@ CREATE TABLE IF NOT EXISTS users (
 );
 
 -- -----------------------------------------------------------
--- 2. search_queries (saved searches per user)
+-- 2. keywords (global search keywords — deduplicated across users)
 -- -----------------------------------------------------------
-CREATE TABLE IF NOT EXISTS search_queries (
+CREATE TABLE IF NOT EXISTS keywords (
   id               uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
-  user_id          uuid        REFERENCES users(id) ON DELETE CASCADE NOT NULL,
   keyword          text        NOT NULL,
-  interval_minutes int         NOT NULL DEFAULT 60,
+  interval_minutes int         NOT NULL DEFAULT 5,  -- same default as the former 03-global-keywords.sql (search_queries used 60)
   is_active        boolean     NOT NULL DEFAULT true,
+  initial_loaded   boolean     NOT NULL DEFAULT false,
   last_scraped_at  timestamptz,
   created_at       timestamptz NOT NULL DEFAULT now()
 );
 
+CREATE UNIQUE INDEX IF NOT EXISTS idx_keywords_unique_lower
+  ON keywords(LOWER(keyword));
+
 -- -----------------------------------------------------------
--- 3. ads (raw ad snapshots, globally deduplicated by wh_ad_id)
+-- 3. keyword_subscriptions (many-to-many: user ↔ keyword)
+-- -----------------------------------------------------------
+CREATE TABLE IF NOT EXISTS keyword_subscriptions (
+  keyword_id    uuid        REFERENCES keywords(id) ON DELETE CASCADE NOT NULL,
+  user_id       uuid        REFERENCES users(id)    ON DELETE CASCADE NOT NULL,
+  subscribed_at timestamptz NOT NULL DEFAULT now(),  -- column name kept from the former 03-global-keywords.sql
+  PRIMARY KEY (keyword_id, user_id)
+);
+
+-- -----------------------------------------------------------
+-- 4. ads (raw ad snapshots, globally deduplicated by wh_ad_id)
 -- -----------------------------------------------------------
 CREATE TABLE IF NOT EXISTS ads (
   id             uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
@@ -40,22 +54,26 @@ CREATE TABLE IF NOT EXISTS ads (
   location       text,
   url            text,
   published_at   timestamptz,
-  first_seen_at  timestamptz NOT NULL DEFAULT now()
+  first_seen_at  timestamptz NOT NULL DEFAULT now(),
+  main_image_url text,
+  postcode       text,
+  modified_at    timestamptz
 );
 
 -- -----------------------------------------------------------
--- 4. query_ads (junction: which query found which ad)
+-- 5. price_history (track price changes per ad)
 -- -----------------------------------------------------------
-CREATE TABLE IF NOT EXISTS query_ads (
-  search_query_id uuid        REFERENCES search_queries(id) ON DELETE CASCADE NOT NULL,
-  ad_id           uuid        REFERENCES ads(id) ON DELETE CASCADE NOT NULL,
-  first_seen_at   timestamptz NOT NULL DEFAULT now(),
-  is_notified     boolean     NOT NULL DEFAULT false,
-  PRIMARY KEY (search_query_id, ad_id)
+CREATE TABLE IF NOT EXISTS price_history (
+  id         uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
+  ad_id      uuid        REFERENCES ads(id) ON DELETE CASCADE NOT NULL,
+  old_price  numeric     NOT NULL,
+  new_price  numeric     NOT NULL,
+  changed_at timestamptz NOT NULL DEFAULT now(),
+  UNIQUE (ad_id, old_price, new_price)
 );
 
 -- -----------------------------------------------------------
--- 5. notifications (audit log of sent Telegram messages)
+-- 6. notifications (audit log of sent Telegram messages)
 -- -----------------------------------------------------------
 CREATE TABLE IF NOT EXISTS notifications (
   id         uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
@@ -66,11 +84,11 @@ CREATE TABLE IF NOT EXISTS notifications (
 );
 
 -- -----------------------------------------------------------
--- 6. scrape_logs (worker health / debugging)
+-- 7. scrape_logs (worker health / debugging)
 -- -----------------------------------------------------------
 CREATE TABLE IF NOT EXISTS scrape_logs (
   id              uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
-  search_query_id uuid        REFERENCES search_queries(id) ON DELETE CASCADE NOT NULL,
+  keyword_id      uuid        REFERENCES keywords(id) ON DELETE CASCADE NOT NULL,
   status          text        NOT NULL CHECK (status IN ('success', 'error', 'rate_limited')),
   ads_found       int         NOT NULL DEFAULT 0,
   new_ads         int         NOT NULL DEFAULT 0,
@@ -82,23 +100,24 @@ CREATE TABLE IF NOT EXISTS scrape_logs (
 -- Indexes
 -- ============================================================
 
--- Users: fast lookup by telegram_id (unique constraint already implies an index)
+-- Keywords: fast lookup for active keywords ordered by last scrape time
+CREATE INDEX IF NOT EXISTS idx_keywords_active_scraped
+  ON keywords(is_active, last_scraped_at) WHERE is_active = true;
 
--- Search queries: user membership lookups
-CREATE INDEX IF NOT EXISTS idx_search_queries_user_id ON search_queries(user_id);
+-- Keyword subscriptions: find all keywords of a user (the primary key already serves keyword → subscribers)
+CREATE INDEX IF NOT EXISTS idx_keyword_subscriptions_user_id
+  ON keyword_subscriptions(user_id);
 
--- Scheduler polling: active queries ordered by last scraped time
-CREATE INDEX IF NOT EXISTS idx_search_queries_active_scraped
-  ON search_queries(is_active, last_scraped_at) WHERE is_active = true;
+-- Ads: fast lookup by willhaben ad ID (unique constraint already implies an index)
 
--- Notifier lookups: un-notified ads per query
-CREATE INDEX IF NOT EXISTS idx_query_ads_notified
-  ON query_ads(search_query_id, is_notified) WHERE is_notified = false;
+-- Price history: look up changes for a specific ad
+CREATE INDEX IF NOT EXISTS idx_price_history_ad_id
+  ON price_history(ad_id);
 
 -- Notifications: recent messages per user
 CREATE INDEX IF NOT EXISTS idx_notifications_user_sent
   ON notifications(user_id, sent_at DESC);
 
--- Scrape logs: latest runs per query
-CREATE INDEX IF NOT EXISTS idx_scrape_logs_query_at
-  ON scrape_logs(search_query_id, scraped_at DESC);
+-- Scrape logs: latest runs per keyword
+CREATE INDEX IF NOT EXISTS idx_scrape_logs_keyword_at
+  ON scrape_logs(keyword_id, scraped_at DESC);
diff --git a/supabase/migrations/02-image-and-pricing.sql b/supabase/migrations/02-image-and-pricing.sql
deleted file mode 100644
index 4fa9dbd..0000000
--- a/supabase/migrations/02-image-and-pricing.sql
+++ /dev/null
@@ -1,26 +0,0 @@
--- ============================================================
--- Migration 02 — image column on ads + price_history tracking
--- ============================================================
-
--- -----------------------------------------------------------
--- 1. Add main_image_url to ads (nullable)
--- -----------------------------------------------------------
-ALTER TABLE ads
-  ADD COLUMN IF NOT EXISTS main_image_url TEXT;
-
--- -----------------------------------------------------------
--- 2. price_history — record every price change per ad
--- -----------------------------------------------------------
-CREATE TABLE IF NOT EXISTS price_history (
-  id         uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
-  ad_id      uuid        NOT NULL REFERENCES ads(id) ON DELETE CASCADE,
-  old_price  numeric     NOT NULL,
-  new_price  numeric     NOT NULL,
-  changed_at timestamptz NOT NULL DEFAULT now(),
-  UNIQUE (ad_id, old_price, new_price)
-);
-
-CREATE INDEX IF NOT EXISTS idx_price_history_ad_id
-  ON price_history(ad_id);
-
--- Note: supabase_admin role creation + grants moved to post-boot.sql.
diff --git a/supabase/migrations/03-global-keywords.sql b/supabase/migrations/03-global-keywords.sql
deleted file mode 100644
index d885b00..0000000
--- a/supabase/migrations/03-global-keywords.sql
+++ /dev/null
@@ -1,74 +0,0 @@
-CREATE TABLE IF NOT EXISTS keywords (
-  id               uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
-  keyword          text        NOT NULL,
-  interval_minutes int         NOT NULL DEFAULT 5,
-  is_active        boolean     NOT NULL DEFAULT true,
-  last_scraped_at  timestamptz,
-  initial_loaded   boolean     NOT NULL DEFAULT false,
-  created_at       timestamptz NOT NULL DEFAULT now()
-);
-
-CREATE UNIQUE INDEX IF NOT EXISTS idx_keywords_unique_lower ON keywords (LOWER(keyword));
-
-CREATE TABLE IF NOT EXISTS keyword_subscriptions (
-  keyword_id    uuid        NOT NULL REFERENCES keywords(id) ON DELETE CASCADE,
-  user_id       uuid        NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-  subscribed_at timestamptz NOT NULL DEFAULT now(),
-  PRIMARY KEY (keyword_id, user_id)
-);
-
-CREATE INDEX IF NOT EXISTS idx_keyword_subscriptions_user ON keyword_subscriptions(user_id);
-
-ALTER TABLE ads ADD COLUMN IF NOT EXISTS postcode TEXT;
-ALTER TABLE ads ADD COLUMN IF NOT EXISTS modified_at TIMESTAMPTZ;
-
-DO $$
-DECLARE
-  sq_record RECORD;
-  kw_id uuid;
-BEGIN
-  FOR sq_record IN SELECT DISTINCT LOWER(keyword) AS keyword, interval_minutes, is_active, last_scraped_at FROM search_queries LOOP
-    INSERT INTO keywords (keyword, interval_minutes, is_active, last_scraped_at)
-    VALUES (sq_record.keyword, sq_record.interval_minutes, sq_record.is_active, sq_record.last_scraped_at)
-    ON CONFLICT DO NOTHING;
-  END LOOP;
-
-  FOR sq_record IN SELECT user_id, LOWER(keyword) AS keyword FROM search_queries LOOP
-    SELECT id INTO kw_id FROM keywords WHERE LOWER(keyword) = sq_record.keyword LIMIT 1;
-    IF kw_id IS NOT NULL THEN
-      INSERT INTO keyword_subscriptions (keyword_id, user_id)
-      VALUES (kw_id, sq_record.user_id)
-      ON CONFLICT DO NOTHING;
-    END IF;
-  END LOOP;
-END $$;
-
-UPDATE keywords SET initial_loaded = true
-WHERE id IN (
-  SELECT DISTINCT kw.id
-  FROM keywords kw
-  INNER JOIN search_queries sq ON LOWER(sq.keyword) = LOWER(kw.keyword)
-  WHERE EXISTS (
-    SELECT 1 FROM scrape_logs sl
-    WHERE sl.search_query_id = sq.id AND sl.status = 'success'
-  )
-);
-
-ALTER TABLE scrape_logs DROP CONSTRAINT IF EXISTS scrape_logs_search_query_id_fkey;
-ALTER TABLE scrape_logs RENAME COLUMN search_query_id TO keyword_id;
-
-UPDATE scrape_logs sl SET keyword_id = kw.id
-FROM search_queries sq, keywords kw
-WHERE sl.keyword_id = sq.id
-  AND LOWER(sq.keyword) = LOWER(kw.keyword);
-
-DO $$
-BEGIN
-  IF NOT EXISTS (SELECT 1 FROM information_schema.table_constraints WHERE constraint_name = 'fk_scrape_logs_keyword') THEN
-    ALTER TABLE scrape_logs ADD CONSTRAINT fk_scrape_logs_keyword
-      FOREIGN KEY (keyword_id) REFERENCES keywords(id) ON DELETE CASCADE;
-  END IF;
-END $$;
-
-DROP TABLE IF EXISTS query_ads CASCADE;
-DROP TABLE IF EXISTS search_queries CASCADE;