fix: remove stale mark_notified import, full main.py scheduler refactor, fix scraper datetime+image extraction

This commit is contained in:
2026-06-17 08:27:34 +02:00
parent b93811bb1b
commit a21c310eeb
5 changed files with 752 additions and 120 deletions
+21 -3
View File
@@ -78,11 +78,26 @@ def extract_ad_fields(ad_dict: dict[str, Any]) -> dict[str, Any]:
# Published time from PUBLISHED_String, falling back to CHANGED_String (ISO 8601)
published_raw = attrs.get("PUBLISHED_String") or attrs.get("CHANGED_String")
published_at: str | None = None
published_at: datetime | None = None
if published_raw:
try:
dt = datetime.fromisoformat(published_raw.replace("Z", "+00:00"))
published_at = dt.isoformat()
published_at = datetime.fromisoformat(published_raw.replace("Z", "+00:00"))
except (ValueError, TypeError):
pass
# Main image from the first advertImage entry
images = ad_dict.get("advertImageList", {}).get("advertImage", [])
main_image_url: str | None = None
if images and isinstance(images[0], dict):
main_image_url = images[0].get("referenceImageUrl")
postcode = attrs.get("POSTCODE")
modified_raw = attrs.get("CHANGED_String")
modified_at: datetime | None = None
if modified_raw:
try:
modified_at = datetime.fromisoformat(modified_raw.replace("Z", "+00:00"))
except (ValueError, TypeError):
pass
@@ -93,4 +108,7 @@ def extract_ad_fields(ad_dict: dict[str, Any]) -> dict[str, Any]:
"location": location,
"url": url,
"published_at": published_at,
"main_image_url": main_image_url,
"postcode": postcode,
"modified_at": modified_at,
}