fix: remove stale mark_notified import, full main.py scheduler refactor, fix scraper datetime+image extraction
This commit is contained in:
+21
-3
@@ -78,11 +78,26 @@ def extract_ad_fields(ad_dict: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
# Published time from PUBLISHED_String, falling back to CHANGED_String (ISO 8601)
|
||||
published_raw = attrs.get("PUBLISHED_String") or attrs.get("CHANGED_String")
|
||||
published_at: str | None = None
|
||||
published_at: datetime | None = None
|
||||
if published_raw:
|
||||
try:
|
||||
dt = datetime.fromisoformat(published_raw.replace("Z", "+00:00"))
|
||||
published_at = dt.isoformat()
|
||||
published_at = datetime.fromisoformat(published_raw.replace("Z", "+00:00"))
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
# Main image from the first advertImage entry
|
||||
images = ad_dict.get("advertImageList", {}).get("advertImage", [])
|
||||
main_image_url: str | None = None
|
||||
if images and isinstance(images[0], dict):
|
||||
main_image_url = images[0].get("referenceImageUrl")
|
||||
|
||||
postcode = attrs.get("POSTCODE")
|
||||
|
||||
modified_raw = attrs.get("CHANGED_String")
|
||||
modified_at: datetime | None = None
|
||||
if modified_raw:
|
||||
try:
|
||||
modified_at = datetime.fromisoformat(modified_raw.replace("Z", "+00:00"))
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@@ -93,4 +108,7 @@ def extract_ad_fields(ad_dict: dict[str, Any]) -> dict[str, Any]:
|
||||
"location": location,
|
||||
"url": url,
|
||||
"published_at": published_at,
|
||||
"main_image_url": main_image_url,
|
||||
"postcode": postcode,
|
||||
"modified_at": modified_at,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user