Feat/incremental sync #1

Merged
Lago merged 7 commits from feat/incremental-sync into main 2026-06-12 10:06:42 +02:00
Showing only changes of commit 64d078f457 - Show all commits
+212 -154
View File
@@ -1,182 +1,240 @@
import concurrent.futures
import hashlib
import logging
import os
import signal
import threading
import time
from datetime import datetime, timezone

import caldav
import requests
from caldav.elements import dav, cdav
# Project-local modules used by the incremental sync service.
from config import validate, HEADERS, Config
from state import SyncState
from diff import parse_ics_events, compute_diff, parse_ics_events_with_data
from apply import apply_adds, apply_updates, apply_deletes
from health import HealthServer

logger = logging.getLogger(__name__)
# Set by the signal handlers installed in main() to request a graceful stop.
shutdown_event = threading.Event()

# --- CONFIGURATION (legacy full-resync script) ---
# Default to 5 minutes
SYNC_FREQUENCY_MINUTES = int(os.getenv("SYNC_FREQUENCY", 5))
SYNC_FREQUENCY_SECONDS = SYNC_FREQUENCY_MINUTES * 60

# Your Outlook URL
ICS_URL = os.getenv("ICS_URL")
# Your Baïkal server: connection settings are read from the environment.
BAIKAL_URL = os.getenv("BAIKAL_URL")
BAIKAL_USER = os.getenv("BAIKAL_USER")
BAIKAL_PASS = os.getenv("BAIKAL_PASS")
CALENDAR_ID = os.getenv("CALENDAR_ID")
# Headers that make the client look like a real browser, to avoid 'Connection Reset'
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Unraid-Sync/1.0"
}
def normalize_url(url):
    """Return *url* as a stripped string without trailing slashes.

    Makes URL matching robust against trailing-slash differences.
    """
    return str(url).strip().rstrip("/")


def setup_logging():
    """Configure root logging at INFO level with timestamped lines."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
def find_calendar_by_url(calendars, target_url):
    """Return the first calendar whose normalized URL equals *target_url*.

    Both sides are passed through ``normalize_url`` before comparison.
    Returns ``None`` when no calendar matches.
    """
    normalized_target = normalize_url(target_url)
    for cal in calendars:
        if normalize_url(cal.url) == normalized_target:
            return cal
    return None


def find_calendar(client, config):
    """Pick the calendar to sync into from the client's principal.

    Selection order:

    1. If the ``CALENDAR_ID`` environment variable is set, the first calendar
       whose URL contains it; if none does, the available calendars are
       logged and ``None`` is returned.
    2. A calendar whose URL equals ``config.baikal_url`` (ignoring trailing
       slashes).
    3. The first calendar whose URL contains ``config.baikal_url``.
    4. The first calendar on the server, or ``None`` if there are none.

    The exact match is tried before the substring match so that a
    similarly-prefixed calendar (``.../work-old/`` vs ``.../work/``) is not
    selected just because it is listed first.
    """
    principal = client.principal()
    calendars = principal.calendars()

    calendar_id = os.environ.get("CALENDAR_ID")
    if calendar_id:
        for cal in calendars:
            if calendar_id in str(cal.url):
                return cal
        logger.error("Calendar with ID '%s' not found", calendar_id)
        for cal in calendars:
            logger.error(" Available: %s", cal.url)
        return None

    target = config.baikal_url.rstrip("/")
    candidates = [(cal, str(cal.url)) for cal in calendars]

    for cal, url in candidates:
        if url.rstrip("/") == target:
            return cal
    for cal, url in candidates:
        if target in url:
            return cal

    if calendars:
        # Nothing matched the configured URL; fall back to the first calendar
        # but make the guess visible, since events will be written there.
        logger.warning(
            "No calendar matches %s; falling back to %s", target, calendars[0].url
        )
        return calendars[0]
    return None
def delete_event(event):
    """Delete a single event; return True on success, False on failure.

    Errors are printed instead of raised, so the function can be used as a
    worker when deleting many events.
    """
    try:
        event.delete()
        return True
    except Exception as e:
        print(f"!!! Error deleting event {event}: {e}")
        return False


def sync_once(state: SyncState, health: HealthServer, config: Config) -> bool:
    """Run one incremental sync cycle from the ICS feed into the calendar.

    The cycle is skipped when the feed's ETag or SHA-256 content hash equals
    the cached value. Otherwise the parsed events are diffed against the
    hashes recorded in *state*, and only the resulting adds, updates and
    deletes are applied.

    The cached feed hash/ETag is committed only once a cycle has succeeded.
    Committing it before the changes are applied would make the cycle after
    a failure report "no changes", so the pending changes would never be
    retried.

    Args:
        state: Persistent store for the feed cache and per-event hashes.
        health: Health endpoint that receives the outcome of the cycle.
        config: Provides ``ics_url`` and the ``baikal_*`` connection settings.

    Returns:
        True if the cycle was skipped or completed, False if it failed.
    """
    start_time = time.time()

    def report(success, total):
        # Publish this cycle's outcome and elapsed time to the health endpoint.
        health.update_status(
            datetime.now(timezone.utc),
            time.time() - start_time,
            success,
            total,
        )

    logger.info("Starting sync cycle...")
    try:
        # Cheap change check first: ask for the ETag before downloading.
        head = requests.head(
            config.ics_url, headers=HEADERS, timeout=30, allow_redirects=True
        )
        head.raise_for_status()
        remote_etag = head.headers.get("ETag")
        cached_hash, cached_etag, _ = state.get_ics_cache()
        if remote_etag and cached_etag == remote_etag:
            logger.info("No changes detected (ETag match). Skipping sync.")
            return True

        response = requests.get(config.ics_url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        ics_text = response.text
        ics_hash = hashlib.sha256(ics_text.encode("utf-8")).hexdigest()
        if cached_hash == ics_hash:
            logger.info("No changes detected (hash match). Skipping sync.")
            if remote_etag:
                # Same content under a new ETag: remember the ETag so the
                # next cycle can stop at the HEAD request.
                state.set_ics_cache(ics_hash, remote_etag)
            return True

        logger.info(
            "ICS changed. Downloaded %d bytes, hash %s", len(ics_text), ics_hash[:12]
        )

        ics_uids = parse_ics_events(ics_text)
        known_uids = {}
        for uid in state.get_event_uids():
            known_hash = state.get_event_hash(uid)
            if known_hash:
                known_uids[uid] = known_hash

        deltas = compute_diff(ics_uids, known_uids)
        to_add = deltas["to_add"]
        to_update = deltas["to_update"]
        to_delete = deltas["to_delete"]

        if not (to_add or to_update or to_delete):
            logger.info("Calendar is already in sync.")
            state.set_ics_cache(ics_hash, remote_etag)
            report(True, len(ics_uids))
            return True

        logger.info(
            "Delta: %d to add, %d to update, %d to delete",
            len(to_add),
            len(to_update),
            len(to_delete),
        )

        client = caldav.DAVClient(
            url=config.baikal_url,
            username=config.baikal_user,
            password=config.baikal_pass,
            headers=HEADERS,
            ssl_verify_cert=True,
        )
        calendar = find_calendar(client, config)
        if not calendar:
            # The feed cache is untouched here, so the next cycle retries.
            logger.error("Failed to find calendar")
            report(False, 0)
            return False

        snapshot = state.snapshot()
        events_data = parse_ics_events_with_data(ics_text)
        add_events = {
            uid: events_data[uid] for uid, _ in to_add if uid in events_data
        }
        update_events = {
            uid: events_data[uid] for uid, _ in to_update if uid in events_data
        }
        delete_uids = to_delete

        try:
            # NOTE(review): the apply_* helpers return per-event error counts,
            # yet the cycle is recorded as successful regardless; confirm
            # that partial failures are meant to be tolerated.
            logger.info("Phase 1: Adding %d events...", len(add_events))
            if add_events:
                ok, failed = apply_adds(calendar, add_events)
                logger.info(
                    "Added %d/%d events (%d errors)", ok, len(add_events), failed
                )

            logger.info("Phase 2: Updating %d events...", len(update_events))
            if update_events:
                ok, failed = apply_updates(calendar, update_events)
                logger.info(
                    "Updated %d/%d events (%d errors)", ok, len(update_events), failed
                )

            logger.info("Phase 3: Deleting %d events...", len(delete_uids))
            if delete_uids:
                ok, failed = apply_deletes(calendar, delete_uids)
                logger.info(
                    "Deleted %d/%d events (%d errors)", ok, len(delete_uids), failed
                )

            for uid, event_hash in ics_uids.items():
                state.upsert_event(uid, event_hash)
            for uid in delete_uids:
                state.delete_event(uid)

            # Commit the feed cache last, once the calendar and the event
            # state both reflect this version of the feed.
            state.set_ics_cache(ics_hash, remote_etag)

            total = len(ics_uids)
            logger.info(
                "Sync completed in %.1fs. Total events: %d",
                time.time() - start_time,
                total,
            )
            report(True, total)
            return True
        except Exception as exc:
            logger.error("Sync failed: %s. Rolling back state.", exc)
            state.restore_snapshot(snapshot)
            report(False, 0)
            return False
    except Exception as exc:
        logger.error("Sync error: %s", exc)
        report(False, 0)
        return False
def delete_all_events(calendar):
    """
    Deletes all events in the calendar as fast as possible using threads.

    Returns None in every case; progress and errors are printed.
    """
    print("-> Buscando eventos para borrar...")
    try:
        events = calendar.events()
    except Exception as e:
        print(f"!!! Error al obtener eventos: {e}")
        return

    total_events = len(events)
    if total_events == 0:
        print("-> El calendario ya está vacío.")
        return

    print(f"-> Borrando {total_events} eventos rápidamente...")

    # Use a ThreadPoolExecutor to delete in parallel
    deleted_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        results = list(executor.map(delete_event, events))
        deleted_count = results.count(True)

    print(f"-> Limpieza completada. Borrados {deleted_count}/{total_events} eventos.")


def sync():
    """Legacy full resync: download the ICS feed, wipe the calendar, re-import.

    Reads its settings from the module-level ICS_URL / BAIKAL_* / CALENDAR_ID
    constants. Every failure is printed and ends the run early; nothing is
    raised to the caller.
    """
    if not all([ICS_URL, BAIKAL_URL, BAIKAL_USER, BAIKAL_PASS]):
        print(f"[{datetime.now()}] !!! Error: Faltan variables de entorno. Asegúrate de configurar ICS_URL, BAIKAL_URL, BAIKAL_USER y BAIKAL_PASS.")
        return

    print(f"[{datetime.now()}] Iniciando sincronización...")

    # 1. Download the ICS from Outlook
    print("-> Descargando calendario de Outlook...")
    try:
        response = requests.get(ICS_URL, headers=HEADERS, timeout=30)
        response.raise_for_status()
        ics_data = response.text
        print(f"-> Descarga exitosa ({len(ics_data)} bytes).")
    except Exception as e:
        print(f"!!! Error descargando Outlook: {e}")
        return

    # 2. Connect to Baïkal
    print("-> Conectando a Baïkal...")
    try:
        client = caldav.DAVClient(
            url=BAIKAL_URL,
            username=BAIKAL_USER,
            password=BAIKAL_PASS,
            headers=HEADERS,  # Key to avoid being blocked
            ssl_verify_cert=True  # Change to False if a self-signed SSL certificate causes problems
        )
        principal = client.principal()
        calendars = principal.calendars()

        # Look up the right calendar by ID when one is provided
        calendar = None
        if CALENDAR_ID:
            print(f"-> Buscando calendario con ID: {CALENDAR_ID}")
            for cal in calendars:
                # Check whether the ID appears in the calendar URL
                if CALENDAR_ID in str(cal.url):
                    calendar = cal
                    break
            if not calendar:
                print(f"!!! Error: No se encontró ningún calendario con el ID '{CALENDAR_ID}'. Calendarios disponibles:")
                for c in calendars:
                    print(f" - {c.url}")
                return
        else:
            # Without a CALENDAR_ID, select by BAIKAL_URL
            if not calendars:
                print("!!! No se encontró ningún calendario en esa URL.")
                return
            calendar = find_calendar_by_url(calendars, BAIKAL_URL)
            if not calendar:
                print("!!! Error: No se encontró un calendario que coincida con BAIKAL_URL.")
                print(f" BAIKAL_URL configurado: {BAIKAL_URL}")
                print(" Calendarios disponibles:")
                for c in calendars:
                    print(f" - {c.url}")
                return

        print(f"-> Calendario seleccionado: {calendar}")
        print(f"-> URL del calendario seleccionado: {calendar.url}")

        # 3. Delete the old events
        delete_all_events(calendar)

        # 4. Import the events
        print("-> Procesando archivo ICS...")
        from icalendar import Calendar
        cal = Calendar.from_ical(ics_data)
        events = cal.walk('vevent')
        total_events = len(events)
        print(f"-> Encontrados {total_events} eventos para importar.")

        success_count = 0
        error_count = 0
        for i, component in enumerate(events, 1):
            try:
                # Try passing the decoded string
                calendar.add_event(component.to_ical().decode('utf-8'))
                success_count += 1
            except Exception as ev_err:
                error_count += 1
                # Only print the first 5 errors to avoid flooding the output
                if error_count <= 5:
                    summary = component.get('summary', 'sin titulo')
                    print(f"!!! Error ({i}/{total_events}) '{summary}': {ev_err}")
            # Print progress every 50 events
            if i % 50 == 0:
                print(f" Procesados {i}/{total_events} (Exitos: {success_count}, Errores: {error_count})")

        print(f"-> ¡Sincronización finalizada! Éxitos: {success_count}, Errores: {error_count}")
    except Exception as e:
        print(f"!!! Error en Baïkal: {e}")


def main():
    """Run the sync service until SIGTERM or SIGINT is received.

    Validates the configuration (exiting with status 1 on ``ValueError``),
    opens the state store, starts the health endpoint, then calls
    ``sync_once`` in a loop. After a successful cycle it waits
    ``config.sync_frequency`` minutes. After a failed cycle it backs off 1, 2,
    4, ... minutes, capped at 30. The state store and the health endpoint are
    released even if the loop exits through an exception.
    """
    setup_logging()
    logger.info("Starting Baikal Sync service...")

    try:
        config = validate()
    except ValueError as exc:
        logger.error("Configuration error: %s", exc)
        raise SystemExit(1)

    logger.info("Sync frequency: %d minutes", config.sync_frequency)

    def handle_signal(signum, frame):
        logger.info("Received signal %s. Shutting down...", signum)
        shutdown_event.set()

    state = SyncState("./sync.db")
    health = HealthServer(8081)
    health.start()
    logger.info("Health endpoint on :8081")

    try:
        signal.signal(signal.SIGTERM, handle_signal)
        signal.signal(signal.SIGINT, handle_signal)

        backoff = 0
        while not shutdown_event.is_set():
            if sync_once(state, health, config):
                backoff = 0
                sleep_time = config.sync_frequency * 60
            else:
                # Exponential backoff in minutes: 1, 2, 4, ... capped at 30.
                backoff = min(backoff * 2, 30) if backoff else 1
                sleep_time = backoff * 60
                logger.info("Sync failed. Backing off %d minutes...", backoff)

            logger.info("Next sync in %d seconds...", sleep_time)
            # Waiting on the event (rather than sleeping) lets a signal end
            # the wait immediately.
            shutdown_event.wait(sleep_time)
    finally:
        state.close()
        health.stop()
        logger.info("Shutdown complete.")
def _legacy_main():
    """Legacy entry point: call ``sync()`` forever at a fixed interval.

    The loop never returns and has no shutdown handling, so it is kept as a
    function and the module's entry point calls ``main()`` instead.
    """
    print(f"Iniciando servicio de sincronización. Frecuencia: {SYNC_FREQUENCY_MINUTES} minutos ({SYNC_FREQUENCY_SECONDS} segundos).")
    while True:
        sync()
        print(f"[{datetime.now()}] Durmiendo {SYNC_FREQUENCY_MINUTES} minutos...")
        time.sleep(SYNC_FREQUENCY_SECONDS)


if __name__ == "__main__":
    main()