Feat/incremental sync #1

Merged
Lago merged 7 commits from feat/incremental-sync into main 2026-06-12 10:06:42 +02:00
Showing only changes of commit 64d078f457 - Show all commits
+216 -158
View File
@@ -1,182 +1,240 @@
import os
import signal
import time
import hashlib
import logging
import threading
import requests
import caldav
from caldav.elements import dav, cdav
from datetime import datetime
import os
import time
import concurrent.futures
from datetime import datetime, timezone
# --- CONFIGURACIÓN ---
# Default to 5 minutes
SYNC_FREQUENCY_MINUTES = int(os.getenv("SYNC_FREQUENCY", 5))
SYNC_FREQUENCY_SECONDS = SYNC_FREQUENCY_MINUTES * 60
from config import validate, HEADERS, Config
from state import SyncState
from diff import parse_ics_events, compute_diff, parse_ics_events_with_data
from apply import apply_adds, apply_updates, apply_deletes
from health import HealthServer
# Tu URL de Outlook
ICS_URL = os.getenv("ICS_URL")
# Tu Baïkal
BAIKAL_URL = os.getenv("BAIKAL_URL")
BAIKAL_USER = os.getenv("BAIKAL_USER")
BAIKAL_PASS = os.getenv("BAIKAL_PASS")
CALENDAR_ID = os.getenv("CALENDAR_ID")
# Headers para parecer un navegador real y evitar 'Connection Reset'
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Unraid-Sync/1.0"
}
logger = logging.getLogger(__name__)
shutdown_event = threading.Event()
def normalize_url(url):
"""Normalize URLs to make matching robust against trailing slash differences."""
return str(url).strip().rstrip("/")
def find_calendar_by_url(calendars, target_url):
"""Find a calendar whose URL matches the configured Baikal calendar URL."""
normalized_target = normalize_url(target_url)
for cal in calendars:
if normalize_url(cal.url) == normalized_target:
return cal
return None
def delete_event(event):
"""Helper function to delete a single event."""
try:
event.delete()
return True
except Exception as e:
print(f"!!! Error deleting event {event}: {e}")
return False
def delete_all_events(calendar):
"""
Deletes all events in the calendar as fast as possible using threads.
"""
print("-> Buscando eventos para borrar...")
try:
events = calendar.events()
except Exception as e:
print(f"!!! Error al obtener eventos: {e}")
return
total_events = len(events)
if total_events == 0:
print("-> El calendario ya está vacío.")
return
print(f"-> Borrando {total_events} eventos rápidamente...")
# Usamos ThreadPoolExecutor para borrar en paralelo
deleted_count = 0
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
results = list(executor.map(delete_event, events))
deleted_count = results.count(True)
print(f"-> Limpieza completada. Borrados {deleted_count}/{total_events} eventos.")
def sync():
if not all([ICS_URL, BAIKAL_URL, BAIKAL_USER, BAIKAL_PASS]):
print(f"[{datetime.now()}] !!! Error: Faltan variables de entorno. Asegúrate de configurar ICS_URL, BAIKAL_URL, BAIKAL_USER y BAIKAL_PASS.")
return
print(f"[{datetime.now()}] Iniciando sincronización...")
# 1. Descargar ICS de Outlook
print("-> Descargando calendario de Outlook...")
try:
response = requests.get(ICS_URL, headers=HEADERS, timeout=30)
response.raise_for_status()
ics_data = response.text
print(f"-> Descarga exitosa ({len(ics_data)} bytes).")
except Exception as e:
print(f"!!! Error descargando Outlook: {e}")
return
# 2. Conectar a Baïkal
print("-> Conectando a Baïkal...")
try:
client = caldav.DAVClient(
url=BAIKAL_URL,
username=BAIKAL_USER,
password=BAIKAL_PASS,
headers=HEADERS, # Clave para evitar el bloqueo
ssl_verify_cert=True # Cambiar a False si tienes problemas de certificado SSL auto-firmado
def setup_logging():
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
def find_calendar(client, config):
principal = client.principal()
calendars = principal.calendars()
calendar_id = os.environ.get("CALENDAR_ID")
# Buscar el calendario correcto por ID si se proporciona
calendar = None
if CALENDAR_ID:
print(f"-> Buscando calendario con ID: {CALENDAR_ID}")
if calendar_id:
for cal in calendars:
# Comprobamos si el ID está en la URL del calendario
if CALENDAR_ID in str(cal.url):
calendar = cal
break
if not calendar:
print(f"!!! Error: No se encontró ningún calendario con el ID '{CALENDAR_ID}'. Calendarios disponibles:")
if calendar_id in str(cal.url):
return cal
logger.error("Calendar with ID '%s' not found", calendar_id)
for c in calendars:
print(f" - {c.url}")
return
else:
# Si no hay CALENDAR_ID, seleccionar por BAIKAL_URL
if not calendars:
print("!!! No se encontró ningún calendario en esa URL.")
return
logger.error(" Available: %s", c.url)
return None
calendar = find_calendar_by_url(calendars, BAIKAL_URL)
if not calendar:
print("!!! Error: No se encontró un calendario que coincida con BAIKAL_URL.")
print(f" BAIKAL_URL configurado: {BAIKAL_URL}")
print(" Calendarios disponibles:")
for c in calendars:
print(f" - {c.url}")
return
target = config.baikal_url.rstrip("/")
for cal in calendars:
if target in str(cal.url) or str(cal.url).rstrip("/") == target:
return cal
print(f"-> Calendario seleccionado: {calendar}")
print(f"-> URL del calendario seleccionado: {calendar.url}")
if calendars:
return calendars[0]
# 3. Borrar eventos antiguos (NUEVO)
delete_all_events(calendar)
return None
# 4. Importar eventos
print("-> Procesando archivo ICS...")
from icalendar import Calendar
cal = Calendar.from_ical(ics_data)
events = cal.walk('vevent')
total_events = len(events)
print(f"-> Encontrados {total_events} eventos para importar.")
def sync_once(state: SyncState, health: HealthServer, config: Config) -> bool:
start_time = time.time()
logger.info("Starting sync cycle...")
success_count = 0
error_count = 0
for i, component in enumerate(events, 1):
try:
# Intentamos pasar el string decodificado
calendar.add_event(component.to_ical().decode('utf-8'))
success_count += 1
except Exception as ev_err:
error_count += 1
# Solo imprimimos los primeros 5 errores para no saturar
if error_count <= 5:
summary = component.get('summary', 'sin titulo')
print(f"!!! Error ({i}/{total_events}) '{summary}': {ev_err}")
r = requests.head(config.ics_url, headers=HEADERS, timeout=30, allow_redirects=True)
r.raise_for_status()
remote_etag = r.headers.get("ETag")
cached_hash, cached_etag, _ = state.get_ics_cache()
# Print progress every 50 events
if i % 50 == 0:
print(f" Procesados {i}/{total_events} (Exitos: {success_count}, Errores: {error_count})")
if remote_etag and cached_etag == remote_etag:
logger.info("No changes detected (ETag match). Skipping sync.")
return True
print(f"-> ¡Sincronización finalizada! Éxitos: {success_count}, Errores: {error_count}")
response = requests.get(config.ics_url, headers=HEADERS, timeout=30)
response.raise_for_status()
ics_text = response.text
ics_hash = hashlib.sha256(ics_text.encode("utf-8")).hexdigest()
if cached_hash == ics_hash:
logger.info("No changes detected (hash match). Skipping sync.")
if remote_etag:
state.set_ics_cache(ics_hash, remote_etag)
return True
state.set_ics_cache(ics_hash, remote_etag)
logger.info("ICS changed. Downloaded %d bytes, hash %s", len(ics_text), ics_hash[:12])
ics_uids = parse_ics_events(ics_text)
known_uids = {}
for uid in state.get_event_uids():
h = state.get_event_hash(uid)
if h:
known_uids[uid] = h
deltas = compute_diff(ics_uids, known_uids)
to_add = deltas["to_add"]
to_update = deltas["to_update"]
to_delete = deltas["to_delete"]
if not to_add and not to_update and not to_delete:
logger.info("Calendar is already in sync.")
duration = time.time() - start_time
health.update_status(
datetime.now(timezone.utc),
duration,
True,
len(ics_uids),
)
return True
logger.info(
"Delta: %d to add, %d to update, %d to delete",
len(to_add),
len(to_update),
len(to_delete),
)
client = caldav.DAVClient(
url=config.baikal_url,
username=config.baikal_user,
password=config.baikal_pass,
headers=HEADERS,
ssl_verify_cert=True,
)
calendar = find_calendar(client, config)
if not calendar:
logger.error("Failed to find calendar")
duration = time.time() - start_time
health.update_status(
datetime.now(timezone.utc),
duration,
False,
0,
)
return False
snapshot = state.snapshot()
events_data = parse_ics_events_with_data(ics_text)
add_events = {uid: events_data[uid] for uid, _ in to_add if uid in events_data}
update_events = {uid: events_data[uid] for uid, _ in to_update if uid in events_data}
delete_uids = to_delete
try:
logger.info("Phase 1: Adding %d events...", len(add_events))
if add_events:
s, e = apply_adds(calendar, add_events)
logger.info("Added %d/%d events (%d errors)", s, len(add_events), e)
logger.info("Phase 2: Updating %d events...", len(update_events))
if update_events:
s, e = apply_updates(calendar, update_events)
logger.info("Updated %d/%d events (%d errors)", s, len(update_events), e)
logger.info("Phase 3: Deleting %d events...", len(delete_uids))
if delete_uids:
s, e = apply_deletes(calendar, delete_uids)
logger.info("Deleted %d/%d events (%d errors)", s, len(delete_uids), e)
for uid, h in ics_uids.items():
state.upsert_event(uid, h)
for uid in delete_uids:
state.delete_event(uid)
total = len(ics_uids)
duration = time.time() - start_time
logger.info("Sync completed in %.1fs. Total events: %d", duration, total)
health.update_status(
datetime.now(timezone.utc),
duration,
True,
total,
)
return True
except Exception as exc:
logger.error("Sync failed: %s. Rolling back state.", exc)
state.restore_snapshot(snapshot)
duration = time.time() - start_time
health.update_status(
datetime.now(timezone.utc),
duration,
False,
0,
)
return False
except Exception as exc:
logger.error("Sync error: %s", exc)
duration = time.time() - start_time
health.update_status(
datetime.now(timezone.utc),
duration,
False,
0,
)
return False
def main():
setup_logging()
logger.info("Starting Baikal Sync service...")
try:
config = validate()
except ValueError as exc:
logger.error("Configuration error: %s", exc)
raise SystemExit(1)
logger.info("Sync frequency: %d minutes", config.sync_frequency)
state = SyncState("./sync.db")
health = HealthServer(8081)
health.start()
logger.info("Health endpoint on :8081")
backoff = 0
def handle_signal(signum, frame):
logger.info("Received signal %s. Shutting down...", signum)
shutdown_event.set()
signal.signal(signal.SIGTERM, handle_signal)
signal.signal(signal.SIGINT, handle_signal)
while not shutdown_event.is_set():
success = sync_once(state, health, config)
if success:
backoff = 0
sleep_time = config.sync_frequency * 60
else:
backoff = max(1, min(backoff * 2 if backoff > 0 else 1, 30))
sleep_time = backoff * 60
logger.info("Sync failed. Backing off %d minutes...", backoff)
logger.info("Next sync in %d seconds...", sleep_time)
shutdown_event.wait(sleep_time)
state.close()
health.stop()
logger.info("Shutdown complete.")
except Exception as e:
print(f"!!! Error en Baïkal: {e}")
if __name__ == "__main__":
print(f"Iniciando servicio de sincronización. Frecuencia: {SYNC_FREQUENCY_MINUTES} minutos ({SYNC_FREQUENCY_SECONDS} segundos).")
while True:
sync()
print(f"[{datetime.now()}] Durmiendo {SYNC_FREQUENCY_MINUTES} minutos...")
time.sleep(SYNC_FREQUENCY_SECONDS)
main()