From 0aa044eead0078173148665a9bfd506afa5f3574 Mon Sep 17 00:00:00 2001 From: Jeena Date: Wed, 11 Mar 2026 23:35:23 +0000 Subject: [PATCH] ingest: Accept RoomMessageFile events regardless of body content WhatsApp bridge files (e.g. PDFs) may arrive with an empty body field, causing the previous .pdf extension check to silently skip them. Accept all RoomMessageFile events and fall back to "document.pdf" as filename. File content is still validated via magic bytes before upload. --- ingest.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ingest.py b/ingest.py index fd9346c..8f5a6b7 100644 --- a/ingest.py +++ b/ingest.py @@ -204,7 +204,7 @@ def _bad_event_encrypted_file_info(event: BadEvent) -> Optional[dict]: def is_supported_file(event) -> bool: if isinstance(event, RoomMessageFile): - return (event.body or "").lower().endswith(".pdf") + return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge) if isinstance(event, RoomMessageImage): return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge) if isinstance(event, BadEvent): @@ -219,7 +219,10 @@ def extract_event_fields(event) -> tuple[str, str, str, Optional[str]]: filename = content.get("body", "unknown") file_info = content.get("file", {}) return event.event_id, filename, file_info["url"], json.dumps(file_info) - filename = event.body or "image.jpg" + if isinstance(event, RoomMessageFile): + filename = event.body or "document.pdf" + else: + filename = event.body or "image.jpg" return event.event_id, filename, event.url, None