diff --git a/ingest.py b/ingest.py index fd9346c..8f5a6b7 100644 --- a/ingest.py +++ b/ingest.py @@ -204,7 +204,7 @@ def _bad_event_encrypted_file_info(event: BadEvent) -> Optional[dict]: def is_supported_file(event) -> bool: if isinstance(event, RoomMessageFile): - return (event.body or "").lower().endswith(".pdf") + return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge) if isinstance(event, RoomMessageImage): return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge) if isinstance(event, BadEvent): @@ -219,7 +219,10 @@ def extract_event_fields(event) -> tuple[str, str, str, Optional[str]]: filename = content.get("body", "unknown") file_info = content.get("file", {}) return event.event_id, filename, file_info["url"], json.dumps(file_info) - filename = event.body or "image.jpg" + if isinstance(event, RoomMessageFile): + filename = event.body or "document.pdf" + else: + filename = event.body or "image.jpg" return event.event_id, filename, event.url, None