ingest: Accept RoomMessageFile events regardless of body content

Files relayed by the WhatsApp bridge (e.g. PDFs) may arrive with an empty
body field, so the previous .pdf extension check silently skipped them.
Accept all RoomMessageFile events and fall back to "document.pdf" as the
filename when the body is empty. File content is still validated via
magic bytes before upload.
This commit is contained in:
Jeena 2026-03-11 23:35:23 +00:00
parent eec2d076e4
commit 0aa044eead

View file

@ -204,7 +204,7 @@ def _bad_event_encrypted_file_info(event: BadEvent) -> Optional[dict]:
def is_supported_file(event) -> bool:
if isinstance(event, RoomMessageFile):
return (event.body or "").lower().endswith(".pdf")
return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge)
if isinstance(event, RoomMessageImage):
return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge)
if isinstance(event, BadEvent):
@ -219,7 +219,10 @@ def extract_event_fields(event) -> tuple[str, str, str, Optional[str]]:
filename = content.get("body", "unknown")
file_info = content.get("file", {})
return event.event_id, filename, file_info["url"], json.dumps(file_info)
filename = event.body or "image.jpg"
if isinstance(event, RoomMessageFile):
filename = event.body or "document.pdf"
else:
filename = event.body or "image.jpg"
return event.event_id, filename, event.url, None