ingest: Accept RoomMessageImage events regardless of body content

WhatsApp bridge images arrive as RoomMessageImage events with an empty
body field, so the previous .jpg/.jpeg extension check silently rejected
all of them. Accept all RoomMessageImage events and fall back to
"image.jpg" as the filename when the body is empty. File content is still
validated via magic bytes before upload.
This commit is contained in:
Jeena 2026-03-11 23:32:15 +00:00
parent 025228b83c
commit eec2d076e4

View file

@ -206,7 +206,7 @@ def is_supported_file(event) -> bool:
if isinstance(event, RoomMessageFile):
return (event.body or "").lower().endswith(".pdf")
if isinstance(event, RoomMessageImage):
return (event.body or "").lower().endswith((".jpg", ".jpeg"))
return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge)
if isinstance(event, BadEvent):
return _bad_event_encrypted_file_info(event) is not None
return False
@ -219,7 +219,8 @@ def extract_event_fields(event) -> tuple[str, str, str, Optional[str]]:
filename = content.get("body", "unknown")
file_info = content.get("file", {})
return event.event_id, filename, file_info["url"], json.dumps(file_info)
return event.event_id, event.body, event.url, None
filename = event.body or "image.jpg"
return event.event_id, filename, event.url, None
def content_type_for(filename: str) -> str:
@ -417,6 +418,8 @@ async def catchup_history(
log.info("Starting historical catchup...")
token = start_token
total = 0
batches = 0
events_seen = 0
while True:
response = await matrix_client.room_messages(
@ -430,6 +433,9 @@ async def catchup_history(
log.error("room_messages error: %s", response)
break
batches += 1
events_seen += len(response.chunk)
for event in response.chunk:
if is_supported_file(event):
total += 1
@ -443,7 +449,10 @@ async def catchup_history(
break
token = response.end
log.info("Historical catchup complete — processed %d file event(s).", total)
log.info(
"Historical catchup complete — processed %d file event(s) across %d batches (%d total events).",
total, batches, events_seen,
)
# ---------------------------------------------------------------------------