ingest: Accept RoomMessageImage events regardless of body content

WhatsApp bridge images arrive as RoomMessageImage events with an empty
body field, so the previous .jpg/.jpeg extension check silently rejected
all of them. Accept all RoomMessageImage events and fall back to
"image.jpg" as the filename when the body is empty. File content is
still validated via magic bytes before upload.
This commit is contained in:
Jeena 2026-03-11 23:32:15 +00:00
parent 025228b83c
commit eec2d076e4

View file

@ -206,7 +206,7 @@ def is_supported_file(event) -> bool:
if isinstance(event, RoomMessageFile): if isinstance(event, RoomMessageFile):
return (event.body or "").lower().endswith(".pdf") return (event.body or "").lower().endswith(".pdf")
if isinstance(event, RoomMessageImage): if isinstance(event, RoomMessageImage):
return (event.body or "").lower().endswith((".jpg", ".jpeg")) return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge)
if isinstance(event, BadEvent): if isinstance(event, BadEvent):
return _bad_event_encrypted_file_info(event) is not None return _bad_event_encrypted_file_info(event) is not None
return False return False
@ -219,7 +219,8 @@ def extract_event_fields(event) -> tuple[str, str, str, Optional[str]]:
filename = content.get("body", "unknown") filename = content.get("body", "unknown")
file_info = content.get("file", {}) file_info = content.get("file", {})
return event.event_id, filename, file_info["url"], json.dumps(file_info) return event.event_id, filename, file_info["url"], json.dumps(file_info)
return event.event_id, event.body, event.url, None filename = event.body or "image.jpg"
return event.event_id, filename, event.url, None
def content_type_for(filename: str) -> str: def content_type_for(filename: str) -> str:
@ -417,6 +418,8 @@ async def catchup_history(
log.info("Starting historical catchup...") log.info("Starting historical catchup...")
token = start_token token = start_token
total = 0 total = 0
batches = 0
events_seen = 0
while True: while True:
response = await matrix_client.room_messages( response = await matrix_client.room_messages(
@ -430,6 +433,9 @@ async def catchup_history(
log.error("room_messages error: %s", response) log.error("room_messages error: %s", response)
break break
batches += 1
events_seen += len(response.chunk)
for event in response.chunk: for event in response.chunk:
if is_supported_file(event): if is_supported_file(event):
total += 1 total += 1
@ -443,7 +449,10 @@ async def catchup_history(
break break
token = response.end token = response.end
log.info("Historical catchup complete — processed %d file event(s).", total) log.info(
"Historical catchup complete — processed %d file event(s) across %d batches (%d total events).",
total, batches, events_seen,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------