ingest: Accept RoomMessageImage events regardless of body content
WhatsApp bridge images arrive as RoomMessageImage events with an empty body field, so the previous .jpg/.jpeg extension check silently rejected all of them. Accept all RoomMessageImage events and fall back to "image.jpg" as filename when body is empty. File content is still validated via magic bytes before upload.
This commit is contained in:
parent
025228b83c
commit
eec2d076e4
1 changed file with 12 additions and 3 deletions
15
ingest.py
15
ingest.py
|
|
@@ -206,7 +206,7 @@ def is_supported_file(event) -> bool:
|
|||
if isinstance(event, RoomMessageFile):
|
||||
return (event.body or "").lower().endswith(".pdf")
|
||||
if isinstance(event, RoomMessageImage):
|
||||
return (event.body or "").lower().endswith((".jpg", ".jpeg"))
|
||||
return True # validate magic bytes later; body may be empty (e.g. WhatsApp bridge)
|
||||
if isinstance(event, BadEvent):
|
||||
return _bad_event_encrypted_file_info(event) is not None
|
||||
return False
|
||||
|
|
@@ -219,7 +219,8 @@ def extract_event_fields(event) -> tuple[str, str, str, Optional[str]]:
|
|||
filename = content.get("body", "unknown")
|
||||
file_info = content.get("file", {})
|
||||
return event.event_id, filename, file_info["url"], json.dumps(file_info)
|
||||
return event.event_id, event.body, event.url, None
|
||||
filename = event.body or "image.jpg"
|
||||
return event.event_id, filename, event.url, None
|
||||
|
||||
|
||||
def content_type_for(filename: str) -> str:
|
||||
|
|
@@ -417,6 +418,8 @@ async def catchup_history(
|
|||
log.info("Starting historical catchup...")
|
||||
token = start_token
|
||||
total = 0
|
||||
batches = 0
|
||||
events_seen = 0
|
||||
|
||||
while True:
|
||||
response = await matrix_client.room_messages(
|
||||
|
|
@@ -430,6 +433,9 @@ async def catchup_history(
|
|||
log.error("room_messages error: %s", response)
|
||||
break
|
||||
|
||||
batches += 1
|
||||
events_seen += len(response.chunk)
|
||||
|
||||
for event in response.chunk:
|
||||
if is_supported_file(event):
|
||||
total += 1
|
||||
|
|
@@ -443,7 +449,10 @@ async def catchup_history(
|
|||
break
|
||||
token = response.end
|
||||
|
||||
log.info("Historical catchup complete — processed %d file event(s).", total)
|
||||
log.info(
|
||||
"Historical catchup complete — processed %d file event(s) across %d batches (%d total events).",
|
||||
total, batches, events_seen,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue