# Suffixes (compared case-insensitively) that mark a message body as already
# being a usable filename for the given kind of attachment.
_PDF_EXTENSIONS = (".pdf",)
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Upper bound on the caption text embedded in a generated filename, so a long
# free-text caption cannot make the name grow without bound.
_MAX_CAPTION_CHARS = 100

# Path separators become "_" and ASCII control characters (newlines, tabs,
# DEL, ...) become spaces, so a caption can never introduce directory
# components or line breaks into a filename.
_UNSAFE_CAPTION_CHARS = {ord(sep): "_" for sep in "/\\"}
_UNSAFE_CAPTION_CHARS.update({code: " " for code in (*range(32), 127)})


def _clean_caption(text) -> str:
    """Return *text* reduced to something safe to embed in a filename.

    Non-string input (e.g. ``None``) yields ``""``. Path separators and
    control characters are replaced, runs of whitespace are collapsed to a
    single space, and the result is capped at ``_MAX_CAPTION_CHARS``.
    """
    if not isinstance(text, str):
        return ""
    collapsed = " ".join(text.translate(_UNSAFE_CAPTION_CHARS).split())
    return collapsed[:_MAX_CAPTION_CHARS].rstrip()


def _whatsapp_filename(ts_ms: int, is_pdf: bool, body: str) -> str:
    """Generate a filename from the event timestamp, optionally prefixed with the body text.

    Args:
        ts_ms: Event timestamp in milliseconds since the Unix epoch.
        is_pdf: Selects the ``.pdf`` extension; otherwise ``.jpg`` is used.
        body: Message body / caption. When non-empty after cleaning it is
            prepended as ``"<caption> - "``.

    Returns:
        ``whatsapp_<YYYY-MM-DD_HH-MM-SS><ext>`` (timestamp rendered in UTC),
        with the cleaned caption prefix when there is one.
    """
    # Function-scope import, as in the original patch: the module's import
    # block is not visible from here.
    from datetime import datetime, timezone

    sent_at = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc)
    extension = ".pdf" if is_pdf else ".jpg"
    base = f"whatsapp_{sent_at.strftime('%Y-%m-%d_%H-%M-%S')}{extension}"

    caption = _clean_caption(body)
    return f"{caption} - {base}" if caption else base


def _resolve_filename(body, ts_ms: int, is_pdf: bool) -> str:
    """Pick the filename for an attachment: the body itself, or a generated name.

    The body is returned unchanged when it already ends with an extension
    matching the attachment kind; otherwise a timestamp-based name is built
    by ``_whatsapp_filename``. A missing or non-string body is treated as empty.
    """
    name = body if isinstance(body, str) else ""
    known_extensions = _PDF_EXTENSIONS if is_pdf else _IMAGE_EXTENSIONS
    if name.lower().endswith(known_extensions):
        return name
    return _whatsapp_filename(ts_ms, is_pdf, name)


def extract_event_fields(event) -> tuple[str, str, str, Optional[str], bool]:
    """Returns (event_id, filename, mxc_url, encryption_info_json_or_None, is_pdf).

    For a ``BadEvent`` the fields are read from the raw ``event.source``
    content: the ``file`` dict supplies the URL and is also returned
    JSON-encoded as the encryption info, and the attachment counts as a PDF
    when ``msgtype`` is ``"m.file"``. For any other event the attributes
    ``body`` and ``url`` are used directly, no encryption info is returned,
    and the attachment counts as a PDF when the event is a ``RoomMessageFile``.

    Raises:
        KeyError: if a ``BadEvent``'s content has no ``file["url"]`` entry.
    """
    if isinstance(event, BadEvent):
        content = event.source.get("content", {})
        file_info = content.get("file", {})
        is_pdf = content.get("msgtype") == "m.file"
        # "body" may be absent, null or a non-string in raw content;
        # _resolve_filename tolerates all of these.
        filename = _resolve_filename(content.get("body"), event.server_timestamp, is_pdf)
        return event.event_id, filename, file_info["url"], json.dumps(file_info), is_pdf

    is_pdf = isinstance(event, RoomMessageFile)
    filename = _resolve_filename(event.body, event.server_timestamp, is_pdf)
    return event.event_id, filename, event.url, None, is_pdf


# NOTE(review): the patch this code was taken from also changed ``retry_loop``
# (not fully visible here) from ``(filename or "").lower().endswith(".pdf")``
# to ``filename.lower().endswith(".pdf")``. That drops the None guard, and the
# preceding ``log.info`` would not catch a None filename either. If any row
# being retried can still carry a None filename, the old guarded form should be
# kept. TODO confirm against the stored rows before removing it.