From f49ea1dbc506535ad60589e9ba0513781db631bd Mon Sep 17 00:00:00 2001 From: Jeena Date: Wed, 11 Mar 2026 23:55:00 +0000 Subject: [PATCH] ingest: Assign uploaded documents to a configurable Paperless owner The post_document endpoint does not support setting ownership on upload, so after a successful upload the document is PATCHed to set the owner. Add optional PAPERLESS_OWNER_ID env var. When set, every newly uploaded document is assigned to that Paperless user ID via PATCH /api/documents/{id}/. Also make _SSLSMTPHandler.emit pass self.mailhost and self.mailport to SMTP_SSL instead of unpacking self.mailhost, and record files that fail validate_file as "skipped" rather than letting the ValueError propagate. --- .env.example | 2 ++ ingest.py | 34 ++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 14a8157..6e0da38 100644 --- a/.env.example +++ b/.env.example @@ -7,6 +7,8 @@ MATRIX_ROOM_ID=!roomid:jeena.net PAPERLESS_URL=https://paperless.jeena.net PAPERLESS_TOKEN=your_paperless_api_token PAPERLESS_INBOX_TAG_ID=1 +# Optional: assign uploaded documents to this Paperless user ID +# PAPERLESS_OWNER_ID=7 # Optional: path to the SQLite state database (default: state.db next to the script) DB_PATH=state.db diff --git a/ingest.py b/ingest.py index 8f5a6b7..9b0d0ae 100644 --- a/ingest.py +++ b/ingest.py @@ -43,8 +43,7 @@ class _SSLSMTPHandler(logging.handlers.SMTPHandler): def emit(self, record: logging.LogRecord) -> None: import smtplib try: - host, port = self.mailhost - with smtplib.SMTP_SSL(host, port) as smtp: + with smtplib.SMTP_SSL(self.mailhost, self.mailport) as smtp: smtp.login(self.username, self.password) msg = self.format(record) smtp.sendmail(self.fromaddr, self.toaddrs, @@ -82,6 +81,9 @@ MATRIX_ROOM_ID = os.environ["MATRIX_ROOM_ID"] PAPERLESS_URL = os.environ["PAPERLESS_URL"].rstrip("/") PAPERLESS_TOKEN = os.environ["PAPERLESS_TOKEN"] PAPERLESS_INBOX_TAG_ID = int(os.environ["PAPERLESS_INBOX_TAG_ID"]) +PAPERLESS_OWNER_ID: Optional[int] = ( + int(os.environ["PAPERLESS_OWNER_ID"]) if os.environ.get("PAPERLESS_OWNER_ID") else None +) DB_PATH = os.environ.get("DB_PATH", "state.db")
UPTIME_KUMA_PUSH_URL = os.environ.get("UPTIME_KUMA_PUSH_URL") @@ -247,10 +249,11 @@ def validate_file(path: Path, filename: str) -> None: # --------------------------------------------------------------------------- class PaperlessClient: - def __init__(self, base_url: str, token: str, inbox_tag_id: int) -> None: + def __init__(self, base_url: str, token: str, inbox_tag_id: int, owner_id: Optional[int] = None) -> None: self.base_url = base_url self.headers = {"Authorization": f"Token {token}"} self.inbox_tag_id = inbox_tag_id + self.owner_id = owner_id async def find_by_checksum(self, checksum: str) -> Optional[int]: """Return the Paperless document ID if the checksum already exists.""" @@ -319,6 +322,17 @@ class PaperlessClient: raise RuntimeError(f"Paperless task {task_id} timed out after {PAPERLESS_TASK_TIMEOUT}s") + async def set_owner(self, doc_id: int, owner_id: int) -> None: + """Set the owner of a document.""" + async with httpx.AsyncClient() as client: + r = await client.patch( + f"{self.base_url}/api/documents/{doc_id}/", + headers=self.headers, + json={"owner": owner_id}, + timeout=30, + ) + r.raise_for_status() + # --------------------------------------------------------------------------- # Core processing @@ -361,8 +375,13 @@ async def process_event( else: tmp.write(dl.body) - # Validate the file looks like what it claims to be - validate_file(tmp_path, filename) + # Validate the file looks like what it claims to be; skip unsupported formats + try: + validate_file(tmp_path, filename) + except ValueError as exc: + log.info("Skipping unsupported file %s: %s", filename, exc) + await upsert_event(db, event_id, filename, mxc_url, "skipped", encryption_info) + return # Deduplicate against Paperless by content checksum checksum = hashlib.md5(tmp_path.read_bytes()).hexdigest() @@ -385,6 +404,9 @@ async def process_event( encryption_info, checksum) else: log.info("Confirmed in Paperless as document id=%d: %s", doc_id, filename) + if paperless.owner_id is not 
None: + await paperless.set_owner(doc_id, paperless.owner_id) + log.info("Set owner of document id=%d to user id=%d", doc_id, paperless.owner_id) await upsert_event(db, event_id, filename, mxc_url, "uploaded", encryption_info, checksum, doc_id) @@ -505,7 +527,7 @@ async def heartbeat_loop() -> None: # --------------------------------------------------------------------------- async def main() -> None: - paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID) + paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID, PAPERLESS_OWNER_ID) async with aiosqlite.connect(DB_PATH) as db: await init_db(db)