ingest: Assign uploaded documents to a configurable Paperless owner

The post_document endpoint does not support setting ownership on upload,
so after a successful upload the document is PATCHed to set the owner.

Add optional PAPERLESS_OWNER_ID env var. When set, every newly uploaded
document is assigned to that Paperless user ID via PATCH /api/documents/{id}/.
This commit is contained in:
Jeena 2026-03-11 23:55:00 +00:00
parent 0aa044eead
commit f49ea1dbc5
2 changed files with 30 additions and 6 deletions

View file

@ -7,6 +7,8 @@ MATRIX_ROOM_ID=!roomid:jeena.net
PAPERLESS_URL=https://paperless.jeena.net PAPERLESS_URL=https://paperless.jeena.net
PAPERLESS_TOKEN=your_paperless_api_token PAPERLESS_TOKEN=your_paperless_api_token
PAPERLESS_INBOX_TAG_ID=1 PAPERLESS_INBOX_TAG_ID=1
# Optional: assign uploaded documents to this Paperless user ID
# PAPERLESS_OWNER_ID=7
# Optional: path to the SQLite state database (default: state.db next to the script) # Optional: path to the SQLite state database (default: state.db next to the script)
DB_PATH=state.db DB_PATH=state.db

View file

@ -43,8 +43,7 @@ class _SSLSMTPHandler(logging.handlers.SMTPHandler):
def emit(self, record: logging.LogRecord) -> None: def emit(self, record: logging.LogRecord) -> None:
import smtplib import smtplib
try: try:
host, port = self.mailhost with smtplib.SMTP_SSL(self.mailhost, self.mailport) as smtp:
with smtplib.SMTP_SSL(host, port) as smtp:
smtp.login(self.username, self.password) smtp.login(self.username, self.password)
msg = self.format(record) msg = self.format(record)
smtp.sendmail(self.fromaddr, self.toaddrs, smtp.sendmail(self.fromaddr, self.toaddrs,
@ -82,6 +81,9 @@ MATRIX_ROOM_ID = os.environ["MATRIX_ROOM_ID"]
PAPERLESS_URL = os.environ["PAPERLESS_URL"].rstrip("/") PAPERLESS_URL = os.environ["PAPERLESS_URL"].rstrip("/")
PAPERLESS_TOKEN = os.environ["PAPERLESS_TOKEN"] PAPERLESS_TOKEN = os.environ["PAPERLESS_TOKEN"]
PAPERLESS_INBOX_TAG_ID = int(os.environ["PAPERLESS_INBOX_TAG_ID"]) PAPERLESS_INBOX_TAG_ID = int(os.environ["PAPERLESS_INBOX_TAG_ID"])
# Optional Paperless user ID that uploaded documents are assigned to.
# Stays None when the variable is unset or empty; a non-empty value that is
# not a valid integer raises ValueError at import time.
PAPERLESS_OWNER_ID: Optional[int] = None
if os.environ.get("PAPERLESS_OWNER_ID"):
    PAPERLESS_OWNER_ID = int(os.environ["PAPERLESS_OWNER_ID"])
DB_PATH = os.environ.get("DB_PATH", "state.db") DB_PATH = os.environ.get("DB_PATH", "state.db")
UPTIME_KUMA_PUSH_URL = os.environ.get("UPTIME_KUMA_PUSH_URL") UPTIME_KUMA_PUSH_URL = os.environ.get("UPTIME_KUMA_PUSH_URL")
@ -247,10 +249,11 @@ def validate_file(path: Path, filename: str) -> None:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class PaperlessClient: class PaperlessClient:
def __init__(self, base_url: str, token: str, inbox_tag_id: int) -> None: def __init__(self, base_url: str, token: str, inbox_tag_id: int, owner_id: Optional[int] = None) -> None:
self.base_url = base_url self.base_url = base_url
self.headers = {"Authorization": f"Token {token}"} self.headers = {"Authorization": f"Token {token}"}
self.inbox_tag_id = inbox_tag_id self.inbox_tag_id = inbox_tag_id
self.owner_id = owner_id
async def find_by_checksum(self, checksum: str) -> Optional[int]: async def find_by_checksum(self, checksum: str) -> Optional[int]:
"""Return the Paperless document ID if the checksum already exists.""" """Return the Paperless document ID if the checksum already exists."""
@ -319,6 +322,17 @@ class PaperlessClient:
raise RuntimeError(f"Paperless task {task_id} timed out after {PAPERLESS_TASK_TIMEOUT}s") raise RuntimeError(f"Paperless task {task_id} timed out after {PAPERLESS_TASK_TIMEOUT}s")
async def set_owner(self, doc_id: int, owner_id: int) -> None:
    """Assign a document to a Paperless user.

    Sends ``PATCH {base_url}/api/documents/{doc_id}/`` with the JSON body
    ``{"owner": owner_id}``, using this client's auth headers and a
    30 second timeout.

    Args:
        doc_id: ID of the document to update.
        owner_id: ID of the user that should own the document.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    document_url = f"{self.base_url}/api/documents/{doc_id}/"
    payload = {"owner": owner_id}
    async with httpx.AsyncClient() as http:
        response = await http.patch(
            document_url,
            headers=self.headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Core processing # Core processing
@ -361,8 +375,13 @@ async def process_event(
else: else:
tmp.write(dl.body) tmp.write(dl.body)
# Validate the file looks like what it claims to be # Validate the file looks like what it claims to be; skip unsupported formats
validate_file(tmp_path, filename) try:
validate_file(tmp_path, filename)
except ValueError as exc:
log.info("Skipping unsupported file %s: %s", filename, exc)
await upsert_event(db, event_id, filename, mxc_url, "skipped", encryption_info)
return
# Deduplicate against Paperless by content checksum # Deduplicate against Paperless by content checksum
checksum = hashlib.md5(tmp_path.read_bytes()).hexdigest() checksum = hashlib.md5(tmp_path.read_bytes()).hexdigest()
@ -385,6 +404,9 @@ async def process_event(
encryption_info, checksum) encryption_info, checksum)
else: else:
log.info("Confirmed in Paperless as document id=%d: %s", doc_id, filename) log.info("Confirmed in Paperless as document id=%d: %s", doc_id, filename)
if paperless.owner_id is not None:
await paperless.set_owner(doc_id, paperless.owner_id)
log.info("Set owner of document id=%d to user id=%d", doc_id, paperless.owner_id)
await upsert_event(db, event_id, filename, mxc_url, "uploaded", await upsert_event(db, event_id, filename, mxc_url, "uploaded",
encryption_info, checksum, doc_id) encryption_info, checksum, doc_id)
@ -505,7 +527,7 @@ async def heartbeat_loop() -> None:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
async def main() -> None: async def main() -> None:
paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID) paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID, PAPERLESS_OWNER_ID)
async with aiosqlite.connect(DB_PATH) as db: async with aiosqlite.connect(DB_PATH) as db:
await init_db(db) await init_db(db)