ingest: Assign uploaded documents to a configurable Paperless owner
The post_document endpoint does not support setting ownership on upload,
so after a successful upload the document is PATCHed to set the owner.
Add optional PAPERLESS_OWNER_ID env var. When set, every newly uploaded
document is assigned to that Paperless user ID via PATCH /api/documents/{id}/.
This commit is contained in:
parent
0aa044eead
commit
f49ea1dbc5
2 changed files with 30 additions and 6 deletions
34
ingest.py
34
ingest.py
|
|
@ -43,8 +43,7 @@ class _SSLSMTPHandler(logging.handlers.SMTPHandler):
|
|||
def emit(self, record: logging.LogRecord) -> None:
|
||||
import smtplib
|
||||
try:
|
||||
host, port = self.mailhost
|
||||
with smtplib.SMTP_SSL(host, port) as smtp:
|
||||
with smtplib.SMTP_SSL(self.mailhost, self.mailport) as smtp:
|
||||
smtp.login(self.username, self.password)
|
||||
msg = self.format(record)
|
||||
smtp.sendmail(self.fromaddr, self.toaddrs,
|
||||
|
|
@ -82,6 +81,9 @@ MATRIX_ROOM_ID = os.environ["MATRIX_ROOM_ID"]
|
|||
PAPERLESS_URL = os.environ["PAPERLESS_URL"].rstrip("/")
|
||||
PAPERLESS_TOKEN = os.environ["PAPERLESS_TOKEN"]
|
||||
PAPERLESS_INBOX_TAG_ID = int(os.environ["PAPERLESS_INBOX_TAG_ID"])
|
||||
# Optional Paperless user ID that newly uploaded documents are assigned to.
# The variable is read once and stripped, so an unset, empty, or
# whitespace-only value all mean "do not set an owner" (None) instead of
# crashing at import time. A non-numeric value still raises ValueError.
_paperless_owner_raw = os.environ.get("PAPERLESS_OWNER_ID", "").strip()
PAPERLESS_OWNER_ID: Optional[int] = (
    int(_paperless_owner_raw) if _paperless_owner_raw else None
)
|
||||
|
||||
DB_PATH = os.environ.get("DB_PATH", "state.db")
|
||||
UPTIME_KUMA_PUSH_URL = os.environ.get("UPTIME_KUMA_PUSH_URL")
|
||||
|
|
@ -247,10 +249,11 @@ def validate_file(path: Path, filename: str) -> None:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
class PaperlessClient:
|
||||
def __init__(self, base_url: str, token: str, inbox_tag_id: int) -> None:
|
||||
def __init__(self, base_url: str, token: str, inbox_tag_id: int, owner_id: Optional[int] = None) -> None:
|
||||
self.base_url = base_url
|
||||
self.headers = {"Authorization": f"Token {token}"}
|
||||
self.inbox_tag_id = inbox_tag_id
|
||||
self.owner_id = owner_id
|
||||
|
||||
async def find_by_checksum(self, checksum: str) -> Optional[int]:
|
||||
"""Return the Paperless document ID if the checksum already exists."""
|
||||
|
|
@ -319,6 +322,17 @@ class PaperlessClient:
|
|||
|
||||
raise RuntimeError(f"Paperless task {task_id} timed out after {PAPERLESS_TASK_TIMEOUT}s")
|
||||
|
||||
async def set_owner(self, doc_id: int, owner_id: int) -> None:
    """Assign an existing document to a user.

    Issues ``PATCH {base_url}/api/documents/{doc_id}/`` with the JSON body
    ``{"owner": owner_id}``, using the client's stored auth headers and a
    30-second timeout.

    Args:
        doc_id: ID of the document to update.
        owner_id: ID of the user that should own the document.

    Raises:
        Whatever ``raise_for_status()`` raises when the response carries
        an error status, plus any transport error from the request itself.
    """
    endpoint = f"{self.base_url}/api/documents/{doc_id}/"
    payload = {"owner": owner_id}

    async with httpx.AsyncClient() as session:
        response = await session.patch(
            endpoint,
            headers=self.headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core processing
|
||||
|
|
@ -361,8 +375,13 @@ async def process_event(
|
|||
else:
|
||||
tmp.write(dl.body)
|
||||
|
||||
# Validate the file looks like what it claims to be
|
||||
validate_file(tmp_path, filename)
|
||||
# Validate the file looks like what it claims to be; skip unsupported formats
|
||||
try:
|
||||
validate_file(tmp_path, filename)
|
||||
except ValueError as exc:
|
||||
log.info("Skipping unsupported file %s: %s", filename, exc)
|
||||
await upsert_event(db, event_id, filename, mxc_url, "skipped", encryption_info)
|
||||
return
|
||||
|
||||
# Deduplicate against Paperless by content checksum
|
||||
checksum = hashlib.md5(tmp_path.read_bytes()).hexdigest()
|
||||
|
|
@ -385,6 +404,9 @@ async def process_event(
|
|||
encryption_info, checksum)
|
||||
else:
|
||||
log.info("Confirmed in Paperless as document id=%d: %s", doc_id, filename)
|
||||
if paperless.owner_id is not None:
|
||||
await paperless.set_owner(doc_id, paperless.owner_id)
|
||||
log.info("Set owner of document id=%d to user id=%d", doc_id, paperless.owner_id)
|
||||
await upsert_event(db, event_id, filename, mxc_url, "uploaded",
|
||||
encryption_info, checksum, doc_id)
|
||||
|
||||
|
|
@ -505,7 +527,7 @@ async def heartbeat_loop() -> None:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def main() -> None:
|
||||
paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID)
|
||||
paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID, PAPERLESS_OWNER_ID)
|
||||
|
||||
async with aiosqlite.connect(DB_PATH) as db:
|
||||
await init_db(db)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue