ingest: Assign uploaded documents to a configurable Paperless owner
The post_document endpoint does not support setting ownership on upload,
so after a successful upload the document is PATCHed to set the owner.
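Add optional PAPERLESS_OWNER_ID env var. When set, every newly uploaded
document is assigned to that Paperless user ID via PATCH /api/documents/{id}/.
This commit also changes two unrelated behaviours in ingest.py: process_event
now catches the ValueError raised by validate_file, logs it, and records the
event as "skipped"; and _SSLSMTPHandler.emit now connects with self.mailhost
and self.mailport instead of unpacking self.mailhost into host and port.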
This commit is contained in:
parent
0aa044eead
commit
f49ea1dbc5
2 changed files with 30 additions and 6 deletions
|
|
@ -7,6 +7,8 @@ MATRIX_ROOM_ID=!roomid:jeena.net
|
||||||
PAPERLESS_URL=https://paperless.jeena.net
|
PAPERLESS_URL=https://paperless.jeena.net
|
||||||
PAPERLESS_TOKEN=your_paperless_api_token
|
PAPERLESS_TOKEN=your_paperless_api_token
|
||||||
PAPERLESS_INBOX_TAG_ID=1
|
PAPERLESS_INBOX_TAG_ID=1
|
||||||
|
# Optional: assign uploaded documents to this Paperless user ID
|
||||||
|
# PAPERLESS_OWNER_ID=7
|
||||||
|
|
||||||
# Optional: path to the SQLite state database (default: state.db next to the script)
|
# Optional: path to the SQLite state database (default: state.db next to the script)
|
||||||
DB_PATH=state.db
|
DB_PATH=state.db
|
||||||
|
|
|
||||||
32
ingest.py
32
ingest.py
|
|
@ -43,8 +43,7 @@ class _SSLSMTPHandler(logging.handlers.SMTPHandler):
|
||||||
def emit(self, record: logging.LogRecord) -> None:
|
def emit(self, record: logging.LogRecord) -> None:
|
||||||
import smtplib
|
import smtplib
|
||||||
try:
|
try:
|
||||||
host, port = self.mailhost
|
with smtplib.SMTP_SSL(self.mailhost, self.mailport) as smtp:
|
||||||
with smtplib.SMTP_SSL(host, port) as smtp:
|
|
||||||
smtp.login(self.username, self.password)
|
smtp.login(self.username, self.password)
|
||||||
msg = self.format(record)
|
msg = self.format(record)
|
||||||
smtp.sendmail(self.fromaddr, self.toaddrs,
|
smtp.sendmail(self.fromaddr, self.toaddrs,
|
||||||
|
|
@ -82,6 +81,9 @@ MATRIX_ROOM_ID = os.environ["MATRIX_ROOM_ID"]
|
||||||
PAPERLESS_URL = os.environ["PAPERLESS_URL"].rstrip("/")
|
PAPERLESS_URL = os.environ["PAPERLESS_URL"].rstrip("/")
|
||||||
PAPERLESS_TOKEN = os.environ["PAPERLESS_TOKEN"]
|
PAPERLESS_TOKEN = os.environ["PAPERLESS_TOKEN"]
|
||||||
PAPERLESS_INBOX_TAG_ID = int(os.environ["PAPERLESS_INBOX_TAG_ID"])
|
PAPERLESS_INBOX_TAG_ID = int(os.environ["PAPERLESS_INBOX_TAG_ID"])
|
||||||
|
PAPERLESS_OWNER_ID: Optional[int] = (
|
||||||
|
int(os.environ["PAPERLESS_OWNER_ID"]) if os.environ.get("PAPERLESS_OWNER_ID") else None
|
||||||
|
)
|
||||||
|
|
||||||
DB_PATH = os.environ.get("DB_PATH", "state.db")
|
DB_PATH = os.environ.get("DB_PATH", "state.db")
|
||||||
UPTIME_KUMA_PUSH_URL = os.environ.get("UPTIME_KUMA_PUSH_URL")
|
UPTIME_KUMA_PUSH_URL = os.environ.get("UPTIME_KUMA_PUSH_URL")
|
||||||
|
|
@ -247,10 +249,11 @@ def validate_file(path: Path, filename: str) -> None:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
class PaperlessClient:
|
class PaperlessClient:
|
||||||
def __init__(self, base_url: str, token: str, inbox_tag_id: int) -> None:
|
def __init__(self, base_url: str, token: str, inbox_tag_id: int, owner_id: Optional[int] = None) -> None:
|
||||||
self.base_url = base_url
|
self.base_url = base_url
|
||||||
self.headers = {"Authorization": f"Token {token}"}
|
self.headers = {"Authorization": f"Token {token}"}
|
||||||
self.inbox_tag_id = inbox_tag_id
|
self.inbox_tag_id = inbox_tag_id
|
||||||
|
self.owner_id = owner_id
|
||||||
|
|
||||||
async def find_by_checksum(self, checksum: str) -> Optional[int]:
|
async def find_by_checksum(self, checksum: str) -> Optional[int]:
|
||||||
"""Return the Paperless document ID if the checksum already exists."""
|
"""Return the Paperless document ID if the checksum already exists."""
|
||||||
|
|
@ -319,6 +322,17 @@ class PaperlessClient:
|
||||||
|
|
||||||
raise RuntimeError(f"Paperless task {task_id} timed out after {PAPERLESS_TASK_TIMEOUT}s")
|
raise RuntimeError(f"Paperless task {task_id} timed out after {PAPERLESS_TASK_TIMEOUT}s")
|
||||||
|
|
||||||
|
async def set_owner(self, doc_id: int, owner_id: int) -> None:
|
||||||
|
"""Set the owner of a document."""
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
r = await client.patch(
|
||||||
|
f"{self.base_url}/api/documents/{doc_id}/",
|
||||||
|
headers=self.headers,
|
||||||
|
json={"owner": owner_id},
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Core processing
|
# Core processing
|
||||||
|
|
@ -361,8 +375,13 @@ async def process_event(
|
||||||
else:
|
else:
|
||||||
tmp.write(dl.body)
|
tmp.write(dl.body)
|
||||||
|
|
||||||
# Validate the file looks like what it claims to be
|
# Validate the file looks like what it claims to be; skip unsupported formats
|
||||||
|
try:
|
||||||
validate_file(tmp_path, filename)
|
validate_file(tmp_path, filename)
|
||||||
|
except ValueError as exc:
|
||||||
|
log.info("Skipping unsupported file %s: %s", filename, exc)
|
||||||
|
await upsert_event(db, event_id, filename, mxc_url, "skipped", encryption_info)
|
||||||
|
return
|
||||||
|
|
||||||
# Deduplicate against Paperless by content checksum
|
# Deduplicate against Paperless by content checksum
|
||||||
checksum = hashlib.md5(tmp_path.read_bytes()).hexdigest()
|
checksum = hashlib.md5(tmp_path.read_bytes()).hexdigest()
|
||||||
|
|
@ -385,6 +404,9 @@ async def process_event(
|
||||||
encryption_info, checksum)
|
encryption_info, checksum)
|
||||||
else:
|
else:
|
||||||
log.info("Confirmed in Paperless as document id=%d: %s", doc_id, filename)
|
log.info("Confirmed in Paperless as document id=%d: %s", doc_id, filename)
|
||||||
|
if paperless.owner_id is not None:
|
||||||
|
await paperless.set_owner(doc_id, paperless.owner_id)
|
||||||
|
log.info("Set owner of document id=%d to user id=%d", doc_id, paperless.owner_id)
|
||||||
await upsert_event(db, event_id, filename, mxc_url, "uploaded",
|
await upsert_event(db, event_id, filename, mxc_url, "uploaded",
|
||||||
encryption_info, checksum, doc_id)
|
encryption_info, checksum, doc_id)
|
||||||
|
|
||||||
|
|
@ -505,7 +527,7 @@ async def heartbeat_loop() -> None:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
async def main() -> None:
|
async def main() -> None:
|
||||||
paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID)
|
paperless = PaperlessClient(PAPERLESS_URL, PAPERLESS_TOKEN, PAPERLESS_INBOX_TAG_ID, PAPERLESS_OWNER_ID)
|
||||||
|
|
||||||
async with aiosqlite.connect(DB_PATH) as db:
|
async with aiosqlite.connect(DB_PATH) as db:
|
||||||
await init_db(db)
|
await init_db(db)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue