diff --git a/.github/workflows/docker-build-and-push.yml b/.github/workflows/docker-build-and-push.yml
index 361f876..09e65d6 100644
--- a/.github/workflows/docker-build-and-push.yml
+++ b/.github/workflows/docker-build-and-push.yml
@@ -96,3 +96,7 @@ jobs:
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ env.TAGS }}
+ build-args: |
+ VERSION=${{ github.ref_type == 'tag' && github.ref_name || github.sha }}
+ COMMIT=${{ github.sha }}
+ BUILD_DATE=${{ github.event.repository.pushed_at }}
diff --git a/Dockerfile b/Dockerfile
index 92e32c9..42b5697 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,33 @@
+# Define top-level build arguments
+ARG VERSION=docker-dev
+ARG COMMIT=unknown
+ARG BUILD_DATE=unknown
+
# Stage 1: Build the Go binary
-FROM golang:1.22-alpine AS builder
+FROM golang:1.23.4-alpine3.21 AS builder
# Set the working directory inside the container
WORKDIR /app
-# Install necessary packages
-RUN apk add --no-cache \
- git \
- gcc \
- musl-dev \
- mupdf \
- mupdf-dev
+# Package versions for Renovate
+# renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
+ENV GCC_VERSION=14.2.0-r4
+# renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
+ENV MUSL_DEV_VERSION=1.2.5-r8
+# renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
+ENV MUPDF_VERSION=1.24.10-r0
+# renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
+ENV MUPDF_DEV_VERSION=1.24.10-r0
+# renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
+ENV SED_VERSION=4.9-r2
+# Install necessary packages with pinned versions
+RUN apk add --no-cache \
+ "gcc=${GCC_VERSION}" \
+ "musl-dev=${MUSL_DEV_VERSION}" \
+ "mupdf=${MUPDF_VERSION}" \
+ "mupdf-dev=${MUPDF_DEV_VERSION}" \
+ "sed=${SED_VERSION}"
# Copy go.mod and go.sum files
COPY go.mod go.sum ./
@@ -24,6 +40,18 @@ RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3
# Now copy the actual source files
COPY *.go .
+# Import ARGs from top level
+ARG VERSION
+ARG COMMIT
+ARG BUILD_DATE
+
+# Stamp build metadata into version.go; '~' is the sed delimiter because git forbids it in ref names, so tags such as release/1.0 cannot break the expression
+RUN sed -i \
+ -e "s~devVersion~${VERSION}~" \
+ -e "s~devBuildDate~${BUILD_DATE}~" \
+ -e "s~devCommit~${COMMIT}~" \
+ version.go
+
# Build the binary using caching for both go modules and build cache
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
@@ -51,6 +79,8 @@ RUN npm run build
# Stage 3: Create a lightweight image with the Go binary and frontend
FROM alpine:latest
+ENV GIN_MODE=release
+
# Install necessary runtime dependencies
RUN apk add --no-cache \
ca-certificates
diff --git a/README.md b/README.md
index c45de32..97464ea 100644
--- a/README.md
+++ b/README.md
@@ -1,122 +1,129 @@
# paperless-gpt
-
[](LICENSE)
[](https://hub.docker.com/r/icereed/paperless-gpt)
[](CODE_OF_CONDUCT.md)

-**paperless-gpt** is a tool designed to generate accurate and meaningful document titles and tags for [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) using Large Language Models (LLMs). It supports multiple LLM providers, including **OpenAI** and **Ollama**. With paperless-gpt, you can streamline your document management by automatically suggesting appropriate titles and tags based on the content of your scanned documents.
+**paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**—ensuring high accuracy, even with tricky scans. If you’re craving next-level text extraction and effortless document organization, this is your solution.
-[](./demo.gif)
+https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
-## Features
+---
-- **Multiple LLM Support**: Choose between OpenAI and Ollama for generating document titles and tags.
-- **Customizable Prompts**: Modify the prompt templates to suit your specific needs.
-- **Easy Integration**: Works seamlessly with your existing paperless-ngx setup.
-- **User-Friendly Interface**: Intuitive web interface for reviewing and applying suggested titles and tags.
-- **Dockerized Deployment**: Simple setup using Docker and Docker Compose.
-- **Automatic Document Processing**: Automatically apply generated suggestions for documents with the `paperless-gpt-auto` tag.
-- **Experimental OCR Feature**: Send documents to a vision LLM for OCR processing.
+## Key Highlights
+
+1. **LLM-Enhanced OCR**
+ Harness Large Language Models (OpenAI or Ollama) for **better-than-traditional** OCR—turn messy or low-quality scans into context-aware, high-fidelity text.
+
+2. **Automatic Title & Tag Generation**
+ No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
+
+3. **Extensive Customization**
+ - **Prompt Templates**: Tweak your AI prompts to reflect your domain, style, or preference.
+ - **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
+
+4. **Simple Docker Deployment**
+ A few environment variables, and you’re off! Compose it alongside paperless-ngx with minimal fuss.
+
+5. **Unified Web UI**
+ - **Manual Review**: Approve or tweak AI’s suggestions.
+ - **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
+
+6. **Opt-In LLM-based OCR**
+ If you opt in, your images get read by a Vision LLM, pushing boundaries beyond standard OCR tools.
+
+---
## Table of Contents
+- [Key Highlights](#key-highlights)
+- [Getting Started](#getting-started)
+ - [Prerequisites](#prerequisites)
+ - [Installation](#installation)
+ - [Docker Compose](#docker-compose)
+ - [Manual Setup](#manual-setup)
+- [Configuration](#configuration)
+ - [Environment Variables](#environment-variables)
+ - [Custom Prompt Templates](#custom-prompt-templates)
+ - [Prompt Templates Directory](#prompt-templates-directory)
+ - [Mounting the Prompts Directory](#mounting-the-prompts-directory)
+ - [Editing the Prompt Templates](#editing-the-prompt-templates)
+ - [Template Syntax and Variables](#template-syntax-and-variables)
+- [LLM-Based OCR: Compare for Yourself](#llm-based-ocr-compare-for-yourself)
+- [Usage](#usage)
+- [Contributing](#contributing)
+- [License](#license)
+- [Star History](#star-history)
+- [Disclaimer](#disclaimer)
-- [paperless-gpt](#paperless-gpt)
- - [Features](#features)
- - [Table of Contents](#table-of-contents)
- - [Getting Started](#getting-started)
- - [Prerequisites](#prerequisites)
- - [Installation](#installation)
- - [Docker Compose](#docker-compose)
- - [Manual Setup](#manual-setup)
- - [Configuration](#configuration)
- - [Environment Variables](#environment-variables)
- - [Custom Prompt Templates](#custom-prompt-templates)
- - [Prompt Templates Directory](#prompt-templates-directory)
- - [Mounting the Prompts Directory](#mounting-the-prompts-directory)
- - [Editing the Prompt Templates](#editing-the-prompt-templates)
- - [Template Syntax and Variables](#template-syntax-and-variables)
- - [Usage](#usage)
- - [Contributing](#contributing)
- - [License](#license)
- - [Star History](#star-history)
+---
## Getting Started
### Prerequisites
-
-- [Docker](https://www.docker.com/get-started) installed on your system.
-- A running instance of [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx).
+- [Docker][docker-install] installed.
+- A running instance of [paperless-ngx][paperless-ngx].
- Access to an LLM provider:
- - **OpenAI**: An API key with access to models like `gpt-4o` or `gpt-3.5-turbo`.
- - **Ollama**: A running Ollama server with models like `llama2` installed.
+ - **OpenAI**: An API key with models like `gpt-4o` or `gpt-3.5-turbo`.
+ - **Ollama**: A running Ollama server with models like `llama2`.
### Installation
#### Docker Compose
-The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx.
+Here’s an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
```yaml
version: "3.7"
services:
paperless-ngx:
image: ghcr.io/paperless-ngx/paperless-ngx:latest
- # ... (your existing paperless-ngx configuration)
+ # ... (your existing paperless-ngx config)
paperless-gpt:
image: icereed/paperless-gpt:latest
environment:
- PAPERLESS_BASE_URL: "http://paperless-ngx:8000"
- PAPERLESS_API_TOKEN: "your_paperless_api_token"
- PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com" # Optional, your public link to access Paperless
- MANUAL_TAG: "paperless-gpt" # Optional, default is 'paperless-gpt'
- AUTO_TAG: "paperless-gpt-auto" # Optional, default is 'paperless-gpt-auto'
- LLM_PROVIDER: "openai" # or 'ollama'
- LLM_MODEL: "gpt-4o" # or 'llama2'
- OPENAI_API_KEY: "your_openai_api_key" # Required if using OpenAI
- LLM_LANGUAGE: "English" # Optional, default is 'English'
- OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama
- VISION_LLM_PROVIDER: "ollama" # Optional (for OCR) - ollama or openai
- VISION_LLM_MODEL: "minicpm-v" # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai
- AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default is 'paperless-gpt-ocr-auto'
- LOG_LEVEL: "info" # Optional or 'debug', 'warn', 'error'
+ PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
+ PAPERLESS_API_TOKEN: 'your_paperless_api_token'
+ PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional
+ MANUAL_TAG: 'paperless-gpt' # Optional, default: paperless-gpt
+ AUTO_TAG: 'paperless-gpt-auto' # Optional, default: paperless-gpt-auto
+ LLM_PROVIDER: 'openai' # or 'ollama'
+ LLM_MODEL: 'gpt-4o' # or 'llama2'
+ OPENAI_API_KEY: 'your_openai_api_key'
+ # Optional - OPENAI_BASE_URL: 'https://litellm.yourinstallationof.it.com/v1'
+ LLM_LANGUAGE: 'English' # Optional, default: English
+ OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
+ VISION_LLM_PROVIDER: 'ollama' # (for OCR) - openai or ollama
+ VISION_LLM_MODEL: 'minicpm-v' # (for OCR) - minicpm-v (ollama example), gpt-4o (for openai), etc.
+ AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default: paperless-gpt-ocr-auto
+ OCR_LIMIT_PAGES: '5' # Optional, default: 5. Set to 0 for no limit.
+ LOG_LEVEL: 'info' # Optional: debug, warn, error
volumes:
- - ./prompts:/app/prompts # Mount the prompts directory
+ - ./prompts:/app/prompts # Mount the prompts directory
ports:
- "8080:8080"
depends_on:
- paperless-ngx
```
-**Note:** Replace the placeholder values with your actual configuration.
+**Pro Tip**: Replace placeholders with real values and read the logs if something looks off.
#### Manual Setup
-
-If you prefer to run the application manually:
-
-1. **Clone the Repository:**
-
+1. **Clone the Repository**
```bash
git clone https://github.com/icereed/paperless-gpt.git
cd paperless-gpt
```
-
-2. **Create a `prompts` Directory:**
-
+2. **Create a `prompts` Directory**
```bash
mkdir prompts
```
-
-3. **Build the Docker Image:**
-
+3. **Build the Docker Image**
```bash
docker build -t paperless-gpt .
```
-
-4. **Run the Container:**
-
+4. **Run the Container**
```bash
docker run -d \
-e PAPERLESS_BASE_URL='http://your_paperless_ngx_url' \
@@ -128,201 +135,313 @@ If you prefer to run the application manually:
-e VISION_LLM_PROVIDER='ollama' \
-e VISION_LLM_MODEL='minicpm-v' \
-e LOG_LEVEL='info' \
- -v $(pwd)/prompts:/app/prompts \ # Mount the prompts directory
+ -v $(pwd)/prompts:/app/prompts \
-p 8080:8080 \
paperless-gpt
```
+---
+
## Configuration
### Environment Variables
-| Variable | Description | Required |
-| -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -------- |
-| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
-| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
-| `PAPERLESS_PUBLIC_URL` | The public URL for your Paperless instance, if it is different to your `PAPERLESS_BASE_URL` - say if you are running in Docker Compose | No |
-| `MANUAL_TAG` | The tag to use for manually processing documents. Default is `paperless-gpt`. | No |
-| `AUTO_TAG` | The tag to use for automatically processing documents. Default is `paperless-gpt-auto`. | No |
-| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
-| `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes |
-| `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. |
-| `LLM_LANGUAGE` | The likely language of your documents (e.g., `English`, `German`). Default is `English`. | No |
-| `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No |
-| `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No |
-| `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No |
-| `AUTO_OCR_TAG` | The tag to use for automatically processing documents with OCR. Default is `paperless-gpt-ocr-auto`. | No |
-| `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No |
-| `LISTEN_INTERFACE` | The interface paperless-gpt listens to. Default is `:8080` | No |
-| `WEBUI_PATH` | The path to load static content from. Default is `./web-app/dist` | No |
-| `AUTO_GENERATE_TITLE` | Enable/disable title generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No |
-| `AUTO_GENERATE_TAGS` | Enable/disable tag generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No |
-| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No |
-
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
+
+| Variable | Description | Required |
+|------------------------|------------------------------------------------------------------------------------------------------------------|----------|
+| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes |
+| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes |
+| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No |
+| `MANUAL_TAG` | Tag for manual processing. Default: `paperless-gpt`. | No |
+| `AUTO_TAG` | Tag for auto processing. Default: `paperless-gpt-auto`. | No |
+| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes |
+| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `llama2`. | Yes |
+| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. |
+| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No |
+| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). Default: `English`. | No |
+| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No |
+| `VISION_LLM_PROVIDER` | AI backend for OCR (`openai` or `ollama`). | No |
+| `VISION_LLM_MODEL` | Model name for OCR (e.g. `minicpm-v`). | No |
+| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. Default: `paperless-gpt-ocr-auto`. | No |
+| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). Default: `info`. | No |
+| `LISTEN_INTERFACE` | Network interface to listen on. Default: `:8080`. | No |
+| `WEBUI_PATH` | Path for static content. Default: `./web-app/dist`. | No |
+| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
+| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
+| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
+| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. Default: `5`. | No |
+| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No |
### Custom Prompt Templates
-You can customize the prompt templates used by paperless-gpt to generate titles and tags. By default, the application uses built-in templates, but you can modify them by editing the template files.
+paperless-gpt’s flexible **prompt templates** let you shape how AI responds:
-#### Prompt Templates Directory
+1. **`title_prompt.tmpl`**: For document titles.
+2. **`tag_prompt.tmpl`**: For tagging logic.
+3. **`ocr_prompt.tmpl`**: For LLM OCR.
-The prompt templates are stored in the `prompts` directory inside the application. The two main template files are:
-
-- `title_prompt.tmpl`: Template used for generating document titles.
-- `tag_prompt.tmpl`: Template used for generating document tags.
-
-#### Mounting the Prompts Directory
-
-To modify the prompt templates, you need to mount a local `prompts` directory into the container.
-
-**Docker Compose Example:**
+Mount them into your container via:
```yaml
-services:
- paperless-gpt:
- image: icereed/paperless-gpt:latest
- # ... (other configurations)
- volumes:
- - ./prompts:/app/prompts # Mount the prompts directory
+ volumes:
+ - ./prompts:/app/prompts
```
-**Docker Run Command Example:**
+Then tweak at will—**paperless-gpt** reloads them automatically on startup!
-```bash
-docker run -d \
- # ... (other configurations)
- -v $(pwd)/prompts:/app/prompts \
- paperless-gpt
-```
-
-#### Editing the Prompt Templates
-
-1. **Start the Container:**
-
- When you first start the container with the `prompts` directory mounted, it will automatically create the default template files in your local `prompts` directory if they do not exist.
-
-2. **Edit the Template Files:**
-
- - Open `prompts/title_prompt.tmpl` and `prompts/tag_prompt.tmpl` with your favorite text editor.
- - Modify the templates using Go's `text/template` syntax.
- - Save the changes.
-
-3. **Restart the Container (if necessary):**
-
- The application automatically reloads the templates when it starts. If the container is already running, you may need to restart it to apply the changes.
-
-#### Template Syntax and Variables
-
-The templates use Go's `text/template` syntax and have access to the following variables:
-
-- **For `title_prompt.tmpl`:**
-
- - `{{.Language}}`: The language specified in `LLM_LANGUAGE` (default is `English`).
- - `{{.Content}}`: The content of the document.
-
-- **For `tag_prompt.tmpl`:**
-
- - `{{.Language}}`: The language specified in `LLM_LANGUAGE`.
- - `{{.AvailableTags}}`: A list (array) of available tags from paperless-ngx.
- - `{{.Title}}`: The suggested title for the document.
- - `{{.Content}}`: The content of the document.
-
-**Example `title_prompt.tmpl`:**
-
-```text
-I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
-Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
-Respond only with the title, without any additional information. The content is likely in {{.Language}}.
-
-Be sure to add one fitting emoji at the beginning of the title to make it more visually appealing.
-
-Content:
-{{.Content}}
-```
-
-**Example `tag_prompt.tmpl`:**
-
-```text
-I will provide you with the content and the title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in {{.Language}}.
-
-Available Tags:
-{{.AvailableTags | join ","}}
-
-Title:
-{{.Title}}
-
-Content:
-{{.Content}}
-
-Please concisely select the {{.Language}} tags from the list above that best describe the document.
-Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
-```
-
-**Note:** Advanced users can utilize additional functions from the [Sprig](http://masterminds.github.io/sprig/) template library, as it is included in the application.
+---
## Usage
-1. **Tag Documents in paperless-ngx:**
+1. **Tag Documents**
+ - Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.
- - Add the tag `paperless-gpt` to documents you want to process. This tag is configurable via the `tagToFilter` variable in the code (default is `paperless-gpt`).
+2. **Visit Web UI**
+ - Go to `http://localhost:8080` (or your host) in your browser.
-2. **Access the paperless-gpt Interface:**
+3. **Generate & Apply Suggestions**
+ - Click “Generate Suggestions” to see AI-proposed titles/tags.
+ - Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
- - Open your browser and navigate to `http://localhost:8080`.
+4. **Try LLM-Based OCR (Experimental)**
+ - If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
+ - Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
-3. **Process Documents:**
+**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
- - Click on **"Generate Suggestions"** to let the LLM generate title suggestions based on the document content.
+---
-4. **Review and Apply Titles and Tags:**
+## LLM-Based OCR: Compare for Yourself
- - Review the suggested titles. You can edit them if necessary.
- - Click on **"Apply Suggestions"** to update the document titles in paperless-ngx.
+
+Click to expand the vanilla OCR vs. AI-powered OCR comparison
-5. **Experimental OCR Feature:**
+### Example 1
- - Send documents to a vision LLM for OCR processing.
- - Example configuration to enable OCR with Ollama:
- ```env
- VISION_LLM_PROVIDER=ollama
- VISION_LLM_MODEL=minicpm-v
- ```
+**Image**:
+
+
+
+**Vanilla Paperless-ngx OCR**:
+```
+La Grande Recre
+
+Gentre Gommercial 1'Esplanade
+1349 LOLNAIN LA NEWWE
+TA BERBOGAAL Tel =. 010 45,96 12
+Ticket 1440112 03/11/2006 a 13597:
+4007176614518. DINOS. TYRAMNESA
+TOTAET.T.LES
+ReslE par Lask-Euron
+Rencu en Cash Euro
+V.14.6 -Hotgese = VALERTE
+TICKET A-GONGERVER PORR TONT. EEHANGE
+HERET ET A BIENTOT
+```
+
+**LLM-Powered OCR (OpenAI gpt-4o)**:
+```
+La Grande Récré
+Centre Commercial l'Esplanade
+1348 LOUVAIN LA NEUVE
+TVA 860826401 Tel : 010 45 95 12
+Ticket 14421 le 03/11/2006 à 15:27:18
+4007176614518 DINOS TYRANNOSA 14.90
+TOTAL T.T.C. 14.90
+Réglé par Cash Euro 50.00
+Rendu en Cash Euro 35.10
+V.14.6 Hôtesse : VALERIE
+TICKET A CONSERVER POUR TOUT ECHANGE
+MERCI ET A BIENTOT
+```
+
+---
+
+### Example 2
+
+**Image**:
+
+
+
+**Vanilla Paperless-ngx OCR**:
+```
+Invoice Number: 1-996-84199
+
+Fed: Invoica Date: Sep01, 2014
+Accaunt Number: 1334-8037-4
+Page: 1012
+
+Fod£x Tax ID 71.0427007
+
+IRISINC
+SHARON ANDERSON
+4731 W ATLANTIC AVE STE BI
+DELRAY BEACH FL 33445-3897 ’ a
+Invoice Questions?
+
+Bing, ‚Account Shipping Address: Contact FedEx Reı
+
+ISINC
+4731 W ATLANTIC AVE Phone: (800) 622-1147 M-F 7-6 (CST)
+DELRAY BEACH FL 33445-3897 US Fax: (800) 548-3020
+
+Internet: www.fedex.com
+
+Invoice Summary Sep 01, 2014
+
+FodEx Ground Services
+Other Charges 11.00
+Total Charges 11.00 Da £
+>
+polo) Fz// /G
+TOTAL THIS INVOICE .... usps 11.00 P 2/1 f
+
+‘The only charges accrued for this period is the Weekly Service Charge.
+
+The Fedix Ground aceounts teferencedin his involce have been transteired and assigned 10, are owned by,andare payable to FedEx Express:
+
+To onsurs propor credit, plasa raturn this portion wirh your payment 10 FodEx
+‚Please do not staple or fold. Ploase make your chack payablı to FedEx.
+
+[TI For change ol address, hc har and camphat lrm or never ide
+
+Remittance Advice
+Your payment is due by Sep 16, 2004
+
+Number Number Dus
+
+1334803719968 41993200000110071
+
+AT 01 0391292 468448196 A**aDGT
+
+IRISINC Illallun elalalssollallansdHilalellund
+SHARON ANDERSON
+
+4731 W ATLANTIC AVE STEBI FedEx
+
+DELRAY BEACH FL 334453897 PO. Box 94516
+
+PALATINE IL 60094-4515
+```
+
+**LLM-Powered OCR (OpenAI gpt-4o)**:
+```
+FedEx. Invoice Number: 1-996-84199
+ Invoice Date: Sep 01, 2014
+ Account Number: 1334-8037-4
+ Page: 1 of 2
+ FedEx Tax ID: 71-0427007
+
+I R I S INC
+SHARON ANDERSON
+4731 W ATLANTIC AVE STE B1
+DELRAY BEACH FL 33445-3897
+ Invoice Questions?
+Billing Account Shipping Address: Contact FedEx Revenue Services
+I R I S INC Phone: (800) 622-1147 M-F 7-6 (CST)
+4731 W ATLANTIC AVE Fax: (800) 548-3020
+DELRAY BEACH FL 33445-3897 US Internet: www.fedex.com
+
+Invoice Summary Sep 01, 2014
+
+FedEx Ground Services
+Other Charges 11.00
+
+Total Charges .......................................................... USD $ 11.00
+
+TOTAL THIS INVOICE .............................................. USD $ 11.00
+
+The only charges accrued for this period is the Weekly Service Charge.
+
+ RECEIVED
+ SEP _ 8 REC'D
+ BY: _
+
+ posted 9/21/14
+
+The FedEx Ground accounts referenced in this invoice have been transferred and assigned to, are owned by, and are payable to FedEx Express.
+
+To ensure proper credit, please return this portion with your payment to FedEx.
+Please do not staple or fold. Please make your check payable to FedEx.
+
+❑ For change of address, check here and complete form on reverse side.
+
+Remittance Advice
+Your payment is due by Sep 16, 2004
+
+Invoice
+Number
+1-996-84199
+
+Account
+Number
+1334-8037-4
+
+Amount
+Due
+USD $ 11.00
+
+133480371996841993200000110071
+
+AT 01 031292 468448196 A**3DGT
+
+I R I S INC
+SHARON ANDERSON
+4731 W ATLANTIC AVE STE B1
+DELRAY BEACH FL 33445-3897
+
+FedEx
+P.O. Box 94515
+```
+
+---
+
+
+**Why Does It Matter?**
+- Traditional OCR often jumbles text from complex or low-quality scans.
+- Large Language Models interpret context and correct likely errors, producing results that are more precise and readable.
+- You can integrate these cleaned-up texts into your **paperless-ngx** pipeline for better tagging, searching, and archiving.
+
+
+
+
+### How It Works
+
+- **Vanilla OCR** typically uses classical methods or Tesseract-like engines to extract text, which can result in garbled outputs for complex fonts or poor-quality scans.
+- **LLM-Powered OCR** uses your chosen AI backend—OpenAI or Ollama—to interpret the image’s text in a more context-aware manner. This leads to fewer errors and more coherent text.
+
+---
## Contributing
-Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTING.md) before submitting a pull request.
+**Pull requests** and **issues** are welcome!
+1. Fork the repo
+2. Create a branch (`feature/my-awesome-update`)
+3. Commit changes (`git commit -m "Improve X"`)
+4. Open a PR
-1. **Fork the Repository**
+Check out our [contributing guidelines](CONTRIBUTING.md) for details.
-2. **Create a Feature Branch**
-
- ```bash
- git checkout -b feature/my-new-feature
- ```
-
-3. **Commit Your Changes**
-
- ```bash
- git commit -am 'Add some feature'
- ```
-
-4. **Push to the Branch**
-
- ```bash
- git push origin feature/my-new-feature
- ```
-
-5. **Create a Pull Request**
+---
## License
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+paperless-gpt is licensed under the [MIT License](LICENSE). Feel free to adapt and share!
+
+---
## Star History
-
[](https://star-history.com/#icereed/paperless-gpt&Date)
---
-**Disclaimer:** This project is not affiliated with the official paperless-ngx project. Use at your own discretion.
+## Disclaimer
+This project is **not** officially affiliated with [paperless-ngx][paperless-ngx]. Use at your own risk.
+
+---
+
+**paperless-gpt**: The **LLM-based** companion your doc management has been waiting for. Enjoy effortless, intelligent document titles, tags, and next-level OCR.
+
+[paperless-ngx]: https://github.com/paperless-ngx/paperless-ngx
+[docker-install]: https://docs.docker.com/get-docker/
diff --git a/app_http_handlers.go b/app_http_handlers.go
index 6fff276..58809ec 100644
--- a/app_http_handlers.go
+++ b/app_http_handlers.go
@@ -119,7 +119,7 @@ func (app *App) generateSuggestionsHandler(c *gin.Context) {
return
}
- results, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
+ results, err := app.generateDocumentSuggestions(ctx, suggestionRequest, log.WithContext(ctx))
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)})
log.Errorf("Error processing documents: %v", err)
diff --git a/app_llm.go b/app_llm.go
index 7fae692..7547c06 100644
--- a/app_llm.go
+++ b/app_llm.go
@@ -5,9 +5,13 @@ import (
"context"
"encoding/base64"
"fmt"
+ "image"
"strings"
"sync"
+ _ "image/jpeg"
+
+ "github.com/sirupsen/logrus"
"github.com/tmc/langchaingo/llms"
)
@@ -52,7 +56,12 @@ func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, s
}
// getSuggestedTags generates suggested tags for a document using the LLM
-func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedTitle string, availableTags []string) ([]string, error) {
+func (app *App) getSuggestedTags(
+ ctx context.Context,
+ content string,
+ suggestedTitle string,
+ availableTags []string,
+ logger *logrus.Entry) ([]string, error) {
likelyLanguage := getLikelyLanguage()
templateMutex.RLock()
@@ -66,11 +75,12 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT
"Content": content,
})
if err != nil {
+ logger.Errorf("Error executing tag template: %v", err)
return nil, fmt.Errorf("error executing tag template: %v", err)
}
prompt := promptBuffer.String()
- log.Debugf("Tag suggestion prompt: %s", prompt)
+ logger.Debugf("Tag suggestion prompt: %s", prompt)
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
{
@@ -83,6 +93,7 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT
},
})
if err != nil {
+ logger.Errorf("Error getting response from LLM: %v", err)
return nil, fmt.Errorf("error getting response from LLM: %v", err)
}
@@ -106,7 +117,7 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT
return filteredTags, nil
}
-func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, error) {
+func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte, logger *logrus.Entry) (string, error) {
templateMutex.RLock()
defer templateMutex.RUnlock()
@@ -122,15 +133,27 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
prompt := promptBuffer.String()
+ // Log the image dimensions
+ img, _, err := image.Decode(bytes.NewReader(jpegBytes))
+ if err != nil {
+ return "", fmt.Errorf("error decoding image: %v", err)
+ }
+ bounds := img.Bounds()
+ logger.Debugf("Image dimensions: %dx%d", bounds.Dx(), bounds.Dy())
+
// If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision
var parts []llms.ContentPart
if strings.ToLower(visionLlmProvider) != "openai" {
+ // Log image size in kilobytes
+ logger.Debugf("Image size: %d KB", len(jpegBytes)/1024)
parts = []llms.ContentPart{
llms.BinaryPart("image/jpeg", jpegBytes),
llms.TextPart(prompt),
}
} else {
base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
+ // Log image size in kilobytes
+ logger.Debugf("Image size: %d KB", len(base64Image)/1024)
parts = []llms.ContentPart{
llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
llms.TextPart(prompt),
@@ -154,7 +177,7 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
}
// getSuggestedTitle generates a suggested title for a document using the LLM
-func (app *App) getSuggestedTitle(ctx context.Context, content string) (string, error) {
+func (app *App) getSuggestedTitle(ctx context.Context, content string, logger *logrus.Entry) (string, error) {
likelyLanguage := getLikelyLanguage()
templateMutex.RLock()
@@ -171,7 +194,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string) (string,
prompt := promptBuffer.String()
- log.Debugf("Title suggestion prompt: %s", prompt)
+ logger.Debugf("Title suggestion prompt: %s", prompt)
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
{
@@ -191,7 +214,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string) (string,
}
// generateDocumentSuggestions generates suggestions for a set of documents
-func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest) ([]DocumentSuggestion, error) {
+func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest, logger *logrus.Entry) ([]DocumentSuggestion, error) {
// Fetch all available tags from paperless-ngx
availableTagsMap, err := app.Client.GetAllTags(ctx)
if err != nil {
@@ -231,7 +254,8 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
go func(doc Document) {
defer wg.Done()
documentID := doc.ID
- log.Printf("Processing Document ID %d...", documentID)
+ docLogger := documentLogger(documentID)
+ docLogger.Printf("Processing Document ID %d...", documentID)
content := doc.Content
if len(content) > 5000 {
@@ -243,23 +267,23 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
var suggestedCorrespondent string
if suggestionRequest.GenerateTitles {
- suggestedTitle, err = app.getSuggestedTitle(ctx, content)
+ suggestedTitle, err = app.getSuggestedTitle(ctx, content, docLogger)
if err != nil {
mu.Lock()
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
mu.Unlock()
- log.Errorf("Error processing document %d: %v", documentID, err)
+ docLogger.Errorf("Error processing document %d: %v", documentID, err)
return
}
}
if suggestionRequest.GenerateTags {
- suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames)
+ suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames, docLogger)
if err != nil {
mu.Lock()
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
mu.Unlock()
- log.Errorf("Error generating tags for document %d: %v", documentID, err)
+ docLogger.Errorf("Error generating tags for document %d: %v", documentID, err)
return
}
}
@@ -283,7 +307,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
}
// Titles
if suggestionRequest.GenerateTitles {
- log.Printf("Suggested title for document %d: %s", documentID, suggestedTitle)
+ docLogger.Printf("Suggested title for document %d: %s", documentID, suggestedTitle)
suggestion.SuggestedTitle = suggestedTitle
} else {
suggestion.SuggestedTitle = doc.Title
@@ -291,10 +315,10 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
// Tags
if suggestionRequest.GenerateTags {
- log.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
+ docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
suggestion.SuggestedTags = suggestedTags
} else {
- suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag)
+ suggestion.SuggestedTags = doc.Tags
}
// Correspondents
@@ -304,10 +328,12 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
} else {
suggestion.SuggestedCorrespondent = ""
}
+ // Mark the manual and auto tags for removal when this suggestion is applied to the document
+ suggestion.RemoveTags = []string{manualTag, autoTag}
documentSuggestions = append(documentSuggestions, suggestion)
mu.Unlock()
- log.Printf("Document %d processed successfully.", documentID)
+ docLogger.Printf("Document %d processed successfully.", documentID)
}(documents[i])
}
diff --git a/demo.gif b/demo.gif
deleted file mode 100644
index dc0603f..0000000
Binary files a/demo.gif and /dev/null differ
diff --git a/demo.mp4 b/demo.mp4
new file mode 100644
index 0000000..6ce5550
Binary files /dev/null and b/demo.mp4 differ
diff --git a/demo/ocr-example1.jpg b/demo/ocr-example1.jpg
new file mode 100644
index 0000000..8fedd34
Binary files /dev/null and b/demo/ocr-example1.jpg differ
diff --git a/demo/ocr-example2.jpg b/demo/ocr-example2.jpg
new file mode 100644
index 0000000..928fc28
Binary files /dev/null and b/demo/ocr-example2.jpg differ
diff --git a/go.mod b/go.mod
index 880f413..2762c7d 100644
--- a/go.mod
+++ b/go.mod
@@ -2,7 +2,7 @@ module paperless-gpt
go 1.22.0
-toolchain go1.22.2
+toolchain go1.23.4
require (
github.com/Masterminds/sprig/v3 v3.3.0
@@ -28,6 +28,7 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/ebitengine/purego v0.8.0 // indirect
+ github.com/fatih/color v1.18.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
@@ -42,6 +43,7 @@ require (
github.com/jupiterrider/ffi v0.2.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
+ github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-sqlite3 v1.14.24 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
@@ -58,7 +60,7 @@ require (
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.26.0 // indirect
golang.org/x/net v0.25.0 // indirect
- golang.org/x/sys v0.23.0 // indirect
+ golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.20.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/go.sum b/go.sum
index 945e689..6af29a1 100644
--- a/go.sum
+++ b/go.sum
@@ -25,6 +25,8 @@ github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE=
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
+github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
+github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
@@ -70,6 +72,9 @@ github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
@@ -155,6 +160,7 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -162,6 +168,8 @@ golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
+golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
diff --git a/main.go b/main.go
index 8279e6e..273f105 100644
--- a/main.go
+++ b/main.go
@@ -6,12 +6,15 @@ import (
"net/http"
"os"
"path/filepath"
+ "runtime"
+ "strconv"
"strings"
"sync"
"text/template"
"time"
"github.com/Masterminds/sprig/v3"
+ "github.com/fatih/color"
"github.com/gin-gonic/gin"
"github.com/sirupsen/logrus"
"github.com/tmc/langchaingo/llms"
@@ -27,7 +30,6 @@ var (
log = logrus.New()
// Environment Variables
- correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
@@ -45,6 +47,7 @@ var (
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
+ limitOcrPages int // Will be read from OCR_LIMIT_PAGES
// Templates
titleTemplate *template.Template
@@ -121,6 +124,9 @@ func main() {
// Initialize logrus logger
initLogger()
+ // Print version
+ printVersion()
+
// Initialize PaperlessClient
client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken)
@@ -263,6 +269,29 @@ func main() {
}
}
+func printVersion() {
+ cyan := color.New(color.FgCyan).SprintFunc()
+ yellow := color.New(color.FgYellow).SprintFunc()
+
+ banner := `
+ ╔═══════════════════════════════════════╗
+ ║ Paperless GPT ║
+ ╚═══════════════════════════════════════╝`
+
+ fmt.Printf("%s\n", cyan(banner))
+ fmt.Printf("\n%s %s\n", yellow("Version:"), version)
+ if commit != "" {
+ fmt.Printf("%s %s\n", yellow("Commit:"), commit)
+ }
+ if buildDate != "" {
+ fmt.Printf("%s %s\n", yellow("Build Date:"), buildDate)
+ }
+ fmt.Printf("%s %s/%s\n", yellow("Platform:"), runtime.GOOS, runtime.GOARCH)
+ fmt.Printf("%s %s\n", yellow("Go Version:"), runtime.Version())
+ fmt.Printf("%s %s\n", yellow("Started:"), time.Now().Format(time.RFC1123))
+ fmt.Println()
+}
+
func initLogger() {
switch logLevel {
case "debug":
@@ -338,6 +367,24 @@ func validateOrDefaultEnvVars() {
if (llmProvider == "openai" || visionLlmProvider == "openai") && openaiAPIKey == "" {
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
}
+
+ if isOcrEnabled() {
+ rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
+ if rawLimitOcrPages == "" {
+ limitOcrPages = 5
+ } else {
+ var err error
+ limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
+ if err != nil {
+ log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
+ }
+ }
+ }
+}
+
+// documentLogger creates a logger with document context
+func documentLogger(documentID int) *logrus.Entry {
+ return log.WithField("document_id", documentID)
}
// processAutoTagDocuments handles the background auto-tagging of documents
@@ -356,23 +403,29 @@ func (app *App) processAutoTagDocuments() (int, error) {
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
- suggestionRequest := GenerateSuggestionsRequest{
- Documents: documents,
- GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
- GenerateTags: strings.ToLower(autoGenerateTags) != "false",
- GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
- }
+ for _, document := range documents {
+ docLogger := documentLogger(document.ID)
+ docLogger.Info("Processing document for auto-tagging")
- suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
- if err != nil {
- return 0, fmt.Errorf("error generating suggestions: %w", err)
- }
+ suggestionRequest := GenerateSuggestionsRequest{
+ Documents: []Document{document},
+ GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
+ GenerateTags: strings.ToLower(autoGenerateTags) != "false",
+ GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
+ }
- err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
- if err != nil {
- return 0, fmt.Errorf("error updating documents: %w", err)
- }
+ suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
+ if err != nil {
+ return 0, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
+ }
+ err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
+ if err != nil {
+ return 0, fmt.Errorf("error updating document %d: %w", document.ID, err)
+ }
+
+ docLogger.Info("Successfully processed document")
+ }
return len(documents), nil
}
@@ -392,26 +445,31 @@ func (app *App) processAutoOcrTagDocuments() (int, error) {
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)
- documents = documents[:1] // Process only one document at a time
+ for _, document := range documents {
+ docLogger := documentLogger(document.ID)
+ docLogger.Info("Processing document for OCR")
- ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID)
- if err != nil {
- return 0, fmt.Errorf("error processing document OCR: %w", err)
+ ocrContent, err := app.ProcessDocumentOCR(ctx, document.ID)
+ if err != nil {
+ return 0, fmt.Errorf("error processing OCR for document %d: %w", document.ID, err)
+ }
+ docLogger.Debug("OCR processing completed")
+
+ err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
+ {
+ ID: document.ID,
+ OriginalDocument: document,
+ SuggestedContent: ocrContent,
+ RemoveTags: []string{autoOcrTag},
+ },
+ }, app.Database, false)
+ if err != nil {
+ return 0, fmt.Errorf("error updating document %d after OCR: %w", document.ID, err)
+ }
+
+ docLogger.Info("Successfully processed document OCR")
}
- log.Debugf("OCR content for document %d: %s", documents[0].ID, ocrContent)
-
- err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
- {
- ID: documents[0].ID,
- OriginalDocument: documents[0],
- SuggestedContent: ocrContent,
- },
- }, app.Database, false)
- if err != nil {
- return 0, fmt.Errorf("error updating documents: %w", err)
- }
-
- return 1, nil // Processed one document
+ return len(documents), nil
}
// removeTagFromList removes a specific tag from a list of tags
diff --git a/ocr.go b/ocr.go
index ca8ed28..6819a98 100644
--- a/ocr.go
+++ b/ocr.go
@@ -9,31 +9,42 @@ import (
// ProcessDocumentOCR processes a document through OCR and returns the combined text
func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
- imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID)
+ docLogger := documentLogger(documentID)
+ docLogger.Info("Starting OCR processing")
+
+ imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID, limitOcrPages)
defer func() {
for _, imagePath := range imagePaths {
- os.Remove(imagePath)
+ if err := os.Remove(imagePath); err != nil {
+ docLogger.WithError(err).WithField("image_path", imagePath).Warn("Failed to remove temporary image file")
+ }
}
}()
if err != nil {
- return "", fmt.Errorf("error downloading document images: %w", err)
+ return "", fmt.Errorf("error downloading document images for document %d: %w", documentID, err)
}
+ docLogger.WithField("page_count", len(imagePaths)).Debug("Downloaded document images")
+
var ocrTexts []string
- for _, imagePath := range imagePaths {
+ for i, imagePath := range imagePaths {
+ pageLogger := docLogger.WithField("page", i+1)
+ pageLogger.Debug("Processing page")
+
imageContent, err := os.ReadFile(imagePath)
if err != nil {
- return "", fmt.Errorf("error reading image file: %w", err)
+ return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
}
- ocrText, err := app.doOCRViaLLM(ctx, imageContent)
+ ocrText, err := app.doOCRViaLLM(ctx, imageContent, pageLogger)
if err != nil {
- return "", fmt.Errorf("error performing OCR: %w", err)
+ return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
}
- log.Debugf("OCR text: %s", ocrText)
+ pageLogger.Debug("OCR completed for page")
ocrTexts = append(ocrTexts, ocrText)
}
+ docLogger.Info("OCR processing completed successfully")
return strings.Join(ocrTexts, "\n\n"), nil
}
diff --git a/paperless-gpt-screenshot.png b/paperless-gpt-screenshot.png
index 99a70c1..2a8eda9 100644
Binary files a/paperless-gpt-screenshot.png and b/paperless-gpt-screenshot.png differ
diff --git a/paperless.go b/paperless.go
index e35068e..1aa5158 100644
--- a/paperless.go
+++ b/paperless.go
@@ -290,8 +290,9 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
}
// remove autoTag to prevent infinite loop (even if it is in the original tags)
- originalTags = removeTagFromList(originalTags, autoTag)
- originalTags = removeTagFromList(originalTags, autoOcrTag)
+ for _, tag := range document.RemoveTags {
+ originalTags = removeTagFromList(originalTags, tag)
+ }
if len(tags) == 0 {
tags = originalTags
@@ -300,6 +301,12 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
originalFields["tags"] = originalTags
// remove autoTag to prevent infinite loop - this is required in case of undo
tags = removeTagFromList(tags, autoTag)
+
+ // keep previous tags
+ tags = append(tags, originalTags...)
+ // remove duplicates
+ slices.Sort(tags)
+ tags = slices.Compact(tags)
}
updatedTagsJSON, err := json.Marshal(tags)
@@ -424,7 +431,8 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
}
// DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images
-func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int) ([]string, error) {
+// If limitPages > 0, only the first N pages will be processed
+func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int, limitPages int) ([]string, error) {
// Create a directory named after the document ID
docDir := filepath.Join(client.GetCacheFolder(), fmt.Sprintf("document-%d", documentId))
if _, err := os.Stat(docDir); os.IsNotExist(err) {
@@ -437,6 +445,9 @@ func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, doc
// Check if images already exist
var imagePaths []string
for n := 0; ; n++ {
+ if limitPages > 0 && n >= limitPages {
+ break
+ }
imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n))
if _, err := os.Stat(imagePath); os.IsNotExist(err) {
break
@@ -485,10 +496,15 @@ func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, doc
}
defer doc.Close()
+ totalPages := doc.NumPage()
+ if limitPages > 0 && limitPages < totalPages {
+ totalPages = limitPages
+ }
+
var mu sync.Mutex
var g errgroup.Group
- for n := 0; n < doc.NumPage(); n++ {
+ for n := 0; n < totalPages; n++ {
n := n // capture loop variable
g.Go(func() error {
mu.Lock()
diff --git a/paperless_test.go b/paperless_test.go
index 13cf103..c9be3a0 100644
--- a/paperless_test.go
+++ b/paperless_test.go
@@ -300,18 +300,24 @@ func TestUpdateDocuments(t *testing.T) {
OriginalDocument: Document{
ID: 1,
Title: "Old Title",
- Tags: []string{"tag1"},
+ Tags: []string{"tag1", "tag3", "manual", "removeMe"},
},
SuggestedTitle: "New Title",
- SuggestedTags: []string{"tag2"},
+ SuggestedTags: []string{"tag2", "tag3"},
+ RemoveTags: []string{"removeMe"},
},
}
+ idTag1 := 1
+ idTag2 := 2
+ idTag3 := 4
// Mock data for tags
tagsResponse := map[string]interface{}{
"results": []map[string]interface{}{
- {"id": 1, "name": "tag1"},
- {"id": 2, "name": "tag2"},
+ {"id": idTag1, "name": "tag1"},
+ {"id": idTag2, "name": "tag2"},
{"id": 3, "name": "manual"},
+ {"id": idTag3, "name": "tag3"},
+ {"id": 5, "name": "removeMe"},
},
"next": nil,
}
@@ -342,7 +348,7 @@ func TestUpdateDocuments(t *testing.T) {
// Expected updated fields
expectedFields := map[string]interface{}{
"title": "New Title",
- "tags": []interface{}{float64(2)}, // tag2 ID
+ "tags": []interface{}{float64(idTag1), float64(idTag2), float64(idTag3)}, // keep also previous tags
}
assert.Equal(t, expectedFields, updatedFields)
@@ -385,7 +391,7 @@ func TestDownloadDocumentAsImages(t *testing.T) {
})
ctx := context.Background()
- imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID)
+ imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 0)
require.NoError(t, err)
// Verify that exatly one page was extracted
@@ -422,11 +428,11 @@ func TestDownloadDocumentAsImages_ManyPages(t *testing.T) {
env.client.CacheFolder = "tests/tmp"
// Clean the cache folder
os.RemoveAll(env.client.CacheFolder)
- imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID)
+ imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 50)
require.NoError(t, err)
- // Verify that exatly 52 pages were extracted
- assert.Len(t, imagePaths, 52)
+ // Verify that exactly 50 pages were extracted - the original doc contains 52 pages
+ assert.Len(t, imagePaths, 50)
// The path shall end with tests/tmp/document-321/page000.jpg
for _, imagePath := range imagePaths {
_, err := os.Stat(imagePath)
diff --git a/renovate.json b/renovate.json
index 5db72dd..b67362b 100644
--- a/renovate.json
+++ b/renovate.json
@@ -2,5 +2,19 @@
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:recommended"
+ ],
+ "customManagers": [
+ {
+ "customType": "regex",
+ "description": "Update VERSION variables in Dockerfiles",
+ "fileMatch": [
+ "^Dockerfile$"
+ ],
+ "matchStrings": [
+ "# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) .+?_VERSION=\"(?<currentValue>.+?)\"\\s",
+ "# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) VERSION=\"(?<currentValue>.+?)\"\\s"
+ ],
+ "versioningTemplate": "{{#if versioning}}{{versioning}}{{else}}semver{{/if}}"
+ }
]
}
diff --git a/types.go b/types.go
index 40cbf6f..6bb16a7 100644
--- a/types.go
+++ b/types.go
@@ -81,6 +81,7 @@ type DocumentSuggestion struct {
SuggestedTags []string `json:"suggested_tags,omitempty"`
SuggestedContent string `json:"suggested_content,omitempty"`
SuggestedCorrespondent string `json:"suggested_correspondent,omitempty"`
+ RemoveTags []string `json:"remove_tags,omitempty"`
}
type Correspondent struct {
diff --git a/version.go b/version.go
new file mode 100644
index 0000000..750920c
--- /dev/null
+++ b/version.go
@@ -0,0 +1,7 @@
+package main
+
+var (
+ version = "devVersion"
+ buildDate = "devBuildDate"
+ commit = "devCommit"
+)
diff --git a/web-app/package-lock.json b/web-app/package-lock.json
index fe204d6..f7624a8 100644
--- a/web-app/package-lock.json
+++ b/web-app/package-lock.json
@@ -2889,12 +2889,16 @@
}
},
"node_modules/lilconfig": {
- "version": "2.1.0",
- "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz",
- "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==",
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
+ "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
"dev": true,
+ "license": "MIT",
"engines": {
- "node": ">=10"
+ "node": ">=14"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/antonk52"
}
},
"node_modules/lines-and-columns": {
@@ -3350,18 +3354,6 @@
}
}
},
- "node_modules/postcss-load-config/node_modules/lilconfig": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz",
- "integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==",
- "dev": true,
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/antonk52"
- }
- },
"node_modules/postcss-nested": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
@@ -3944,33 +3936,34 @@
"integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew=="
},
"node_modules/tailwindcss": {
- "version": "3.4.12",
- "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.12.tgz",
- "integrity": "sha512-Htf/gHj2+soPb9UayUNci/Ja3d8pTmu9ONTfh4QY8r3MATTZOzmv6UYWF7ZwikEIC8okpfqmGqrmDehua8mF8w==",
+ "version": "3.4.17",
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz",
+ "integrity": "sha512-w33E2aCvSDP0tW9RZuNXadXlkHXqFzSkQew/aIa2i/Sj8fThxwovwlXHSPXTbAHwEIhBFXAedUhP2tueAKP8Og==",
"dev": true,
+ "license": "MIT",
"dependencies": {
"@alloc/quick-lru": "^5.2.0",
"arg": "^5.0.2",
- "chokidar": "^3.5.3",
+ "chokidar": "^3.6.0",
"didyoumean": "^1.2.2",
"dlv": "^1.1.3",
- "fast-glob": "^3.3.0",
+ "fast-glob": "^3.3.2",
"glob-parent": "^6.0.2",
"is-glob": "^4.0.3",
- "jiti": "^1.21.0",
- "lilconfig": "^2.1.0",
- "micromatch": "^4.0.5",
+ "jiti": "^1.21.6",
+ "lilconfig": "^3.1.3",
+ "micromatch": "^4.0.8",
"normalize-path": "^3.0.0",
"object-hash": "^3.0.0",
- "picocolors": "^1.0.0",
- "postcss": "^8.4.23",
+ "picocolors": "^1.1.1",
+ "postcss": "^8.4.47",
"postcss-import": "^15.1.0",
"postcss-js": "^4.0.1",
- "postcss-load-config": "^4.0.1",
- "postcss-nested": "^6.0.1",
- "postcss-selector-parser": "^6.0.11",
- "resolve": "^1.22.2",
- "sucrase": "^3.32.0"
+ "postcss-load-config": "^4.0.2",
+ "postcss-nested": "^6.2.0",
+ "postcss-selector-parser": "^6.1.2",
+ "resolve": "^1.22.8",
+ "sucrase": "^3.35.0"
},
"bin": {
"tailwind": "lib/cli.js",