mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-13 13:18:02 -05:00
Merge branch 'main' into correspondents
This commit is contained in:
commit
515b78b6a1
20 changed files with 626 additions and 331 deletions
4
.github/workflows/docker-build-and-push.yml
vendored
4
.github/workflows/docker-build-and-push.yml
vendored
|
@ -96,3 +96,7 @@ jobs:
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
tags: ${{ env.TAGS }}
|
tags: ${{ env.TAGS }}
|
||||||
|
build-args: |
|
||||||
|
VERSION=${{ github.ref_type == 'tag' && github.ref_name || github.sha }}
|
||||||
|
COMMIT=${{ github.sha }}
|
||||||
|
BUILD_DATE=${{ github.event.repository.pushed_at }}
|
||||||
|
|
46
Dockerfile
46
Dockerfile
|
@ -1,17 +1,33 @@
|
||||||
|
# Define top-level build arguments
|
||||||
|
ARG VERSION=docker-dev
|
||||||
|
ARG COMMIT=unknown
|
||||||
|
ARG BUILD_DATE=unknown
|
||||||
|
|
||||||
# Stage 1: Build the Go binary
|
# Stage 1: Build the Go binary
|
||||||
FROM golang:1.22-alpine AS builder
|
FROM golang:1.23.4-alpine3.21 AS builder
|
||||||
|
|
||||||
# Set the working directory inside the container
|
# Set the working directory inside the container
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install necessary packages
|
# Package versions for Renovate
|
||||||
RUN apk add --no-cache \
|
# renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
|
||||||
git \
|
ENV GCC_VERSION=14.2.0-r4
|
||||||
gcc \
|
# renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
|
||||||
musl-dev \
|
ENV MUSL_DEV_VERSION=1.2.5-r8
|
||||||
mupdf \
|
# renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
|
||||||
mupdf-dev
|
ENV MUPDF_VERSION=1.24.10-r0
|
||||||
|
# renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
|
||||||
|
ENV MUPDF_DEV_VERSION=1.24.10-r0
|
||||||
|
# renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
|
||||||
|
ENV SED_VERSION=4.9-r2
|
||||||
|
|
||||||
|
# Install necessary packages with pinned versions
|
||||||
|
RUN apk add --no-cache \
|
||||||
|
"gcc=${GCC_VERSION}" \
|
||||||
|
"musl-dev=${MUSL_DEV_VERSION}" \
|
||||||
|
"mupdf=${MUPDF_VERSION}" \
|
||||||
|
"mupdf-dev=${MUPDF_DEV_VERSION}" \
|
||||||
|
"sed=${SED_VERSION}"
|
||||||
# Copy go.mod and go.sum files
|
# Copy go.mod and go.sum files
|
||||||
COPY go.mod go.sum ./
|
COPY go.mod go.sum ./
|
||||||
|
|
||||||
|
@ -24,6 +40,18 @@ RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3
|
||||||
# Now copy the actual source files
|
# Now copy the actual source files
|
||||||
COPY *.go .
|
COPY *.go .
|
||||||
|
|
||||||
|
# Import ARGs from top level
|
||||||
|
ARG VERSION
|
||||||
|
ARG COMMIT
|
||||||
|
ARG BUILD_DATE
|
||||||
|
|
||||||
|
# Update version information
|
||||||
|
RUN sed -i \
|
||||||
|
-e "s/devVersion/${VERSION}/" \
|
||||||
|
-e "s/devBuildDate/${BUILD_DATE}/" \
|
||||||
|
-e "s/devCommit/${COMMIT}/" \
|
||||||
|
version.go
|
||||||
|
|
||||||
# Build the binary using caching for both go modules and build cache
|
# Build the binary using caching for both go modules and build cache
|
||||||
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
|
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
|
||||||
|
|
||||||
|
@ -51,6 +79,8 @@ RUN npm run build
|
||||||
# Stage 3: Create a lightweight image with the Go binary and frontend
|
# Stage 3: Create a lightweight image with the Go binary and frontend
|
||||||
FROM alpine:latest
|
FROM alpine:latest
|
||||||
|
|
||||||
|
ENV GIN_MODE=release
|
||||||
|
|
||||||
# Install necessary runtime dependencies
|
# Install necessary runtime dependencies
|
||||||
RUN apk add --no-cache \
|
RUN apk add --no-cache \
|
||||||
ca-certificates
|
ca-certificates
|
||||||
|
|
537
README.md
537
README.md
|
@ -1,87 +1,104 @@
|
||||||
# paperless-gpt
|
# paperless-gpt
|
||||||
|
|
||||||
[](LICENSE)
|
[](LICENSE)
|
||||||
[](https://hub.docker.com/r/icereed/paperless-gpt)
|
[](https://hub.docker.com/r/icereed/paperless-gpt)
|
||||||
[](CODE_OF_CONDUCT.md)
|
[](CODE_OF_CONDUCT.md)
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
**paperless-gpt** is a tool designed to generate accurate and meaningful document titles and tags for [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) using Large Language Models (LLMs). It supports multiple LLM providers, including **OpenAI** and **Ollama**. With paperless-gpt, you can streamline your document management by automatically suggesting appropriate titles and tags based on the content of your scanned documents.
|
**paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**—ensuring high accuracy, even with tricky scans. If you’re craving next-level text extraction and effortless document organization, this is your solution.
|
||||||
|
|
||||||
[](./demo.gif)
|
https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
|
||||||
|
|
||||||
## Features
|
---
|
||||||
|
|
||||||
- **Multiple LLM Support**: Choose between OpenAI and Ollama for generating document titles and tags.
|
## Key Highlights
|
||||||
- **Customizable Prompts**: Modify the prompt templates to suit your specific needs.
|
|
||||||
- **Easy Integration**: Works seamlessly with your existing paperless-ngx setup.
|
1. **LLM-Enhanced OCR**
|
||||||
- **User-Friendly Interface**: Intuitive web interface for reviewing and applying suggested titles and tags.
|
Harness Large Language Models (OpenAI or Ollama) for **better-than-traditional** OCR—turn messy or low-quality scans into context-aware, high-fidelity text.
|
||||||
- **Dockerized Deployment**: Simple setup using Docker and Docker Compose.
|
|
||||||
- **Automatic Document Processing**: Automatically apply generated suggestions for documents with the `paperless-gpt-auto` tag.
|
2. **Automatic Title & Tag Generation**
|
||||||
- **Experimental OCR Feature**: Send documents to a vision LLM for OCR processing.
|
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
|
||||||
|
|
||||||
|
3. **Extensive Customization**
|
||||||
|
- **Prompt Templates**: Tweak your AI prompts to reflect your domain, style, or preference.
|
||||||
|
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
|
||||||
|
|
||||||
|
4. **Simple Docker Deployment**
|
||||||
|
A few environment variables, and you’re off! Compose it alongside paperless-ngx with minimal fuss.
|
||||||
|
|
||||||
|
5. **Unified Web UI**
|
||||||
|
- **Manual Review**: Approve or tweak the AI’s suggestions.
|
||||||
|
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
|
||||||
|
|
||||||
|
6. **Opt-In LLM-based OCR**
|
||||||
|
If you opt in, your images get read by a Vision LLM, pushing boundaries beyond standard OCR tools.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Table of Contents
|
## Table of Contents
|
||||||
|
- [Key Highlights](#key-highlights)
|
||||||
- [paperless-gpt](#paperless-gpt)
|
- [Getting Started](#getting-started)
|
||||||
- [Features](#features)
|
|
||||||
- [Table of Contents](#table-of-contents)
|
|
||||||
- [Getting Started](#getting-started)
|
|
||||||
- [Prerequisites](#prerequisites)
|
- [Prerequisites](#prerequisites)
|
||||||
- [Installation](#installation)
|
- [Installation](#installation)
|
||||||
- [Docker Compose](#docker-compose)
|
- [Docker Compose](#docker-compose)
|
||||||
- [Manual Setup](#manual-setup)
|
- [Manual Setup](#manual-setup)
|
||||||
- [Configuration](#configuration)
|
- [Configuration](#configuration)
|
||||||
- [Environment Variables](#environment-variables)
|
- [Environment Variables](#environment-variables)
|
||||||
- [Custom Prompt Templates](#custom-prompt-templates)
|
- [Custom Prompt Templates](#custom-prompt-templates)
|
||||||
- [Prompt Templates Directory](#prompt-templates-directory)
|
- [Prompt Templates Directory](#prompt-templates-directory)
|
||||||
- [Mounting the Prompts Directory](#mounting-the-prompts-directory)
|
- [Mounting the Prompts Directory](#mounting-the-prompts-directory)
|
||||||
- [Editing the Prompt Templates](#editing-the-prompt-templates)
|
- [Editing the Prompt Templates](#editing-the-prompt-templates)
|
||||||
- [Template Syntax and Variables](#template-syntax-and-variables)
|
- [Template Syntax and Variables](#template-syntax-and-variables)
|
||||||
- [Usage](#usage)
|
- [OCR using AI](#llm-based-ocr-compare-for-yourself)
|
||||||
- [Contributing](#contributing)
|
- [Usage](#usage)
|
||||||
- [License](#license)
|
- [Contributing](#contributing)
|
||||||
- [Star History](#star-history)
|
- [License](#license)
|
||||||
|
- [Star History](#star-history)
|
||||||
|
- [Disclaimer](#disclaimer)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
- [Docker][docker-install] installed.
|
||||||
- [Docker](https://www.docker.com/get-started) installed on your system.
|
- A running instance of [paperless-ngx][paperless-ngx].
|
||||||
- A running instance of [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx).
|
|
||||||
- Access to an LLM provider:
|
- Access to an LLM provider:
|
||||||
- **OpenAI**: An API key with access to models like `gpt-4o` or `gpt-3.5-turbo`.
|
- **OpenAI**: An API key with access to models like `gpt-4o` or `gpt-3.5-turbo`.
|
||||||
- **Ollama**: A running Ollama server with models like `llama2` installed.
|
- **Ollama**: A running Ollama server with models like `llama2`.
|
||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
|
|
||||||
#### Docker Compose
|
#### Docker Compose
|
||||||
|
|
||||||
The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx.
|
Here’s an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
version: "3.7"
|
version: "3.7"
|
||||||
services:
|
services:
|
||||||
paperless-ngx:
|
paperless-ngx:
|
||||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||||
# ... (your existing paperless-ngx configuration)
|
# ... (your existing paperless-ngx config)
|
||||||
|
|
||||||
paperless-gpt:
|
paperless-gpt:
|
||||||
image: icereed/paperless-gpt:latest
|
image: icereed/paperless-gpt:latest
|
||||||
environment:
|
environment:
|
||||||
PAPERLESS_BASE_URL: "http://paperless-ngx:8000"
|
PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
|
||||||
PAPERLESS_API_TOKEN: "your_paperless_api_token"
|
PAPERLESS_API_TOKEN: 'your_paperless_api_token'
|
||||||
PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com" # Optional, your public link to access Paperless
|
PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional
|
||||||
MANUAL_TAG: "paperless-gpt" # Optional, default is 'paperless-gpt'
|
MANUAL_TAG: 'paperless-gpt' # Optional, default: paperless-gpt
|
||||||
AUTO_TAG: "paperless-gpt-auto" # Optional, default is 'paperless-gpt-auto'
|
AUTO_TAG: 'paperless-gpt-auto' # Optional, default: paperless-gpt-auto
|
||||||
LLM_PROVIDER: "openai" # or 'ollama'
|
LLM_PROVIDER: 'openai' # or 'ollama'
|
||||||
LLM_MODEL: "gpt-4o" # or 'llama2'
|
LLM_MODEL: 'gpt-4o' # or 'llama2'
|
||||||
OPENAI_API_KEY: "your_openai_api_key" # Required if using OpenAI
|
OPENAI_API_KEY: 'your_openai_api_key'
|
||||||
LLM_LANGUAGE: "English" # Optional, default is 'English'
|
# Optional - OPENAI_BASE_URL: 'https://litellm.yourinstallationof.it.com/v1'
|
||||||
OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama
|
LLM_LANGUAGE: 'English' # Optional, default: English
|
||||||
VISION_LLM_PROVIDER: "ollama" # Optional (for OCR) - ollama or openai
|
OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
|
||||||
VISION_LLM_MODEL: "minicpm-v" # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai
|
VISION_LLM_PROVIDER: 'ollama' # (for OCR) - openai or ollama
|
||||||
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default is 'paperless-gpt-ocr-auto'
|
VISION_LLM_MODEL: 'minicpm-v' # (for OCR) - minicpm-v (ollama example), gpt-4o (for openai), etc.
|
||||||
LOG_LEVEL: "info" # Optional or 'debug', 'warn', 'error'
|
AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default: paperless-gpt-ocr-auto
|
||||||
|
OCR_LIMIT_PAGES: '5' # Optional, default: 5. Set to 0 for no limit.
|
||||||
|
LOG_LEVEL: 'info' # Optional: debug, warn, error
|
||||||
volumes:
|
volumes:
|
||||||
- ./prompts:/app/prompts # Mount the prompts directory
|
- ./prompts:/app/prompts # Mount the prompts directory
|
||||||
ports:
|
ports:
|
||||||
|
@ -90,33 +107,23 @@ services:
|
||||||
- paperless-ngx
|
- paperless-ngx
|
||||||
```
|
```
|
||||||
|
|
||||||
**Note:** Replace the placeholder values with your actual configuration.
|
**Pro Tip**: Replace placeholders with real values and read the logs if something looks off.
|
||||||
|
|
||||||
#### Manual Setup
|
#### Manual Setup
|
||||||
|
1. **Clone the Repository**
|
||||||
If you prefer to run the application manually:
|
|
||||||
|
|
||||||
1. **Clone the Repository:**
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/icereed/paperless-gpt.git
|
git clone https://github.com/icereed/paperless-gpt.git
|
||||||
cd paperless-gpt
|
cd paperless-gpt
|
||||||
```
|
```
|
||||||
|
2. **Create a `prompts` Directory**
|
||||||
2. **Create a `prompts` Directory:**
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir prompts
|
mkdir prompts
|
||||||
```
|
```
|
||||||
|
3. **Build the Docker Image**
|
||||||
3. **Build the Docker Image:**
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t paperless-gpt .
|
docker build -t paperless-gpt .
|
||||||
```
|
```
|
||||||
|
4. **Run the Container**
|
||||||
4. **Run the Container:**
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -d \
|
docker run -d \
|
||||||
-e PAPERLESS_BASE_URL='http://your_paperless_ngx_url' \
|
-e PAPERLESS_BASE_URL='http://your_paperless_ngx_url' \
|
||||||
|
@ -128,201 +135,313 @@ If you prefer to run the application manually:
|
||||||
-e VISION_LLM_PROVIDER='ollama' \
|
-e VISION_LLM_PROVIDER='ollama' \
|
||||||
-e VISION_LLM_MODEL='minicpm-v' \
|
-e VISION_LLM_MODEL='minicpm-v' \
|
||||||
-e LOG_LEVEL='info' \
|
-e LOG_LEVEL='info' \
|
||||||
-v $(pwd)/prompts:/app/prompts \ # Mount the prompts directory
|
-v $(pwd)/prompts:/app/prompts \
|
||||||
-p 8080:8080 \
|
-p 8080:8080 \
|
||||||
paperless-gpt
|
paperless-gpt
|
||||||
```
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
||||||
| Variable | Description | Required |
|
|
||||||
| -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -------- |
|
|
||||||
| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
|
|
||||||
| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
|
|
||||||
| `PAPERLESS_PUBLIC_URL` | The public URL for your Paperless instance, if it is different to your `PAPERLESS_BASE_URL` - say if you are running in Docker Compose | No |
|
|
||||||
| `MANUAL_TAG` | The tag to use for manually processing documents. Default is `paperless-gpt`. | No |
|
|
||||||
| `AUTO_TAG` | The tag to use for automatically processing documents. Default is `paperless-gpt-auto`. | No |
|
|
||||||
| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
|
|
||||||
| `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes |
|
|
||||||
| `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. |
|
|
||||||
| `LLM_LANGUAGE` | The likely language of your documents (e.g., `English`, `German`). Default is `English`. | No |
|
|
||||||
| `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No |
|
|
||||||
| `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No |
|
|
||||||
| `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No |
|
|
||||||
| `AUTO_OCR_TAG` | The tag to use for automatically processing documents with OCR. Default is `paperless-gpt-ocr-auto`. | No |
|
|
||||||
| `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No |
|
|
||||||
| `LISTEN_INTERFACE` | The interface paperless-gpt listens to. Default is `:8080` | No |
|
|
||||||
| `WEBUI_PATH` | The path to load static content from. Default is `./web-app/dist` | No |
|
|
||||||
| `AUTO_GENERATE_TITLE` | Enable/disable title generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No |
|
|
||||||
| `AUTO_GENERATE_TAGS` | Enable/disable tag generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No |
|
|
||||||
| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No |
|
|
||||||
|
|
||||||
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
|
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
|
||||||
|
|
||||||
|
| Variable | Description | Required |
|
||||||
|
|------------------------|------------------------------------------------------------------------------------------------------------------|----------|
|
||||||
|
| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes |
|
||||||
|
| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes |
|
||||||
|
| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No |
|
||||||
|
| `MANUAL_TAG` | Tag for manual processing. Default: `paperless-gpt`. | No |
|
||||||
|
| `AUTO_TAG` | Tag for auto processing. Default: `paperless-gpt-auto`. | No |
|
||||||
|
| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes |
|
||||||
|
| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `llama2`. | Yes |
|
||||||
|
| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. |
|
||||||
|
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No |
|
||||||
|
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). Default: `English`. | No |
|
||||||
|
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No |
|
||||||
|
| `VISION_LLM_PROVIDER` | AI backend for OCR (`openai` or `ollama`). | No |
|
||||||
|
| `VISION_LLM_MODEL` | Model name for OCR (e.g. `minicpm-v`). | No |
|
||||||
|
| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. Default: `paperless-gpt-ocr-auto`. | No |
|
||||||
|
| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). Default: `info`. | No |
|
||||||
|
| `LISTEN_INTERFACE` | Network interface to listen on. Default: `:8080`. | No |
|
||||||
|
| `WEBUI_PATH` | Path for static content. Default: `./web-app/dist`. | No |
|
||||||
|
| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||||
|
| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||||
|
| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||||
|
| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. Default: `5`. | No |
|
||||||
|
| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No |
|
||||||
|
|
||||||
### Custom Prompt Templates
|
### Custom Prompt Templates
|
||||||
|
|
||||||
You can customize the prompt templates used by paperless-gpt to generate titles and tags. By default, the application uses built-in templates, but you can modify them by editing the template files.
|
paperless-gpt’s flexible **prompt templates** let you shape how AI responds:
|
||||||
|
|
||||||
#### Prompt Templates Directory
|
1. **`title_prompt.tmpl`**: For document titles.
|
||||||
|
2. **`tag_prompt.tmpl`**: For tagging logic.
|
||||||
|
3. **`ocr_prompt.tmpl`**: For LLM OCR.
|
||||||
|
|
||||||
The prompt templates are stored in the `prompts` directory inside the application. The two main template files are:
|
Mount them into your container via:
|
||||||
|
|
||||||
- `title_prompt.tmpl`: Template used for generating document titles.
|
|
||||||
- `tag_prompt.tmpl`: Template used for generating document tags.
|
|
||||||
|
|
||||||
#### Mounting the Prompts Directory
|
|
||||||
|
|
||||||
To modify the prompt templates, you need to mount a local `prompts` directory into the container.
|
|
||||||
|
|
||||||
**Docker Compose Example:**
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
services:
|
|
||||||
paperless-gpt:
|
|
||||||
image: icereed/paperless-gpt:latest
|
|
||||||
# ... (other configurations)
|
|
||||||
volumes:
|
volumes:
|
||||||
- ./prompts:/app/prompts # Mount the prompts directory
|
- ./prompts:/app/prompts
|
||||||
```
|
```
|
||||||
|
|
||||||
**Docker Run Command Example:**
|
Then tweak at will—**paperless-gpt** reloads them automatically on startup!
|
||||||
|
|
||||||
```bash
|
---
|
||||||
docker run -d \
|
|
||||||
# ... (other configurations)
|
|
||||||
-v $(pwd)/prompts:/app/prompts \
|
|
||||||
paperless-gpt
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Editing the Prompt Templates
|
|
||||||
|
|
||||||
1. **Start the Container:**
|
|
||||||
|
|
||||||
When you first start the container with the `prompts` directory mounted, it will automatically create the default template files in your local `prompts` directory if they do not exist.
|
|
||||||
|
|
||||||
2. **Edit the Template Files:**
|
|
||||||
|
|
||||||
- Open `prompts/title_prompt.tmpl` and `prompts/tag_prompt.tmpl` with your favorite text editor.
|
|
||||||
- Modify the templates using Go's `text/template` syntax.
|
|
||||||
- Save the changes.
|
|
||||||
|
|
||||||
3. **Restart the Container (if necessary):**
|
|
||||||
|
|
||||||
The application automatically reloads the templates when it starts. If the container is already running, you may need to restart it to apply the changes.
|
|
||||||
|
|
||||||
#### Template Syntax and Variables
|
|
||||||
|
|
||||||
The templates use Go's `text/template` syntax and have access to the following variables:
|
|
||||||
|
|
||||||
- **For `title_prompt.tmpl`:**
|
|
||||||
|
|
||||||
- `{{.Language}}`: The language specified in `LLM_LANGUAGE` (default is `English`).
|
|
||||||
- `{{.Content}}`: The content of the document.
|
|
||||||
|
|
||||||
- **For `tag_prompt.tmpl`:**
|
|
||||||
|
|
||||||
- `{{.Language}}`: The language specified in `LLM_LANGUAGE`.
|
|
||||||
- `{{.AvailableTags}}`: A list (array) of available tags from paperless-ngx.
|
|
||||||
- `{{.Title}}`: The suggested title for the document.
|
|
||||||
- `{{.Content}}`: The content of the document.
|
|
||||||
|
|
||||||
**Example `title_prompt.tmpl`:**
|
|
||||||
|
|
||||||
```text
|
|
||||||
I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
|
|
||||||
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
|
|
||||||
Respond only with the title, without any additional information. The content is likely in {{.Language}}.
|
|
||||||
|
|
||||||
Be sure to add one fitting emoji at the beginning of the title to make it more visually appealing.
|
|
||||||
|
|
||||||
Content:
|
|
||||||
{{.Content}}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example `tag_prompt.tmpl`:**
|
|
||||||
|
|
||||||
```text
|
|
||||||
I will provide you with the content and the title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in {{.Language}}.
|
|
||||||
|
|
||||||
Available Tags:
|
|
||||||
{{.AvailableTags | join ","}}
|
|
||||||
|
|
||||||
Title:
|
|
||||||
{{.Title}}
|
|
||||||
|
|
||||||
Content:
|
|
||||||
{{.Content}}
|
|
||||||
|
|
||||||
Please concisely select the {{.Language}} tags from the list above that best describe the document.
|
|
||||||
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note:** Advanced users can utilize additional functions from the [Sprig](http://masterminds.github.io/sprig/) template library, as it is included in the application.
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
1. **Tag Documents in paperless-ngx:**
|
1. **Tag Documents**
|
||||||
|
- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.
|
||||||
|
|
||||||
- Add the tag `paperless-gpt` to documents you want to process. This tag is configurable via the `tagToFilter` variable in the code (default is `paperless-gpt`).
|
2. **Visit Web UI**
|
||||||
|
- Go to `http://localhost:8080` (or your host) in your browser.
|
||||||
|
|
||||||
2. **Access the paperless-gpt Interface:**
|
3. **Generate & Apply Suggestions**
|
||||||
|
- Click “Generate Suggestions” to see AI-proposed titles/tags.
|
||||||
|
- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
|
||||||
|
|
||||||
- Open your browser and navigate to `http://localhost:8080`.
|
4. **Try LLM-Based OCR (Experimental)**
|
||||||
|
- If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
|
||||||
|
- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
|
||||||
|
|
||||||
3. **Process Documents:**
|
**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
|
||||||
|
|
||||||
- Click on **"Generate Suggestions"** to let the LLM generate title suggestions based on the document content.
|
---
|
||||||
|
|
||||||
4. **Review and Apply Titles and Tags:**
|
## LLM-Based OCR: Compare for Yourself
|
||||||
|
|
||||||
- Review the suggested titles. You can edit them if necessary.
|
<details>
|
||||||
- Click on **"Apply Suggestions"** to update the document titles in paperless-ngx.
|
<summary>Click to expand the vanilla OCR vs. AI-powered OCR comparison</summary>
|
||||||
|
|
||||||
5. **Experimental OCR Feature:**
|
### Example 1
|
||||||
|
|
||||||
- Send documents to a vision LLM for OCR processing.
|
**Image**:
|
||||||
- Example configuration to enable OCR with Ollama:
|
|
||||||
```env
|

|
||||||
VISION_LLM_PROVIDER=ollama
|
|
||||||
VISION_LLM_MODEL=minicpm-v
|
**Vanilla Paperless-ngx OCR**:
|
||||||
```
|
```
|
||||||
|
La Grande Recre
|
||||||
|
|
||||||
|
Gentre Gommercial 1'Esplanade
|
||||||
|
1349 LOLNAIN LA NEWWE
|
||||||
|
TA BERBOGAAL Tel =. 010 45,96 12
|
||||||
|
Ticket 1440112 03/11/2006 a 13597:
|
||||||
|
4007176614518. DINOS. TYRAMNESA
|
||||||
|
TOTAET.T.LES
|
||||||
|
ReslE par Lask-Euron
|
||||||
|
Rencu en Cash Euro
|
||||||
|
V.14.6 -Hotgese = VALERTE
|
||||||
|
TICKET A-GONGERVER PORR TONT. EEHANGE
|
||||||
|
HERET ET A BIENTOT
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
||||||
|
```
|
||||||
|
La Grande Récré
|
||||||
|
Centre Commercial l'Esplanade
|
||||||
|
1348 LOUVAIN LA NEUVE
|
||||||
|
TVA 860826401 Tel : 010 45 95 12
|
||||||
|
Ticket 14421 le 03/11/2006 à 15:27:18
|
||||||
|
4007176614518 DINOS TYRANNOSA 14.90
|
||||||
|
TOTAL T.T.C. 14.90
|
||||||
|
Réglé par Cash Euro 50.00
|
||||||
|
Rendu en Cash Euro 35.10
|
||||||
|
V.14.6 Hôtesse : VALERIE
|
||||||
|
TICKET A CONSERVER POUR TOUT ECHANGE
|
||||||
|
MERCI ET A BIENTOT
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Example 2
|
||||||
|
|
||||||
|
**Image**:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
**Vanilla Paperless-ngx OCR**:
|
||||||
|
```
|
||||||
|
Invoice Number: 1-996-84199
|
||||||
|
|
||||||
|
Fed: Invoica Date: Sep01, 2014
|
||||||
|
Accaunt Number: 1334-8037-4
|
||||||
|
Page: 1012
|
||||||
|
|
||||||
|
Fod£x Tax ID 71.0427007
|
||||||
|
|
||||||
|
IRISINC
|
||||||
|
SHARON ANDERSON
|
||||||
|
4731 W ATLANTIC AVE STE BI
|
||||||
|
DELRAY BEACH FL 33445-3897 ’ a
|
||||||
|
Invoice Questions?
|
||||||
|
|
||||||
|
Bing, ‚Account Shipping Address: Contact FedEx Reı
|
||||||
|
|
||||||
|
ISINC
|
||||||
|
4731 W ATLANTIC AVE Phone: (800) 622-1147 M-F 7-6 (CST)
|
||||||
|
DELRAY BEACH FL 33445-3897 US Fax: (800) 548-3020
|
||||||
|
|
||||||
|
Internet: www.fedex.com
|
||||||
|
|
||||||
|
Invoice Summary Sep 01, 2014
|
||||||
|
|
||||||
|
FodEx Ground Services
|
||||||
|
Other Charges 11.00
|
||||||
|
Total Charges 11.00 Da £
|
||||||
|
>
|
||||||
|
polo) Fz// /G
|
||||||
|
TOTAL THIS INVOICE .... usps 11.00 P 2/1 f
|
||||||
|
|
||||||
|
‘The only charges accrued for this period is the Weekly Service Charge.
|
||||||
|
|
||||||
|
The Fedix Ground aceounts teferencedin his involce have been transteired and assigned 10, are owned by,andare payable to FedEx Express:
|
||||||
|
|
||||||
|
To onsurs propor credit, plasa raturn this portion wirh your payment 10 FodEx
|
||||||
|
‚Please do not staple or fold. Ploase make your chack payablı to FedEx.
|
||||||
|
|
||||||
|
[TI For change ol address, hc har and camphat lrm or never ide
|
||||||
|
|
||||||
|
Remittance Advice
|
||||||
|
Your payment is due by Sep 16, 2004
|
||||||
|
|
||||||
|
Number Number Dus
|
||||||
|
|
||||||
|
1334803719968 41993200000110071
|
||||||
|
|
||||||
|
AT 01 0391292 468448196 A**aDGT
|
||||||
|
|
||||||
|
IRISINC Illallun elalalssollallansdHilalellund
|
||||||
|
SHARON ANDERSON
|
||||||
|
|
||||||
|
4731 W ATLANTIC AVE STEBI FedEx
|
||||||
|
|
||||||
|
DELRAY BEACH FL 334453897 PO. Box 94516
|
||||||
|
|
||||||
|
PALATINE IL 60094-4515
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
||||||
|
```
|
||||||
|
FedEx. Invoice Number: 1-996-84199
|
||||||
|
Invoice Date: Sep 01, 2014
|
||||||
|
Account Number: 1334-8037-4
|
||||||
|
Page: 1 of 2
|
||||||
|
FedEx Tax ID: 71-0427007
|
||||||
|
|
||||||
|
I R I S INC
|
||||||
|
SHARON ANDERSON
|
||||||
|
4731 W ATLANTIC AVE STE B1
|
||||||
|
DELRAY BEACH FL 33445-3897
|
||||||
|
Invoice Questions?
|
||||||
|
Billing Account Shipping Address: Contact FedEx Revenue Services
|
||||||
|
I R I S INC Phone: (800) 622-1147 M-F 7-6 (CST)
|
||||||
|
4731 W ATLANTIC AVE Fax: (800) 548-3020
|
||||||
|
DELRAY BEACH FL 33445-3897 US Internet: www.fedex.com
|
||||||
|
|
||||||
|
Invoice Summary Sep 01, 2014
|
||||||
|
|
||||||
|
FedEx Ground Services
|
||||||
|
Other Charges 11.00
|
||||||
|
|
||||||
|
Total Charges .......................................................... USD $ 11.00
|
||||||
|
|
||||||
|
TOTAL THIS INVOICE .............................................. USD $ 11.00
|
||||||
|
|
||||||
|
The only charges accrued for this period is the Weekly Service Charge.
|
||||||
|
|
||||||
|
RECEIVED
|
||||||
|
SEP _ 8 REC'D
|
||||||
|
BY: _
|
||||||
|
|
||||||
|
posted 9/21/14
|
||||||
|
|
||||||
|
The FedEx Ground accounts referenced in this invoice have been transferred and assigned to, are owned by, and are payable to FedEx Express.
|
||||||
|
|
||||||
|
To ensure proper credit, please return this portion with your payment to FedEx.
|
||||||
|
Please do not staple or fold. Please make your check payable to FedEx.
|
||||||
|
|
||||||
|
❑ For change of address, check here and complete form on reverse side.
|
||||||
|
|
||||||
|
Remittance Advice
|
||||||
|
Your payment is due by Sep 16, 2004
|
||||||
|
|
||||||
|
Invoice
|
||||||
|
Number
|
||||||
|
1-996-84199
|
||||||
|
|
||||||
|
Account
|
||||||
|
Number
|
||||||
|
1334-8037-4
|
||||||
|
|
||||||
|
Amount
|
||||||
|
Due
|
||||||
|
USD $ 11.00
|
||||||
|
|
||||||
|
133480371996841993200000110071
|
||||||
|
|
||||||
|
AT 01 031292 468448196 A**3DGT
|
||||||
|
|
||||||
|
I R I S INC
|
||||||
|
SHARON ANDERSON
|
||||||
|
4731 W ATLANTIC AVE STE B1
|
||||||
|
DELRAY BEACH FL 33445-3897
|
||||||
|
|
||||||
|
FedEx
|
||||||
|
P.O. Box 94515
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
</details>
|
||||||
|
|
||||||
|
**Why Does It Matter?**
|
||||||
|
- Traditional OCR often jumbles text from complex or low-quality scans.
|
||||||
|
- Large Language Models interpret context and correct likely errors, producing results that are more precise and readable.
|
||||||
|
- You can integrate these cleaned-up texts into your **paperless-ngx** pipeline for better tagging, searching, and archiving.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
- **Vanilla OCR** typically uses classical methods or Tesseract-like engines to extract text, which can result in garbled outputs for complex fonts or poor-quality scans.
|
||||||
|
- **LLM-Powered OCR** uses your chosen AI backend—OpenAI or Ollama—to interpret the image’s text in a more context-aware manner. This leads to fewer errors and more coherent text.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTING.md) before submitting a pull request.
|
**Pull requests** and **issues** are welcome!
|
||||||
|
1. Fork the repo
|
||||||
|
2. Create a branch (`feature/my-awesome-update`)
|
||||||
|
3. Commit changes (`git commit -m "Improve X"`)
|
||||||
|
4. Open a PR
|
||||||
|
|
||||||
1. **Fork the Repository**
|
Check out our [contributing guidelines](CONTRIBUTING.md) for details.
|
||||||
|
|
||||||
2. **Create a Feature Branch**
|
---
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout -b feature/my-new-feature
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Commit Your Changes**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git commit -am 'Add some feature'
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Push to the Branch**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git push origin feature/my-new-feature
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Create a Pull Request**
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
paperless-gpt is licensed under the [MIT License](LICENSE). Feel free to adapt and share!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Star History
|
## Star History
|
||||||
|
|
||||||
[![Star History Chart](https://api.star-history.com/svg?repos=icereed/paperless-gpt&type=Date)](https://star-history.com/#icereed/paperless-gpt&Date)
|
[![Star History Chart](https://api.star-history.com/svg?repos=icereed/paperless-gpt&type=Date)](https://star-history.com/#icereed/paperless-gpt&Date)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
**Disclaimer:** This project is not affiliated with the official paperless-ngx project. Use at your own discretion.
|
## Disclaimer
|
||||||
|
This project is **not** officially affiliated with [paperless-ngx][paperless-ngx]. Use at your own risk.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**paperless-gpt**: The **LLM-based** companion your doc management has been waiting for. Enjoy effortless, intelligent document titles, tags, and next-level OCR.
|
||||||
|
|
||||||
|
[paperless-ngx]: https://github.com/paperless-ngx/paperless-ngx
|
||||||
|
[docker-install]: https://docs.docker.com/get-docker/
|
||||||
|
|
|
@ -119,7 +119,7 @@ func (app *App) generateSuggestionsHandler(c *gin.Context) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
results, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
|
results, err := app.generateDocumentSuggestions(ctx, suggestionRequest, log.WithContext(ctx))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)})
|
||||||
log.Errorf("Error processing documents: %v", err)
|
log.Errorf("Error processing documents: %v", err)
|
||||||
|
|
56
app_llm.go
56
app_llm.go
|
@ -5,9 +5,13 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"image"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
_ "image/jpeg"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/tmc/langchaingo/llms"
|
"github.com/tmc/langchaingo/llms"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -52,7 +56,12 @@ func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, s
|
||||||
}
|
}
|
||||||
|
|
||||||
// getSuggestedTags generates suggested tags for a document using the LLM
|
// getSuggestedTags generates suggested tags for a document using the LLM
|
||||||
func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedTitle string, availableTags []string) ([]string, error) {
|
func (app *App) getSuggestedTags(
|
||||||
|
ctx context.Context,
|
||||||
|
content string,
|
||||||
|
suggestedTitle string,
|
||||||
|
availableTags []string,
|
||||||
|
logger *logrus.Entry) ([]string, error) {
|
||||||
likelyLanguage := getLikelyLanguage()
|
likelyLanguage := getLikelyLanguage()
|
||||||
|
|
||||||
templateMutex.RLock()
|
templateMutex.RLock()
|
||||||
|
@ -66,11 +75,12 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT
|
||||||
"Content": content,
|
"Content": content,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logger.Errorf("Error executing tag template: %v", err)
|
||||||
return nil, fmt.Errorf("error executing tag template: %v", err)
|
return nil, fmt.Errorf("error executing tag template: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
prompt := promptBuffer.String()
|
prompt := promptBuffer.String()
|
||||||
log.Debugf("Tag suggestion prompt: %s", prompt)
|
logger.Debugf("Tag suggestion prompt: %s", prompt)
|
||||||
|
|
||||||
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
|
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
|
||||||
{
|
{
|
||||||
|
@ -83,6 +93,7 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logger.Errorf("Error getting response from LLM: %v", err)
|
||||||
return nil, fmt.Errorf("error getting response from LLM: %v", err)
|
return nil, fmt.Errorf("error getting response from LLM: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,7 +117,7 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT
|
||||||
return filteredTags, nil
|
return filteredTags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, error) {
|
func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte, logger *logrus.Entry) (string, error) {
|
||||||
|
|
||||||
templateMutex.RLock()
|
templateMutex.RLock()
|
||||||
defer templateMutex.RUnlock()
|
defer templateMutex.RUnlock()
|
||||||
|
@ -122,15 +133,27 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
|
||||||
|
|
||||||
prompt := promptBuffer.String()
|
prompt := promptBuffer.String()
|
||||||
|
|
||||||
|
// Log the image dimensions
|
||||||
|
img, _, err := image.Decode(bytes.NewReader(jpegBytes))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error decoding image: %v", err)
|
||||||
|
}
|
||||||
|
bounds := img.Bounds()
|
||||||
|
logger.Debugf("Image dimensions: %dx%d", bounds.Dx(), bounds.Dy())
|
||||||
|
|
||||||
// If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision
|
// If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision
|
||||||
var parts []llms.ContentPart
|
var parts []llms.ContentPart
|
||||||
if strings.ToLower(visionLlmProvider) != "openai" {
|
if strings.ToLower(visionLlmProvider) != "openai" {
|
||||||
|
// Log image size in kilobytes
|
||||||
|
logger.Debugf("Image size: %d KB", len(jpegBytes)/1024)
|
||||||
parts = []llms.ContentPart{
|
parts = []llms.ContentPart{
|
||||||
llms.BinaryPart("image/jpeg", jpegBytes),
|
llms.BinaryPart("image/jpeg", jpegBytes),
|
||||||
llms.TextPart(prompt),
|
llms.TextPart(prompt),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
|
base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
|
||||||
|
// Log the size of the base64-encoded image payload in kilobytes (larger than the raw JPEG)
|
||||||
|
logger.Debugf("Image size: %d KB", len(base64Image)/1024)
|
||||||
parts = []llms.ContentPart{
|
parts = []llms.ContentPart{
|
||||||
llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
|
llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
|
||||||
llms.TextPart(prompt),
|
llms.TextPart(prompt),
|
||||||
|
@ -154,7 +177,7 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
|
||||||
}
|
}
|
||||||
|
|
||||||
// getSuggestedTitle generates a suggested title for a document using the LLM
|
// getSuggestedTitle generates a suggested title for a document using the LLM
|
||||||
func (app *App) getSuggestedTitle(ctx context.Context, content string) (string, error) {
|
func (app *App) getSuggestedTitle(ctx context.Context, content string, logger *logrus.Entry) (string, error) {
|
||||||
likelyLanguage := getLikelyLanguage()
|
likelyLanguage := getLikelyLanguage()
|
||||||
|
|
||||||
templateMutex.RLock()
|
templateMutex.RLock()
|
||||||
|
@ -171,7 +194,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string) (string,
|
||||||
|
|
||||||
prompt := promptBuffer.String()
|
prompt := promptBuffer.String()
|
||||||
|
|
||||||
log.Debugf("Title suggestion prompt: %s", prompt)
|
logger.Debugf("Title suggestion prompt: %s", prompt)
|
||||||
|
|
||||||
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
|
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
|
||||||
{
|
{
|
||||||
|
@ -191,7 +214,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string) (string,
|
||||||
}
|
}
|
||||||
|
|
||||||
// generateDocumentSuggestions generates suggestions for a set of documents
|
// generateDocumentSuggestions generates suggestions for a set of documents
|
||||||
func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest) ([]DocumentSuggestion, error) {
|
func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest, logger *logrus.Entry) ([]DocumentSuggestion, error) {
|
||||||
// Fetch all available tags from paperless-ngx
|
// Fetch all available tags from paperless-ngx
|
||||||
availableTagsMap, err := app.Client.GetAllTags(ctx)
|
availableTagsMap, err := app.Client.GetAllTags(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -231,7 +254,8 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
||||||
go func(doc Document) {
|
go func(doc Document) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
documentID := doc.ID
|
documentID := doc.ID
|
||||||
log.Printf("Processing Document ID %d...", documentID)
|
docLogger := documentLogger(documentID)
|
||||||
|
docLogger.Printf("Processing Document ID %d...", documentID)
|
||||||
|
|
||||||
content := doc.Content
|
content := doc.Content
|
||||||
if len(content) > 5000 {
|
if len(content) > 5000 {
|
||||||
|
@ -243,23 +267,23 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
||||||
var suggestedCorrespondent string
|
var suggestedCorrespondent string
|
||||||
|
|
||||||
if suggestionRequest.GenerateTitles {
|
if suggestionRequest.GenerateTitles {
|
||||||
suggestedTitle, err = app.getSuggestedTitle(ctx, content)
|
suggestedTitle, err = app.getSuggestedTitle(ctx, content, docLogger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
|
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
log.Errorf("Error processing document %d: %v", documentID, err)
|
docLogger.Errorf("Error processing document %d: %v", documentID, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if suggestionRequest.GenerateTags {
|
if suggestionRequest.GenerateTags {
|
||||||
suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames)
|
suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames, docLogger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
|
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
log.Errorf("Error generating tags for document %d: %v", documentID, err)
|
logger.Errorf("Error generating tags for document %d: %v", documentID, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -283,7 +307,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
||||||
}
|
}
|
||||||
// Titles
|
// Titles
|
||||||
if suggestionRequest.GenerateTitles {
|
if suggestionRequest.GenerateTitles {
|
||||||
log.Printf("Suggested title for document %d: %s", documentID, suggestedTitle)
|
docLogger.Printf("Suggested title for document %d: %s", documentID, suggestedTitle)
|
||||||
suggestion.SuggestedTitle = suggestedTitle
|
suggestion.SuggestedTitle = suggestedTitle
|
||||||
} else {
|
} else {
|
||||||
suggestion.SuggestedTitle = doc.Title
|
suggestion.SuggestedTitle = doc.Title
|
||||||
|
@ -291,10 +315,10 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
||||||
|
|
||||||
// Tags
|
// Tags
|
||||||
if suggestionRequest.GenerateTags {
|
if suggestionRequest.GenerateTags {
|
||||||
log.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
|
docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
|
||||||
suggestion.SuggestedTags = suggestedTags
|
suggestion.SuggestedTags = suggestedTags
|
||||||
} else {
|
} else {
|
||||||
suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag)
|
suggestion.SuggestedTags = doc.Tags
|
||||||
}
|
}
|
||||||
|
|
||||||
// Correspondents
|
// Correspondents
|
||||||
|
@ -304,10 +328,12 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
||||||
} else {
|
} else {
|
||||||
suggestion.SuggestedCorrespondent = ""
|
suggestion.SuggestedCorrespondent = ""
|
||||||
}
|
}
|
||||||
|
// Mark both the manual tag and the auto tag for removal from the document
|
||||||
|
suggestion.RemoveTags = []string{manualTag, autoTag}
|
||||||
|
|
||||||
documentSuggestions = append(documentSuggestions, suggestion)
|
documentSuggestions = append(documentSuggestions, suggestion)
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
log.Printf("Document %d processed successfully.", documentID)
|
docLogger.Printf("Document %d processed successfully.", documentID)
|
||||||
}(documents[i])
|
}(documents[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
BIN
demo.gif
BIN
demo.gif
Binary file not shown.
Before Width: | Height: | Size: 1.2 MiB |
BIN
demo.mp4
Normal file
BIN
demo.mp4
Normal file
Binary file not shown.
BIN
demo/ocr-example1.jpg
Normal file
BIN
demo/ocr-example1.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 30 KiB |
BIN
demo/ocr-example2.jpg
Normal file
BIN
demo/ocr-example2.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 73 KiB |
6
go.mod
6
go.mod
|
@ -2,7 +2,7 @@ module paperless-gpt
|
||||||
|
|
||||||
go 1.22.0
|
go 1.22.0
|
||||||
|
|
||||||
toolchain go1.22.2
|
toolchain go1.23.4
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/Masterminds/sprig/v3 v3.3.0
|
github.com/Masterminds/sprig/v3 v3.3.0
|
||||||
|
@ -28,6 +28,7 @@ require (
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
github.com/dlclark/regexp2 v1.10.0 // indirect
|
github.com/dlclark/regexp2 v1.10.0 // indirect
|
||||||
github.com/ebitengine/purego v0.8.0 // indirect
|
github.com/ebitengine/purego v0.8.0 // indirect
|
||||||
|
github.com/fatih/color v1.18.0 // indirect
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
|
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
|
||||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||||
github.com/go-playground/locales v0.14.1 // indirect
|
github.com/go-playground/locales v0.14.1 // indirect
|
||||||
|
@ -42,6 +43,7 @@ require (
|
||||||
github.com/jupiterrider/ffi v0.2.0 // indirect
|
github.com/jupiterrider/ffi v0.2.0 // indirect
|
||||||
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
||||||
github.com/leodido/go-urn v1.4.0 // indirect
|
github.com/leodido/go-urn v1.4.0 // indirect
|
||||||
|
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/mattn/go-sqlite3 v1.14.24 // indirect
|
github.com/mattn/go-sqlite3 v1.14.24 // indirect
|
||||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||||
|
@ -58,7 +60,7 @@ require (
|
||||||
golang.org/x/arch v0.8.0 // indirect
|
golang.org/x/arch v0.8.0 // indirect
|
||||||
golang.org/x/crypto v0.26.0 // indirect
|
golang.org/x/crypto v0.26.0 // indirect
|
||||||
golang.org/x/net v0.25.0 // indirect
|
golang.org/x/net v0.25.0 // indirect
|
||||||
golang.org/x/sys v0.23.0 // indirect
|
golang.org/x/sys v0.25.0 // indirect
|
||||||
golang.org/x/text v0.20.0 // indirect
|
golang.org/x/text v0.20.0 // indirect
|
||||||
google.golang.org/protobuf v1.34.1 // indirect
|
google.golang.org/protobuf v1.34.1 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
|
|
8
go.sum
8
go.sum
|
@ -25,6 +25,8 @@ github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq
|
||||||
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||||
github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE=
|
github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE=
|
||||||
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||||
|
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
|
||||||
|
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||||
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
||||||
|
@ -70,6 +72,9 @@ github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY
|
||||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||||
|
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||||
|
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||||
|
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
||||||
|
@ -155,6 +160,7 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
@ -162,6 +168,8 @@ golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
|
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
|
||||||
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
|
||||||
|
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
|
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
|
||||||
|
|
84
main.go
84
main.go
|
@ -6,12 +6,15 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/Masterminds/sprig/v3"
|
"github.com/Masterminds/sprig/v3"
|
||||||
|
"github.com/fatih/color"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/tmc/langchaingo/llms"
|
"github.com/tmc/langchaingo/llms"
|
||||||
|
@ -27,7 +30,6 @@ var (
|
||||||
log = logrus.New()
|
log = logrus.New()
|
||||||
|
|
||||||
// Environment Variables
|
// Environment Variables
|
||||||
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
|
|
||||||
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
||||||
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
||||||
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
||||||
|
@ -45,6 +47,7 @@ var (
|
||||||
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
|
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
|
||||||
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
|
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
|
||||||
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
|
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
|
||||||
|
limitOcrPages int // Will be read from OCR_LIMIT_PAGES
|
||||||
|
|
||||||
// Templates
|
// Templates
|
||||||
titleTemplate *template.Template
|
titleTemplate *template.Template
|
||||||
|
@ -121,6 +124,9 @@ func main() {
|
||||||
// Initialize logrus logger
|
// Initialize logrus logger
|
||||||
initLogger()
|
initLogger()
|
||||||
|
|
||||||
|
// Print version
|
||||||
|
printVersion()
|
||||||
|
|
||||||
// Initialize PaperlessClient
|
// Initialize PaperlessClient
|
||||||
client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken)
|
client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken)
|
||||||
|
|
||||||
|
@ -263,6 +269,29 @@ func main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func printVersion() {
|
||||||
|
cyan := color.New(color.FgCyan).SprintFunc()
|
||||||
|
yellow := color.New(color.FgYellow).SprintFunc()
|
||||||
|
|
||||||
|
banner := `
|
||||||
|
╔═══════════════════════════════════════╗
|
||||||
|
║ Paperless GPT ║
|
||||||
|
╚═══════════════════════════════════════╝`
|
||||||
|
|
||||||
|
fmt.Printf("%s\n", cyan(banner))
|
||||||
|
fmt.Printf("\n%s %s\n", yellow("Version:"), version)
|
||||||
|
if commit != "" {
|
||||||
|
fmt.Printf("%s %s\n", yellow("Commit:"), commit)
|
||||||
|
}
|
||||||
|
if buildDate != "" {
|
||||||
|
fmt.Printf("%s %s\n", yellow("Build Date:"), buildDate)
|
||||||
|
}
|
||||||
|
fmt.Printf("%s %s/%s\n", yellow("Platform:"), runtime.GOOS, runtime.GOARCH)
|
||||||
|
fmt.Printf("%s %s\n", yellow("Go Version:"), runtime.Version())
|
||||||
|
fmt.Printf("%s %s\n", yellow("Started:"), time.Now().Format(time.RFC1123))
|
||||||
|
fmt.Println()
|
||||||
|
}
|
||||||
|
|
||||||
func initLogger() {
|
func initLogger() {
|
||||||
switch logLevel {
|
switch logLevel {
|
||||||
case "debug":
|
case "debug":
|
||||||
|
@ -338,6 +367,24 @@ func validateOrDefaultEnvVars() {
|
||||||
if (llmProvider == "openai" || visionLlmProvider == "openai") && openaiAPIKey == "" {
|
if (llmProvider == "openai" || visionLlmProvider == "openai") && openaiAPIKey == "" {
|
||||||
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
|
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if isOcrEnabled() {
|
||||||
|
rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
|
||||||
|
if rawLimitOcrPages == "" {
|
||||||
|
limitOcrPages = 5
|
||||||
|
} else {
|
||||||
|
var err error
|
||||||
|
limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// documentLogger creates a logger with document context
|
||||||
|
func documentLogger(documentID int) *logrus.Entry {
|
||||||
|
return log.WithField("document_id", documentID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// processAutoTagDocuments handles the background auto-tagging of documents
|
// processAutoTagDocuments handles the background auto-tagging of documents
|
||||||
|
@ -356,23 +403,29 @@ func (app *App) processAutoTagDocuments() (int, error) {
|
||||||
|
|
||||||
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
|
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
|
||||||
|
|
||||||
|
for _, document := range documents {
|
||||||
|
docLogger := documentLogger(document.ID)
|
||||||
|
docLogger.Info("Processing document for auto-tagging")
|
||||||
|
|
||||||
suggestionRequest := GenerateSuggestionsRequest{
|
suggestionRequest := GenerateSuggestionsRequest{
|
||||||
Documents: documents,
|
Documents: []Document{document},
|
||||||
GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
|
GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
|
||||||
GenerateTags: strings.ToLower(autoGenerateTags) != "false",
|
GenerateTags: strings.ToLower(autoGenerateTags) != "false",
|
||||||
GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
|
GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
|
||||||
}
|
}
|
||||||
|
|
||||||
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
|
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error generating suggestions: %w", err)
|
return 0, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
|
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error updating documents: %w", err)
|
return 0, fmt.Errorf("error updating document %d: %w", document.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
docLogger.Info("Successfully processed document")
|
||||||
|
}
|
||||||
return len(documents), nil
|
return len(documents), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -392,26 +445,31 @@ func (app *App) processAutoOcrTagDocuments() (int, error) {
|
||||||
|
|
||||||
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)
|
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)
|
||||||
|
|
||||||
documents = documents[:1] // Process only one document at a time
|
for _, document := range documents {
|
||||||
|
docLogger := documentLogger(document.ID)
|
||||||
|
docLogger.Info("Processing document for OCR")
|
||||||
|
|
||||||
ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID)
|
ocrContent, err := app.ProcessDocumentOCR(ctx, document.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error processing document OCR: %w", err)
|
return 0, fmt.Errorf("error processing OCR for document %d: %w", document.ID, err)
|
||||||
}
|
}
|
||||||
log.Debugf("OCR content for document %d: %s", documents[0].ID, ocrContent)
|
docLogger.Debug("OCR processing completed")
|
||||||
|
|
||||||
err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
|
err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
|
||||||
{
|
{
|
||||||
ID: documents[0].ID,
|
ID: document.ID,
|
||||||
OriginalDocument: documents[0],
|
OriginalDocument: document,
|
||||||
SuggestedContent: ocrContent,
|
SuggestedContent: ocrContent,
|
||||||
|
RemoveTags: []string{autoOcrTag},
|
||||||
},
|
},
|
||||||
}, app.Database, false)
|
}, app.Database, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error updating documents: %w", err)
|
return 0, fmt.Errorf("error updating document %d after OCR: %w", document.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1, nil // Processed one document
|
docLogger.Info("Successfully processed document OCR")
|
||||||
|
}
|
||||||
|
return 1, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// removeTagFromList removes a specific tag from a list of tags
|
// removeTagFromList removes a specific tag from a list of tags
|
||||||
|
|
27
ocr.go
27
ocr.go
|
@ -9,31 +9,42 @@ import (
|
||||||
|
|
||||||
// ProcessDocumentOCR processes a document through OCR and returns the combined text
|
// ProcessDocumentOCR processes a document through OCR and returns the combined text
|
||||||
func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
|
func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
|
||||||
imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID)
|
docLogger := documentLogger(documentID)
|
||||||
|
docLogger.Info("Starting OCR processing")
|
||||||
|
|
||||||
|
imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID, limitOcrPages)
|
||||||
defer func() {
|
defer func() {
|
||||||
for _, imagePath := range imagePaths {
|
for _, imagePath := range imagePaths {
|
||||||
os.Remove(imagePath)
|
if err := os.Remove(imagePath); err != nil {
|
||||||
|
docLogger.WithError(err).WithField("image_path", imagePath).Warn("Failed to remove temporary image file")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error downloading document images: %w", err)
|
return "", fmt.Errorf("error downloading document images for document %d: %w", documentID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
docLogger.WithField("page_count", len(imagePaths)).Debug("Downloaded document images")
|
||||||
|
|
||||||
var ocrTexts []string
|
var ocrTexts []string
|
||||||
for _, imagePath := range imagePaths {
|
for i, imagePath := range imagePaths {
|
||||||
|
pageLogger := docLogger.WithField("page", i+1)
|
||||||
|
pageLogger.Debug("Processing page")
|
||||||
|
|
||||||
imageContent, err := os.ReadFile(imagePath)
|
imageContent, err := os.ReadFile(imagePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error reading image file: %w", err)
|
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ocrText, err := app.doOCRViaLLM(ctx, imageContent)
|
ocrText, err := app.doOCRViaLLM(ctx, imageContent, pageLogger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error performing OCR: %w", err)
|
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
|
||||||
}
|
}
|
||||||
log.Debugf("OCR text: %s", ocrText)
|
pageLogger.Debug("OCR completed for page")
|
||||||
|
|
||||||
ocrTexts = append(ocrTexts, ocrText)
|
ocrTexts = append(ocrTexts, ocrText)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
docLogger.Info("OCR processing completed successfully")
|
||||||
return strings.Join(ocrTexts, "\n\n"), nil
|
return strings.Join(ocrTexts, "\n\n"), nil
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 92 KiB |
24
paperless.go
24
paperless.go
|
@ -290,8 +290,9 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove autoTag to prevent infinite loop (even if it is in the original tags)
|
// remove autoTag to prevent infinite loop (even if it is in the original tags)
|
||||||
originalTags = removeTagFromList(originalTags, autoTag)
|
for _, tag := range document.RemoveTags {
|
||||||
originalTags = removeTagFromList(originalTags, autoOcrTag)
|
originalTags = removeTagFromList(originalTags, tag)
|
||||||
|
}
|
||||||
|
|
||||||
if len(tags) == 0 {
|
if len(tags) == 0 {
|
||||||
tags = originalTags
|
tags = originalTags
|
||||||
|
@ -300,6 +301,12 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
|
||||||
originalFields["tags"] = originalTags
|
originalFields["tags"] = originalTags
|
||||||
// remove autoTag to prevent infinite loop - this is required in case of undo
|
// remove autoTag to prevent infinite loop - this is required in case of undo
|
||||||
tags = removeTagFromList(tags, autoTag)
|
tags = removeTagFromList(tags, autoTag)
|
||||||
|
|
||||||
|
// keep previous tags
|
||||||
|
tags = append(tags, originalTags...)
|
||||||
|
// remove duplicates
|
||||||
|
slices.Sort(tags)
|
||||||
|
tags = slices.Compact(tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
updatedTagsJSON, err := json.Marshal(tags)
|
updatedTagsJSON, err := json.Marshal(tags)
|
||||||
|
@ -424,7 +431,8 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
|
||||||
}
|
}
|
||||||
|
|
||||||
// DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images
|
// DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images
|
||||||
func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int) ([]string, error) {
|
// If limitPages > 0, only the first N pages will be processed
|
||||||
|
func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int, limitPages int) ([]string, error) {
|
||||||
// Create a directory named after the document ID
|
// Create a directory named after the document ID
|
||||||
docDir := filepath.Join(client.GetCacheFolder(), fmt.Sprintf("document-%d", documentId))
|
docDir := filepath.Join(client.GetCacheFolder(), fmt.Sprintf("document-%d", documentId))
|
||||||
if _, err := os.Stat(docDir); os.IsNotExist(err) {
|
if _, err := os.Stat(docDir); os.IsNotExist(err) {
|
||||||
|
@ -437,6 +445,9 @@ func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, doc
|
||||||
// Check if images already exist
|
// Check if images already exist
|
||||||
var imagePaths []string
|
var imagePaths []string
|
||||||
for n := 0; ; n++ {
|
for n := 0; ; n++ {
|
||||||
|
if limitPages > 0 && n >= limitPages {
|
||||||
|
break
|
||||||
|
}
|
||||||
imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n))
|
imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n))
|
||||||
if _, err := os.Stat(imagePath); os.IsNotExist(err) {
|
if _, err := os.Stat(imagePath); os.IsNotExist(err) {
|
||||||
break
|
break
|
||||||
|
@ -485,10 +496,15 @@ func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, doc
|
||||||
}
|
}
|
||||||
defer doc.Close()
|
defer doc.Close()
|
||||||
|
|
||||||
|
totalPages := doc.NumPage()
|
||||||
|
if limitPages > 0 && limitPages < totalPages {
|
||||||
|
totalPages = limitPages
|
||||||
|
}
|
||||||
|
|
||||||
var mu sync.Mutex
|
var mu sync.Mutex
|
||||||
var g errgroup.Group
|
var g errgroup.Group
|
||||||
|
|
||||||
for n := 0; n < doc.NumPage(); n++ {
|
for n := 0; n < totalPages; n++ {
|
||||||
n := n // capture loop variable
|
n := n // capture loop variable
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
|
|
|
@ -300,18 +300,24 @@ func TestUpdateDocuments(t *testing.T) {
|
||||||
OriginalDocument: Document{
|
OriginalDocument: Document{
|
||||||
ID: 1,
|
ID: 1,
|
||||||
Title: "Old Title",
|
Title: "Old Title",
|
||||||
Tags: []string{"tag1"},
|
Tags: []string{"tag1", "tag3", "manual", "removeMe"},
|
||||||
},
|
},
|
||||||
SuggestedTitle: "New Title",
|
SuggestedTitle: "New Title",
|
||||||
SuggestedTags: []string{"tag2"},
|
SuggestedTags: []string{"tag2", "tag3"},
|
||||||
|
RemoveTags: []string{"removeMe"},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
idTag1 := 1
|
||||||
|
idTag2 := 2
|
||||||
|
idTag3 := 4
|
||||||
// Mock data for tags
|
// Mock data for tags
|
||||||
tagsResponse := map[string]interface{}{
|
tagsResponse := map[string]interface{}{
|
||||||
"results": []map[string]interface{}{
|
"results": []map[string]interface{}{
|
||||||
{"id": 1, "name": "tag1"},
|
{"id": idTag1, "name": "tag1"},
|
||||||
{"id": 2, "name": "tag2"},
|
{"id": idTag2, "name": "tag2"},
|
||||||
{"id": 3, "name": "manual"},
|
{"id": 3, "name": "manual"},
|
||||||
|
{"id": idTag3, "name": "tag3"},
|
||||||
|
{"id": 5, "name": "removeMe"},
|
||||||
},
|
},
|
||||||
"next": nil,
|
"next": nil,
|
||||||
}
|
}
|
||||||
|
@ -342,7 +348,7 @@ func TestUpdateDocuments(t *testing.T) {
|
||||||
// Expected updated fields
|
// Expected updated fields
|
||||||
expectedFields := map[string]interface{}{
|
expectedFields := map[string]interface{}{
|
||||||
"title": "New Title",
|
"title": "New Title",
|
||||||
"tags": []interface{}{float64(2)}, // tag2 ID
|
"tags": []interface{}{float64(idTag1), float64(idTag2), float64(idTag3)}, // keep also previous tags
|
||||||
}
|
}
|
||||||
|
|
||||||
assert.Equal(t, expectedFields, updatedFields)
|
assert.Equal(t, expectedFields, updatedFields)
|
||||||
|
@ -385,7 +391,7 @@ func TestDownloadDocumentAsImages(t *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID)
|
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 0)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Verify that exactly one page was extracted
|
// Verify that exactly one page was extracted
|
||||||
|
@ -422,11 +428,11 @@ func TestDownloadDocumentAsImages_ManyPages(t *testing.T) {
|
||||||
env.client.CacheFolder = "tests/tmp"
|
env.client.CacheFolder = "tests/tmp"
|
||||||
// Clean the cache folder
|
// Clean the cache folder
|
||||||
os.RemoveAll(env.client.CacheFolder)
|
os.RemoveAll(env.client.CacheFolder)
|
||||||
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID)
|
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 50)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Verify that exactly 52 pages were extracted
|
// Verify that exactly 50 pages were extracted - the original doc contains 52 pages
|
||||||
assert.Len(t, imagePaths, 52)
|
assert.Len(t, imagePaths, 50)
|
||||||
// The path shall end with tests/tmp/document-321/page000.jpg
|
// The path shall end with tests/tmp/document-321/page000.jpg
|
||||||
for _, imagePath := range imagePaths {
|
for _, imagePath := range imagePaths {
|
||||||
_, err := os.Stat(imagePath)
|
_, err := os.Stat(imagePath)
|
||||||
|
|
|
@ -2,5 +2,19 @@
|
||||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||||
"extends": [
|
"extends": [
|
||||||
"config:recommended"
|
"config:recommended"
|
||||||
|
],
|
||||||
|
"customManagers": [
|
||||||
|
{
|
||||||
|
"customType": "regex",
|
||||||
|
"description": "Update VERSION variables in Dockerfiles",
|
||||||
|
"fileMatch": [
|
||||||
|
"^Dockerfile$"
|
||||||
|
],
|
||||||
|
"matchStrings": [
|
||||||
|
"# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) .+?_VERSION=\"(?<currentValue>.+?)\"\\s",
|
||||||
|
"# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) VERSION=\"(?<currentValue>.+?)\"\\s"
|
||||||
|
],
|
||||||
|
"versioningTemplate": "{{#if versioning}}{{versioning}}{{else}}semver{{/if}}"
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
1
types.go
1
types.go
|
@ -81,6 +81,7 @@ type DocumentSuggestion struct {
|
||||||
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
||||||
SuggestedContent string `json:"suggested_content,omitempty"`
|
SuggestedContent string `json:"suggested_content,omitempty"`
|
||||||
SuggestedCorrespondent string `json:"suggested_correspondent,omitempty"`
|
SuggestedCorrespondent string `json:"suggested_correspondent,omitempty"`
|
||||||
|
RemoveTags []string `json:"remove_tags,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Correspondent struct {
|
type Correspondent struct {
|
||||||
|
|
7
version.go
Normal file
7
version.go
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
var (
|
||||||
|
version = "devVersion"
|
||||||
|
buildDate = "devBuildDate"
|
||||||
|
commit = "devCommit"
|
||||||
|
)
|
55
web-app/package-lock.json
generated
55
web-app/package-lock.json
generated
|
@ -2889,12 +2889,16 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/lilconfig": {
|
"node_modules/lilconfig": {
|
||||||
"version": "2.1.0",
|
"version": "3.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
|
||||||
"integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==",
|
"integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=10"
|
"node": ">=14"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/antonk52"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/lines-and-columns": {
|
"node_modules/lines-and-columns": {
|
||||||
|
@ -3350,18 +3354,6 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/postcss-load-config/node_modules/lilconfig": {
|
|
||||||
"version": "3.1.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz",
|
|
||||||
"integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==",
|
|
||||||
"dev": true,
|
|
||||||
"engines": {
|
|
||||||
"node": ">=14"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/antonk52"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/postcss-nested": {
|
"node_modules/postcss-nested": {
|
||||||
"version": "6.2.0",
|
"version": "6.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
|
||||||
|
@ -3944,33 +3936,34 @@
|
||||||
"integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew=="
|
"integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew=="
|
||||||
},
|
},
|
||||||
"node_modules/tailwindcss": {
|
"node_modules/tailwindcss": {
|
||||||
"version": "3.4.12",
|
"version": "3.4.17",
|
||||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.12.tgz",
|
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz",
|
||||||
"integrity": "sha512-Htf/gHj2+soPb9UayUNci/Ja3d8pTmu9ONTfh4QY8r3MATTZOzmv6UYWF7ZwikEIC8okpfqmGqrmDehua8mF8w==",
|
"integrity": "sha512-w33E2aCvSDP0tW9RZuNXadXlkHXqFzSkQew/aIa2i/Sj8fThxwovwlXHSPXTbAHwEIhBFXAedUhP2tueAKP8Og==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@alloc/quick-lru": "^5.2.0",
|
"@alloc/quick-lru": "^5.2.0",
|
||||||
"arg": "^5.0.2",
|
"arg": "^5.0.2",
|
||||||
"chokidar": "^3.5.3",
|
"chokidar": "^3.6.0",
|
||||||
"didyoumean": "^1.2.2",
|
"didyoumean": "^1.2.2",
|
||||||
"dlv": "^1.1.3",
|
"dlv": "^1.1.3",
|
||||||
"fast-glob": "^3.3.0",
|
"fast-glob": "^3.3.2",
|
||||||
"glob-parent": "^6.0.2",
|
"glob-parent": "^6.0.2",
|
||||||
"is-glob": "^4.0.3",
|
"is-glob": "^4.0.3",
|
||||||
"jiti": "^1.21.0",
|
"jiti": "^1.21.6",
|
||||||
"lilconfig": "^2.1.0",
|
"lilconfig": "^3.1.3",
|
||||||
"micromatch": "^4.0.5",
|
"micromatch": "^4.0.8",
|
||||||
"normalize-path": "^3.0.0",
|
"normalize-path": "^3.0.0",
|
||||||
"object-hash": "^3.0.0",
|
"object-hash": "^3.0.0",
|
||||||
"picocolors": "^1.0.0",
|
"picocolors": "^1.1.1",
|
||||||
"postcss": "^8.4.23",
|
"postcss": "^8.4.47",
|
||||||
"postcss-import": "^15.1.0",
|
"postcss-import": "^15.1.0",
|
||||||
"postcss-js": "^4.0.1",
|
"postcss-js": "^4.0.1",
|
||||||
"postcss-load-config": "^4.0.1",
|
"postcss-load-config": "^4.0.2",
|
||||||
"postcss-nested": "^6.0.1",
|
"postcss-nested": "^6.2.0",
|
||||||
"postcss-selector-parser": "^6.0.11",
|
"postcss-selector-parser": "^6.1.2",
|
||||||
"resolve": "^1.22.2",
|
"resolve": "^1.22.8",
|
||||||
"sucrase": "^3.32.0"
|
"sucrase": "^3.35.0"
|
||||||
},
|
},
|
||||||
"bin": {
|
"bin": {
|
||||||
"tailwind": "lib/cli.js",
|
"tailwind": "lib/cli.js",
|
||||||
|
|
Loading…
Reference in a new issue