mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-13 13:18:02 -05:00
Merge branch 'main' into arm-v7
This commit is contained in:
commit
a4bc82f71f
19 changed files with 726 additions and 337 deletions
10
.github/workflows/docker-build-and-push.yml
vendored
10
.github/workflows/docker-build-and-push.yml
vendored
|
@ -15,10 +15,10 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.22
|
||||
|
||||
|
@ -63,7 +63,7 @@ jobs:
|
|||
platform: [linux/amd64, linux/arm64, linux/arm/v7]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
@ -99,3 +99,7 @@ jobs:
|
|||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
tags: ${{ env.TAGS }}
|
||||
build-args: |
|
||||
VERSION=${{ github.ref_type == 'tag' && github.ref_name || github.sha }}
|
||||
COMMIT=${{ github.sha }}
|
||||
BUILD_DATE=${{ github.event.repository.pushed_at }}
|
||||
|
|
46
Dockerfile
46
Dockerfile
|
@ -1,17 +1,33 @@
|
|||
# Define top-level build arguments
|
||||
ARG VERSION=docker-dev
|
||||
ARG COMMIT=unknown
|
||||
ARG BUILD_DATE=unknown
|
||||
|
||||
# Stage 1: Build the Go binary
|
||||
FROM golang:1.22-alpine AS builder
|
||||
FROM golang:1.23.4-alpine3.21 AS builder
|
||||
|
||||
# Set the working directory inside the container
|
||||
WORKDIR /app
|
||||
|
||||
# Install necessary packages
|
||||
RUN apk add --no-cache \
|
||||
git \
|
||||
gcc \
|
||||
musl-dev \
|
||||
mupdf \
|
||||
mupdf-dev
|
||||
# Package versions for Renovate
|
||||
# renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
|
||||
ENV GCC_VERSION=14.2.0-r4
|
||||
# renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
|
||||
ENV MUSL_DEV_VERSION=1.2.5-r8
|
||||
# renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
|
||||
ENV MUPDF_VERSION=1.24.10-r0
|
||||
# renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
|
||||
ENV MUPDF_DEV_VERSION=1.24.10-r0
|
||||
# renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
|
||||
ENV SED_VERSION=4.9-r2
|
||||
|
||||
# Install necessary packages with pinned versions
|
||||
RUN apk add --no-cache \
|
||||
"gcc=${GCC_VERSION}" \
|
||||
"musl-dev=${MUSL_DEV_VERSION}" \
|
||||
"mupdf=${MUPDF_VERSION}" \
|
||||
"mupdf-dev=${MUPDF_DEV_VERSION}" \
|
||||
"sed=${SED_VERSION}"
|
||||
# Copy go.mod and go.sum files
|
||||
COPY go.mod go.sum ./
|
||||
|
||||
|
@ -24,6 +40,18 @@ RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3
|
|||
# Now copy the actual source files
|
||||
COPY *.go .
|
||||
|
||||
# Import ARGs from top level
|
||||
ARG VERSION
|
||||
ARG COMMIT
|
||||
ARG BUILD_DATE
|
||||
|
||||
# Update version information
|
||||
RUN sed -i \
|
||||
-e "s/devVersion/${VERSION}/" \
|
||||
-e "s/devBuildDate/${BUILD_DATE}/" \
|
||||
-e "s/devCommit/${COMMIT}/" \
|
||||
version.go
|
||||
|
||||
# Build the binary using caching for both go modules and build cache
|
||||
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
|
||||
|
||||
|
@ -51,6 +79,8 @@ RUN npm run build
|
|||
# Stage 3: Create a lightweight image with the Go binary and frontend
|
||||
FROM alpine:latest
|
||||
|
||||
ENV GIN_MODE=release
|
||||
|
||||
# Install necessary runtime dependencies
|
||||
RUN apk add --no-cache \
|
||||
ca-certificates
|
||||
|
|
498
README.md
498
README.md
|
@ -1,30 +1,42 @@
|
|||
# paperless-gpt
|
||||
|
||||
[](LICENSE)
|
||||
[](https://hub.docker.com/r/icereed/paperless-gpt)
|
||||
[](CODE_OF_CONDUCT.md)
|
||||
|
||||

|
||||
|
||||
**paperless-gpt** is a tool designed to generate accurate and meaningful document titles and tags for [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) using Large Language Models (LLMs). It supports multiple LLM providers, including **OpenAI** and **Ollama**. With paperless-gpt, you can streamline your document management by automatically suggesting appropriate titles and tags based on the content of your scanned documents.
|
||||
**paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**—ensuring high accuracy, even with tricky scans. If you’re craving next-level text extraction and effortless document organization, this is your solution.
|
||||
|
||||
[](./demo.gif)
|
||||
https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
|
||||
|
||||
## Features
|
||||
---
|
||||
|
||||
- **Multiple LLM Support**: Choose between OpenAI and Ollama for generating document titles and tags.
|
||||
- **Customizable Prompts**: Modify the prompt templates to suit your specific needs.
|
||||
- **Easy Integration**: Works seamlessly with your existing paperless-ngx setup.
|
||||
- **User-Friendly Interface**: Intuitive web interface for reviewing and applying suggested titles and tags.
|
||||
- **Dockerized Deployment**: Simple setup using Docker and Docker Compose.
|
||||
- **Automatic Document Processing**: Automatically apply generated suggestions for documents with the `paperless-gpt-auto` tag.
|
||||
- **Experimental OCR Feature**: Send documents to a vision LLM for OCR processing.
|
||||
## Key Highlights
|
||||
|
||||
1. **LLM-Enhanced OCR**
|
||||
Harness Large Language Models (OpenAI or Ollama) for **better-than-traditional** OCR—turn messy or low-quality scans into context-aware, high-fidelity text.
|
||||
|
||||
2. **Automatic Title & Tag Generation**
|
||||
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
|
||||
|
||||
3. **Extensive Customization**
|
||||
- **Prompt Templates**: Tweak your AI prompts to reflect your domain, style, or preference.
|
||||
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
|
||||
|
||||
4. **Simple Docker Deployment**
|
||||
A few environment variables, and you’re off! Compose it alongside paperless-ngx with minimal fuss.
|
||||
|
||||
5. **Unified Web UI**
|
||||
- **Manual Review**: Approve or tweak AI’s suggestions.
|
||||
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
|
||||
|
||||
6. **Opt-In LLM-based OCR**
|
||||
If you opt in, your images get read by a Vision LLM, pushing boundaries beyond standard OCR tools.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [paperless-gpt](#paperless-gpt)
|
||||
- [Features](#features)
|
||||
- [Table of Contents](#table-of-contents)
|
||||
- [Key Highlights](#key-highlights)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
|
@ -37,48 +49,56 @@
|
|||
- [Mounting the Prompts Directory](#mounting-the-prompts-directory)
|
||||
- [Editing the Prompt Templates](#editing-the-prompt-templates)
|
||||
- [Template Syntax and Variables](#template-syntax-and-variables)
|
||||
- [OCR using AI](#llm-based-ocr-compare-for-yourself)
|
||||
- [Usage](#usage)
|
||||
- [Contributing](#contributing)
|
||||
- [License](#license)
|
||||
- [Star History](#star-history)
|
||||
- [Disclaimer](#disclaimer)
|
||||
|
||||
---
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- [Docker](https://www.docker.com/get-started) installed on your system.
|
||||
- A running instance of [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx).
|
||||
- [Docker][docker-install] installed.
|
||||
- A running instance of [paperless-ngx][paperless-ngx].
|
||||
- Access to an LLM provider:
|
||||
- **OpenAI**: An API key with access to models like `gpt-4o` or `gpt-3.5-turbo`.
|
||||
- **Ollama**: A running Ollama server with models like `llama2` installed.
|
||||
- **OpenAI**: An API key with models like `gpt-4o` or `gpt-3.5-turbo`.
|
||||
- **Ollama**: A running Ollama server with models like `llama2`.
|
||||
|
||||
### Installation
|
||||
|
||||
#### Docker Compose
|
||||
|
||||
The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx.
|
||||
Here’s an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
|
||||
|
||||
```yaml
|
||||
version: '3.7'
|
||||
services:
|
||||
paperless-ngx:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
# ... (your existing paperless-ngx configuration)
|
||||
# ... (your existing paperless-ngx config)
|
||||
|
||||
paperless-gpt:
|
||||
image: icereed/paperless-gpt:latest
|
||||
environment:
|
||||
PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
|
||||
PAPERLESS_API_TOKEN: 'your_paperless_api_token'
|
||||
PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional, your public link to access Paperless
|
||||
PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional
|
||||
MANUAL_TAG: 'paperless-gpt' # Optional, default: paperless-gpt
|
||||
AUTO_TAG: 'paperless-gpt-auto' # Optional, default: paperless-gpt-auto
|
||||
LLM_PROVIDER: 'openai' # or 'ollama'
|
||||
LLM_MODEL: 'gpt-4o' # or 'llama2'
|
||||
OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI
|
||||
LLM_LANGUAGE: 'English' # Optional, default is 'English'
|
||||
OPENAI_API_KEY: 'your_openai_api_key'
|
||||
# Optional - OPENAI_BASE_URL: 'https://litellm.yourinstallationof.it.com/v1'
|
||||
LLM_LANGUAGE: 'English' # Optional, default: English
|
||||
OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
|
||||
VISION_LLM_PROVIDER: 'ollama' # Optional (for OCR) - ollama or openai
|
||||
VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai
|
||||
LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error'
|
||||
VISION_LLM_PROVIDER: 'ollama' # (for OCR) - openai or ollama
|
||||
VISION_LLM_MODEL: 'minicpm-v' # (for OCR) - minicpm-v (ollama example), gpt-4o (for openai), etc.
|
||||
AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default: paperless-gpt-ocr-auto
|
||||
OCR_LIMIT_PAGES: '5' # Optional, default: 5. Set to 0 for no limit.
|
||||
LOG_LEVEL: 'info' # Optional, default: info. Other levels: debug, warn, error
|
||||
volumes:
|
||||
- ./prompts:/app/prompts # Mount the prompts directory
|
||||
ports:
|
||||
|
@ -87,33 +107,23 @@ services:
|
|||
- paperless-ngx
|
||||
```
|
||||
|
||||
**Note:** Replace the placeholder values with your actual configuration.
|
||||
**Pro Tip**: Replace placeholders with real values and read the logs if something looks off.
|
||||
|
||||
#### Manual Setup
|
||||
|
||||
If you prefer to run the application manually:
|
||||
|
||||
1. **Clone the Repository:**
|
||||
|
||||
1. **Clone the Repository**
|
||||
```bash
|
||||
git clone https://github.com/icereed/paperless-gpt.git
|
||||
cd paperless-gpt
|
||||
```
|
||||
|
||||
2. **Create a `prompts` Directory:**
|
||||
|
||||
2. **Create a `prompts` Directory**
|
||||
```bash
|
||||
mkdir prompts
|
||||
```
|
||||
|
||||
3. **Build the Docker Image:**
|
||||
|
||||
3. **Build the Docker Image**
|
||||
```bash
|
||||
docker build -t paperless-gpt .
|
||||
```
|
||||
|
||||
4. **Run the Container:**
|
||||
|
||||
4. **Run the Container**
|
||||
```bash
|
||||
docker run -d \
|
||||
-e PAPERLESS_BASE_URL='http://your_paperless_ngx_url' \
|
||||
|
@ -125,193 +135,309 @@ If you prefer to run the application manually:
|
|||
-e VISION_LLM_PROVIDER='ollama' \
|
||||
-e VISION_LLM_MODEL='minicpm-v' \
|
||||
-e LOG_LEVEL='info' \
|
||||
-v $(pwd)/prompts:/app/prompts \ # Mount the prompts directory
|
||||
-v $(pwd)/prompts:/app/prompts \
|
||||
-p 8080:8080 \
|
||||
paperless-gpt
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Description | Required |
|
||||
|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
|
||||
| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
|
||||
| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
|
||||
| `PAPERLESS_PUBLIC_URL` | The public URL of your Paperless instance, if it differs from your `PAPERLESS_BASE_URL` (for example, when running in Docker Compose). | No |
|
||||
| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
|
||||
| `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes |
|
||||
| `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. |
|
||||
| `LLM_LANGUAGE` | The likely language of your documents (e.g., `English`, `German`). Default is `English`. | No |
|
||||
| `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No |
|
||||
| `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No |
|
||||
| `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No |
|
||||
| `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No |
|
||||
|
||||
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
|
||||
|------------------------|------------------------------------------------------------------------------------------------------------------|----------|
|
||||
| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes |
|
||||
| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes |
|
||||
| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No |
|
||||
| `MANUAL_TAG` | Tag for manual processing. Default: `paperless-gpt`. | No |
|
||||
| `AUTO_TAG` | Tag for auto processing. Default: `paperless-gpt-auto`. | No |
|
||||
| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes |
|
||||
| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `llama2`. | Yes |
|
||||
| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. |
|
||||
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No |
|
||||
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). Default: `English`. | No |
|
||||
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No |
|
||||
| `VISION_LLM_PROVIDER` | AI backend for OCR (`openai` or `ollama`). | No |
|
||||
| `VISION_LLM_MODEL` | Model name for OCR (e.g. `minicpm-v`). | No |
|
||||
| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. Default: `paperless-gpt-ocr-auto`. | No |
|
||||
| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). Default: `info`. | No |
|
||||
| `LISTEN_INTERFACE` | Address (interface and port) to listen on. Default: `:8080`. | No |
|
||||
| `WEBUI_PATH` | Path for static content. Default: `./web-app/dist`. | No |
|
||||
| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||
| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||
| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. Default: `5`. | No |
|
||||
|
||||
### Custom Prompt Templates
|
||||
|
||||
You can customize the prompt templates used by paperless-gpt to generate titles and tags. By default, the application uses built-in templates, but you can modify them by editing the template files.
|
||||
paperless-gpt’s flexible **prompt templates** let you shape how AI responds:
|
||||
|
||||
#### Prompt Templates Directory
|
||||
1. **`title_prompt.tmpl`**: For document titles.
|
||||
2. **`tag_prompt.tmpl`**: For tagging logic.
|
||||
3. **`ocr_prompt.tmpl`**: For LLM OCR.
|
||||
|
||||
The prompt templates are stored in the `prompts` directory inside the application. The two main template files are:
|
||||
|
||||
- `title_prompt.tmpl`: Template used for generating document titles.
|
||||
- `tag_prompt.tmpl`: Template used for generating document tags.
|
||||
|
||||
#### Mounting the Prompts Directory
|
||||
|
||||
To modify the prompt templates, you need to mount a local `prompts` directory into the container.
|
||||
|
||||
**Docker Compose Example:**
|
||||
Mount them into your container via:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
paperless-gpt:
|
||||
image: icereed/paperless-gpt:latest
|
||||
# ... (other configurations)
|
||||
volumes:
|
||||
- ./prompts:/app/prompts # Mount the prompts directory
|
||||
- ./prompts:/app/prompts
|
||||
```
|
||||
|
||||
**Docker Run Command Example:**
|
||||
Then tweak at will—**paperless-gpt** reloads them automatically on startup!
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
# ... (other configurations)
|
||||
-v $(pwd)/prompts:/app/prompts \
|
||||
paperless-gpt
|
||||
```
|
||||
|
||||
#### Editing the Prompt Templates
|
||||
|
||||
1. **Start the Container:**
|
||||
|
||||
When you first start the container with the `prompts` directory mounted, it will automatically create the default template files in your local `prompts` directory if they do not exist.
|
||||
|
||||
2. **Edit the Template Files:**
|
||||
|
||||
- Open `prompts/title_prompt.tmpl` and `prompts/tag_prompt.tmpl` with your favorite text editor.
|
||||
- Modify the templates using Go's `text/template` syntax.
|
||||
- Save the changes.
|
||||
|
||||
3. **Restart the Container (if necessary):**
|
||||
|
||||
The application automatically reloads the templates when it starts. If the container is already running, you may need to restart it to apply the changes.
|
||||
|
||||
#### Template Syntax and Variables
|
||||
|
||||
The templates use Go's `text/template` syntax and have access to the following variables:
|
||||
|
||||
- **For `title_prompt.tmpl`:**
|
||||
|
||||
- `{{.Language}}`: The language specified in `LLM_LANGUAGE` (default is `English`).
|
||||
- `{{.Content}}`: The content of the document.
|
||||
|
||||
- **For `tag_prompt.tmpl`:**
|
||||
|
||||
- `{{.Language}}`: The language specified in `LLM_LANGUAGE`.
|
||||
- `{{.AvailableTags}}`: A list (array) of available tags from paperless-ngx.
|
||||
- `{{.Title}}`: The suggested title for the document.
|
||||
- `{{.Content}}`: The content of the document.
|
||||
|
||||
**Example `title_prompt.tmpl`:**
|
||||
|
||||
```text
|
||||
I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
|
||||
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
|
||||
Respond only with the title, without any additional information. The content is likely in {{.Language}}.
|
||||
|
||||
Be sure to add one fitting emoji at the beginning of the title to make it more visually appealing.
|
||||
|
||||
Content:
|
||||
{{.Content}}
|
||||
```
|
||||
|
||||
**Example `tag_prompt.tmpl`:**
|
||||
|
||||
```text
|
||||
I will provide you with the content and the title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in {{.Language}}.
|
||||
|
||||
Available Tags:
|
||||
{{.AvailableTags | join ","}}
|
||||
|
||||
Title:
|
||||
{{.Title}}
|
||||
|
||||
Content:
|
||||
{{.Content}}
|
||||
|
||||
Please concisely select the {{.Language}} tags from the list above that best describe the document.
|
||||
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
|
||||
```
|
||||
|
||||
**Note:** Advanced users can use additional functions from the [Sprig](https://masterminds.github.io/sprig/) template library, which is included in the application.
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
1. **Tag Documents in paperless-ngx:**
|
||||
1. **Tag Documents**
|
||||
- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.
|
||||
|
||||
- Add the tag `paperless-gpt` to documents you want to process. This tag is configurable via the `tagToFilter` variable in the code (default is `paperless-gpt`).
|
||||
2. **Visit Web UI**
|
||||
- Go to `http://localhost:8080` (or your host) in your browser.
|
||||
|
||||
2. **Access the paperless-gpt Interface:**
|
||||
3. **Generate & Apply Suggestions**
|
||||
- Click “Generate Suggestions” to see AI-proposed titles/tags.
|
||||
- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
|
||||
|
||||
- Open your browser and navigate to `http://localhost:8080`.
|
||||
4. **Try LLM-Based OCR (Experimental)**
|
||||
- If you have set `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, AI-based OCR can read your scanned PDFs.
|
||||
- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
|
||||
|
||||
3. **Process Documents:**
|
||||
**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
|
||||
|
||||
- Click on **"Generate Suggestions"** to let the LLM generate title suggestions based on the document content.
|
||||
---
|
||||
|
||||
4. **Review and Apply Titles and Tags:**
|
||||
## LLM-Based OCR: Compare for Yourself
|
||||
|
||||
- Review the suggested titles. You can edit them if necessary.
|
||||
- Click on **"Apply Suggestions"** to update the document titles in paperless-ngx.
|
||||
<details>
|
||||
<summary>Click to expand the vanilla OCR vs. AI-powered OCR comparison</summary>
|
||||
|
||||
5. **Experimental OCR Feature:**
|
||||
### Example 1
|
||||
|
||||
- Send documents to a vision LLM for OCR processing.
|
||||
- Example configuration to enable OCR with Ollama:
|
||||
```env
|
||||
VISION_LLM_PROVIDER=ollama
|
||||
VISION_LLM_MODEL=minicpm-v
|
||||
**Image**:
|
||||
|
||||

|
||||
|
||||
**Vanilla Paperless-ngx OCR**:
|
||||
```
|
||||
La Grande Recre
|
||||
|
||||
Gentre Gommercial 1'Esplanade
|
||||
1349 LOLNAIN LA NEWWE
|
||||
TA BERBOGAAL Tel =. 010 45,96 12
|
||||
Ticket 1440112 03/11/2006 a 13597:
|
||||
4007176614518. DINOS. TYRAMNESA
|
||||
TOTAET.T.LES
|
||||
ReslE par Lask-Euron
|
||||
Rencu en Cash Euro
|
||||
V.14.6 -Hotgese = VALERTE
|
||||
TICKET A-GONGERVER PORR TONT. EEHANGE
|
||||
HERET ET A BIENTOT
|
||||
```
|
||||
|
||||
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
||||
```
|
||||
La Grande Récré
|
||||
Centre Commercial l'Esplanade
|
||||
1348 LOUVAIN LA NEUVE
|
||||
TVA 860826401 Tel : 010 45 95 12
|
||||
Ticket 14421 le 03/11/2006 à 15:27:18
|
||||
4007176614518 DINOS TYRANNOSA 14.90
|
||||
TOTAL T.T.C. 14.90
|
||||
Réglé par Cash Euro 50.00
|
||||
Rendu en Cash Euro 35.10
|
||||
V.14.6 Hôtesse : VALERIE
|
||||
TICKET A CONSERVER POUR TOUT ECHANGE
|
||||
MERCI ET A BIENTOT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 2
|
||||
|
||||
**Image**:
|
||||
|
||||

|
||||
|
||||
**Vanilla Paperless-ngx OCR**:
|
||||
```
|
||||
Invoice Number: 1-996-84199
|
||||
|
||||
Fed: Invoica Date: Sep01, 2014
|
||||
Accaunt Number: 1334-8037-4
|
||||
Page: 1012
|
||||
|
||||
Fod£x Tax ID 71.0427007
|
||||
|
||||
IRISINC
|
||||
SHARON ANDERSON
|
||||
4731 W ATLANTIC AVE STE BI
|
||||
DELRAY BEACH FL 33445-3897 ’ a
|
||||
Invoice Questions?
|
||||
|
||||
Bing, ‚Account Shipping Address: Contact FedEx Reı
|
||||
|
||||
ISINC
|
||||
4731 W ATLANTIC AVE Phone: (800) 622-1147 M-F 7-6 (CST)
|
||||
DELRAY BEACH FL 33445-3897 US Fax: (800) 548-3020
|
||||
|
||||
Internet: www.fedex.com
|
||||
|
||||
Invoice Summary Sep 01, 2014
|
||||
|
||||
FodEx Ground Services
|
||||
Other Charges 11.00
|
||||
Total Charges 11.00 Da £
|
||||
>
|
||||
polo) Fz// /G
|
||||
TOTAL THIS INVOICE .... usps 11.00 P 2/1 f
|
||||
|
||||
‘The only charges accrued for this period is the Weekly Service Charge.
|
||||
|
||||
The Fedix Ground aceounts teferencedin his involce have been transteired and assigned 10, are owned by,andare payable to FedEx Express:
|
||||
|
||||
To onsurs propor credit, plasa raturn this portion wirh your payment 10 FodEx
|
||||
‚Please do not staple or fold. Ploase make your chack payablı to FedEx.
|
||||
|
||||
[TI For change ol address, hc har and camphat lrm or never ide
|
||||
|
||||
Remittance Advice
|
||||
Your payment is due by Sep 16, 2004
|
||||
|
||||
Number Number Dus
|
||||
|
||||
1334803719968 41993200000110071
|
||||
|
||||
AT 01 0391292 468448196 A**aDGT
|
||||
|
||||
IRISINC Illallun elalalssollallansdHilalellund
|
||||
SHARON ANDERSON
|
||||
|
||||
4731 W ATLANTIC AVE STEBI FedEx
|
||||
|
||||
DELRAY BEACH FL 334453897 PO. Box 94516
|
||||
|
||||
PALATINE IL 60094-4515
|
||||
```
|
||||
|
||||
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
||||
```
|
||||
FedEx. Invoice Number: 1-996-84199
|
||||
Invoice Date: Sep 01, 2014
|
||||
Account Number: 1334-8037-4
|
||||
Page: 1 of 2
|
||||
FedEx Tax ID: 71-0427007
|
||||
|
||||
I R I S INC
|
||||
SHARON ANDERSON
|
||||
4731 W ATLANTIC AVE STE B1
|
||||
DELRAY BEACH FL 33445-3897
|
||||
Invoice Questions?
|
||||
Billing Account Shipping Address: Contact FedEx Revenue Services
|
||||
I R I S INC Phone: (800) 622-1147 M-F 7-6 (CST)
|
||||
4731 W ATLANTIC AVE Fax: (800) 548-3020
|
||||
DELRAY BEACH FL 33445-3897 US Internet: www.fedex.com
|
||||
|
||||
Invoice Summary Sep 01, 2014
|
||||
|
||||
FedEx Ground Services
|
||||
Other Charges 11.00
|
||||
|
||||
Total Charges .......................................................... USD $ 11.00
|
||||
|
||||
TOTAL THIS INVOICE .............................................. USD $ 11.00
|
||||
|
||||
The only charges accrued for this period is the Weekly Service Charge.
|
||||
|
||||
RECEIVED
|
||||
SEP _ 8 REC'D
|
||||
BY: _
|
||||
|
||||
posted 9/21/14
|
||||
|
||||
The FedEx Ground accounts referenced in this invoice have been transferred and assigned to, are owned by, and are payable to FedEx Express.
|
||||
|
||||
To ensure proper credit, please return this portion with your payment to FedEx.
|
||||
Please do not staple or fold. Please make your check payable to FedEx.
|
||||
|
||||
❑ For change of address, check here and complete form on reverse side.
|
||||
|
||||
Remittance Advice
|
||||
Your payment is due by Sep 16, 2004
|
||||
|
||||
Invoice
|
||||
Number
|
||||
1-996-84199
|
||||
|
||||
Account
|
||||
Number
|
||||
1334-8037-4
|
||||
|
||||
Amount
|
||||
Due
|
||||
USD $ 11.00
|
||||
|
||||
133480371996841993200000110071
|
||||
|
||||
AT 01 031292 468448196 A**3DGT
|
||||
|
||||
I R I S INC
|
||||
SHARON ANDERSON
|
||||
4731 W ATLANTIC AVE STE B1
|
||||
DELRAY BEACH FL 33445-3897
|
||||
|
||||
FedEx
|
||||
P.O. Box 94515
|
||||
```
|
||||
|
||||
---
|
||||
</details>
|
||||
|
||||
**Why Does It Matter?**
|
||||
- Traditional OCR often jumbles text from complex or low-quality scans.
|
||||
- Large Language Models interpret context and correct likely errors, producing results that are more precise and readable.
|
||||
- You can integrate these cleaned-up texts into your **paperless-ngx** pipeline for better tagging, searching, and archiving.
|
||||
|
||||
|
||||
|
||||
|
||||
### How It Works
|
||||
|
||||
- **Vanilla OCR** typically uses classical methods or Tesseract-like engines to extract text, which can result in garbled outputs for complex fonts or poor-quality scans.
|
||||
- **LLM-Powered OCR** uses your chosen AI backend—OpenAI or Ollama—to interpret the image’s text in a more context-aware manner. This leads to fewer errors and more coherent text.
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTING.md) before submitting a pull request.
|
||||
**Pull requests** and **issues** are welcome!
|
||||
1. Fork the repo
|
||||
2. Create a branch (`feature/my-awesome-update`)
|
||||
3. Commit changes (`git commit -m "Improve X"`)
|
||||
4. Open a PR
|
||||
|
||||
1. **Fork the Repository**
|
||||
Check out our [contributing guidelines](CONTRIBUTING.md) for details.
|
||||
|
||||
2. **Create a Feature Branch**
|
||||
|
||||
```bash
|
||||
git checkout -b feature/my-new-feature
|
||||
```
|
||||
|
||||
3. **Commit Your Changes**
|
||||
|
||||
```bash
|
||||
git commit -am 'Add some feature'
|
||||
```
|
||||
|
||||
4. **Push to the Branch**
|
||||
|
||||
```bash
|
||||
git push origin feature/my-new-feature
|
||||
```
|
||||
|
||||
5. **Create a Pull Request**
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||
paperless-gpt is licensed under the [MIT License](LICENSE). Feel free to adapt and share!
|
||||
|
||||
---
|
||||
|
||||
## Star History
|
||||
|
||||
[](https://star-history.com/#icereed/paperless-gpt&Date)
|
||||
|
||||
---
|
||||
|
||||
**Disclaimer:** This project is not affiliated with the official paperless-ngx project. Use at your own discretion.
|
||||
## Disclaimer
|
||||
This project is **not** officially affiliated with [paperless-ngx][paperless-ngx]. Use at your own risk.
|
||||
|
||||
---
|
||||
|
||||
**paperless-gpt**: The **LLM-based** companion your doc management has been waiting for. Enjoy effortless, intelligent document titles, tags, and next-level OCR.
|
||||
|
||||
[paperless-ngx]: https://github.com/paperless-ngx/paperless-ngx
|
||||
[docker-install]: https://docs.docker.com/get-docker/
|
||||
|
|
15
app_llm.go
15
app_llm.go
|
@ -5,9 +5,12 @@ import (
|
|||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"image"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
_ "image/jpeg"
|
||||
|
||||
"github.com/tmc/langchaingo/llms"
|
||||
)
|
||||
|
||||
|
@ -82,15 +85,27 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
|
|||
|
||||
prompt := promptBuffer.String()
|
||||
|
||||
// Log the image dimensions
|
||||
img, _, err := image.Decode(bytes.NewReader(jpegBytes))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error decoding image: %v", err)
|
||||
}
|
||||
bounds := img.Bounds()
|
||||
log.Debugf("Image dimensions: %dx%d", bounds.Dx(), bounds.Dy())
|
||||
|
||||
// If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision
|
||||
var parts []llms.ContentPart
|
||||
if strings.ToLower(visionLlmProvider) != "openai" {
|
||||
// Log image size in kilobytes
|
||||
log.Debugf("Image size: %d KB", len(jpegBytes)/1024)
|
||||
parts = []llms.ContentPart{
|
||||
llms.BinaryPart("image/jpeg", jpegBytes),
|
||||
llms.TextPart(prompt),
|
||||
}
|
||||
} else {
|
||||
base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
|
||||
// Log image size in kilobytes
|
||||
log.Debugf("Image size: %d KB", len(base64Image)/1024)
|
||||
parts = []llms.ContentPart{
|
||||
llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
|
||||
llms.TextPart(prompt),
|
||||
|
|
BIN
demo.gif
BIN
demo.gif
Binary file not shown.
Before Width: | Height: | Size: 1.2 MiB |
BIN
demo.mp4
Normal file
BIN
demo.mp4
Normal file
Binary file not shown.
BIN
demo/ocr-example1.jpg
Normal file
BIN
demo/ocr-example1.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 30 KiB |
BIN
demo/ocr-example2.jpg
Normal file
BIN
demo/ocr-example2.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 73 KiB |
27
go.mod
27
go.mod
|
@ -2,24 +2,25 @@ module paperless-gpt
|
|||
|
||||
go 1.22.0
|
||||
|
||||
toolchain go1.22.2
|
||||
toolchain go1.23.4
|
||||
|
||||
require (
|
||||
github.com/Masterminds/sprig/v3 v3.2.3
|
||||
github.com/Masterminds/sprig/v3 v3.3.0
|
||||
github.com/gen2brain/go-fitz v1.24.14
|
||||
github.com/gin-gonic/gin v1.10.0
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/tmc/langchaingo v0.1.12
|
||||
golang.org/x/sync v0.9.0
|
||||
golang.org/x/sync v0.10.0
|
||||
gorm.io/driver/sqlite v1.5.6
|
||||
gorm.io/gorm v1.25.12
|
||||
)
|
||||
|
||||
require (
|
||||
dario.cat/mergo v1.0.1 // indirect
|
||||
github.com/Masterminds/goutils v1.1.1 // indirect
|
||||
github.com/Masterminds/semver/v3 v3.2.0 // indirect
|
||||
github.com/Masterminds/semver/v3 v3.3.0 // indirect
|
||||
github.com/bytedance/sonic v1.11.6 // indirect
|
||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
|
@ -27,13 +28,14 @@ require (
|
|||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dlclark/regexp2 v1.10.0 // indirect
|
||||
github.com/ebitengine/purego v0.8.0 // indirect
|
||||
github.com/fatih/color v1.18.0 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
|
||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||
github.com/go-playground/locales v0.14.1 // indirect
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/huandu/xstrings v1.3.3 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/imdario/mergo v0.3.13 // indirect
|
||||
github.com/jinzhu/inflection v1.0.0 // indirect
|
||||
github.com/jinzhu/now v1.1.5 // indirect
|
||||
|
@ -41,23 +43,24 @@ require (
|
|||
github.com/jupiterrider/ffi v0.2.0 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.24 // indirect
|
||||
github.com/mitchellh/copystructure v1.0.0 // indirect
|
||||
github.com/mitchellh/reflectwalk v1.0.0 // indirect
|
||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/shopspring/decimal v1.2.0 // indirect
|
||||
github.com/spf13/cast v1.3.1 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/spf13/cast v1.7.0 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
golang.org/x/crypto v0.23.0 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
golang.org/x/net v0.25.0 // indirect
|
||||
golang.org/x/sys v0.20.0 // indirect
|
||||
golang.org/x/sys v0.25.0 // indirect
|
||||
golang.org/x/text v0.20.0 // indirect
|
||||
google.golang.org/protobuf v1.34.1 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
|
|
32
go.sum
32
go.sum
|
@ -1,9 +1,15 @@
|
|||
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
||||
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
||||
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
|
||||
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
|
||||
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
|
||||
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
|
||||
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||
github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
|
||||
github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
|
||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||
|
@ -19,6 +25,8 @@ github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq
|
|||
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE=
|
||||
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
|
||||
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
||||
|
@ -45,6 +53,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
|||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
|
||||
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
|
||||
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
|
||||
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
|
||||
|
@ -62,14 +72,21 @@ github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY
|
|||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
||||
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
|
||||
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
|
||||
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
|
||||
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
|
||||
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
|
||||
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
|
@ -83,10 +100,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
|
|||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
|
||||
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
|
||||
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
|
||||
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
|
@ -101,6 +122,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
|
|||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE=
|
||||
github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
|
@ -116,6 +139,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
|
|||
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
|
||||
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
|
@ -127,17 +152,24 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
|
|||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
|
||||
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
|
||||
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
|
||||
|
|
33
jobs.go
33
jobs.go
|
@ -2,10 +2,8 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
@ -125,38 +123,13 @@ func processJob(app *App, job *Job) {
|
|||
|
||||
ctx := context.Background()
|
||||
|
||||
// Download images of the document
|
||||
imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, job.DocumentID)
|
||||
fullOcrText, err := app.ProcessDocumentOCR(ctx, job.DocumentID)
|
||||
if err != nil {
|
||||
logger.Infof("Error downloading document images for job %s: %v", job.ID, err)
|
||||
jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error downloading document images: %v", err))
|
||||
logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err)
|
||||
jobStore.updateJobStatus(job.ID, "failed", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var ocrTexts []string
|
||||
for i, imagePath := range imagePaths {
|
||||
imageContent, err := os.ReadFile(imagePath)
|
||||
if err != nil {
|
||||
logger.Errorf("Error reading image file for job %s: %v", job.ID, err)
|
||||
jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error reading image file: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
ocrText, err := app.doOCRViaLLM(ctx, imageContent)
|
||||
if err != nil {
|
||||
logger.Errorf("Error performing OCR for job %s: %v", job.ID, err)
|
||||
jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error performing OCR: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
ocrTexts = append(ocrTexts, ocrText)
|
||||
jobStore.updatePagesDone(job.ID, i+1) // Update PagesDone after each page is processed
|
||||
}
|
||||
|
||||
// Combine the OCR texts
|
||||
fullOcrText := strings.Join(ocrTexts, "\n\n")
|
||||
|
||||
// Update job status and result
|
||||
jobStore.updateJobStatus(job.ID, "completed", fullOcrText)
|
||||
logger.Infof("Job completed: %s", job.ID)
|
||||
}
|
||||
|
|
161
main.go
161
main.go
|
@ -6,12 +6,15 @@ import (
|
|||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/Masterminds/sprig/v3"
|
||||
"github.com/fatih/color"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/tmc/langchaingo/llms"
|
||||
|
@ -30,13 +33,20 @@ var (
|
|||
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
||||
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
||||
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
||||
manualTag = "paperless-gpt"
|
||||
autoTag = "paperless-gpt-auto"
|
||||
manualTag = os.Getenv("MANUAL_TAG")
|
||||
autoTag = os.Getenv("AUTO_TAG")
|
||||
manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet
|
||||
autoOcrTag = os.Getenv("AUTO_OCR_TAG")
|
||||
llmProvider = os.Getenv("LLM_PROVIDER")
|
||||
llmModel = os.Getenv("LLM_MODEL")
|
||||
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
|
||||
visionLlmModel = os.Getenv("VISION_LLM_MODEL")
|
||||
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
|
||||
listenInterface = os.Getenv("LISTEN_INTERFACE")
|
||||
webuiPath = os.Getenv("WEBUI_PATH")
|
||||
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
|
||||
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
|
||||
limitOcrPages int // Will be read from OCR_LIMIT_PAGES
|
||||
|
||||
// Templates
|
||||
titleTemplate *template.Template
|
||||
|
@ -68,7 +78,7 @@ Please concisely select the {{.Language}} tags from the list above that best des
|
|||
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
|
||||
`
|
||||
|
||||
defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format.`
|
||||
defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format but without a code block.`
|
||||
)
|
||||
|
||||
// App struct to hold dependencies
|
||||
|
@ -81,11 +91,14 @@ type App struct {
|
|||
|
||||
func main() {
|
||||
// Validate Environment Variables
|
||||
validateEnvVars()
|
||||
validateOrDefaultEnvVars()
|
||||
|
||||
// Initialize logrus logger
|
||||
initLogger()
|
||||
|
||||
// Print version
|
||||
printVersion()
|
||||
|
||||
// Initialize PaperlessClient
|
||||
client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken)
|
||||
|
||||
|
@ -123,7 +136,23 @@ func main() {
|
|||
|
||||
backoffDuration := minBackoffDuration
|
||||
for {
|
||||
processedCount, err := app.processAutoTagDocuments()
|
||||
processedCount, err := func() (int, error) {
|
||||
count := 0
|
||||
if isOcrEnabled() {
|
||||
ocrCount, err := app.processAutoOcrTagDocuments()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err)
|
||||
}
|
||||
count += ocrCount
|
||||
}
|
||||
autoCount, err := app.processAutoTagDocuments()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in processAutoTagDocuments: %w", err)
|
||||
}
|
||||
count += autoCount
|
||||
return count, nil
|
||||
}()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Error in processAutoTagDocuments: %v", err)
|
||||
time.Sleep(backoffDuration)
|
||||
|
@ -187,25 +216,54 @@ func main() {
|
|||
})
|
||||
}
|
||||
|
||||
if webuiPath == "" {
|
||||
webuiPath = "./web-app/dist"
|
||||
}
|
||||
// Serve static files for the frontend under /assets
|
||||
router.StaticFS("/assets", gin.Dir("./web-app/dist/assets", true))
|
||||
router.StaticFile("/vite.svg", "./web-app/dist/vite.svg")
|
||||
router.StaticFS("/assets", gin.Dir(webuiPath+"/assets", true))
|
||||
router.StaticFile("/vite.svg", webuiPath+"/vite.svg")
|
||||
|
||||
// Catch-all route for serving the frontend
|
||||
router.NoRoute(func(c *gin.Context) {
|
||||
c.File("./web-app/dist/index.html")
|
||||
c.File(webuiPath + "/index.html")
|
||||
})
|
||||
|
||||
// Start OCR worker pool
|
||||
numWorkers := 1 // Number of workers to start
|
||||
startWorkerPool(app, numWorkers)
|
||||
|
||||
log.Infoln("Server started on port :8080")
|
||||
if err := router.Run(":8080"); err != nil {
|
||||
if listenInterface == "" {
|
||||
listenInterface = ":8080"
|
||||
}
|
||||
log.Infoln("Server started on interface", listenInterface)
|
||||
if err := router.Run(listenInterface); err != nil {
|
||||
log.Fatalf("Failed to run server: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func printVersion() {
|
||||
cyan := color.New(color.FgCyan).SprintFunc()
|
||||
yellow := color.New(color.FgYellow).SprintFunc()
|
||||
|
||||
banner := `
|
||||
╔═══════════════════════════════════════╗
|
||||
║ Paperless GPT ║
|
||||
╚═══════════════════════════════════════╝`
|
||||
|
||||
fmt.Printf("%s\n", cyan(banner))
|
||||
fmt.Printf("\n%s %s\n", yellow("Version:"), version)
|
||||
if commit != "" {
|
||||
fmt.Printf("%s %s\n", yellow("Commit:"), commit)
|
||||
}
|
||||
if buildDate != "" {
|
||||
fmt.Printf("%s %s\n", yellow("Build Date:"), buildDate)
|
||||
}
|
||||
fmt.Printf("%s %s/%s\n", yellow("Platform:"), runtime.GOOS, runtime.GOARCH)
|
||||
fmt.Printf("%s %s\n", yellow("Go Version:"), runtime.Version())
|
||||
fmt.Printf("%s %s\n", yellow("Started:"), time.Now().Format(time.RFC1123))
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
func initLogger() {
|
||||
switch logLevel {
|
||||
case "debug":
|
||||
|
@ -232,8 +290,32 @@ func isOcrEnabled() bool {
|
|||
return visionLlmModel != "" && visionLlmProvider != ""
|
||||
}
|
||||
|
||||
// validateEnvVars ensures all necessary environment variables are set
|
||||
func validateEnvVars() {
|
||||
// validateOrDefaultEnvVars applies defaults for the optional tag variables and ensures all necessary environment variables are set
|
||||
func validateOrDefaultEnvVars() {
|
||||
if manualTag == "" {
|
||||
manualTag = "paperless-gpt"
|
||||
}
|
||||
fmt.Printf("Using %s as manual tag\n", manualTag)
|
||||
|
||||
if autoTag == "" {
|
||||
autoTag = "paperless-gpt-auto"
|
||||
}
|
||||
fmt.Printf("Using %s as auto tag\n", autoTag)
|
||||
|
||||
if manualOcrTag == "" {
|
||||
manualOcrTag = "paperless-gpt-ocr"
|
||||
}
|
||||
if isOcrEnabled() {
|
||||
fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag)
|
||||
}
|
||||
|
||||
if autoOcrTag == "" {
|
||||
autoOcrTag = "paperless-gpt-ocr-auto"
|
||||
}
|
||||
if isOcrEnabled() {
|
||||
fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag)
|
||||
}
|
||||
|
||||
if paperlessBaseURL == "" {
|
||||
log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.")
|
||||
}
|
||||
|
@ -257,6 +339,19 @@ func validateEnvVars() {
|
|||
if (llmProvider == "openai" || visionLlmProvider == "openai") && openaiAPIKey == "" {
|
||||
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
|
||||
}
|
||||
|
||||
if isOcrEnabled() {
|
||||
rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
|
||||
if rawLimitOcrPages == "" {
|
||||
limitOcrPages = 5
|
||||
} else {
|
||||
var err error
|
||||
limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// processAutoTagDocuments handles the background auto-tagging of documents
|
||||
|
@ -279,8 +374,8 @@ func (app *App) processAutoTagDocuments() (int, error) {
|
|||
|
||||
suggestionRequest := GenerateSuggestionsRequest{
|
||||
Documents: documents,
|
||||
GenerateTitles: true,
|
||||
GenerateTags: true,
|
||||
GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
|
||||
GenerateTags: strings.ToLower(autoGenerateTags) != "false",
|
||||
}
|
||||
|
||||
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
|
||||
|
@ -296,6 +391,44 @@ func (app *App) processAutoTagDocuments() (int, error) {
|
|||
return len(documents), nil
|
||||
}
|
||||
|
||||
// processAutoOcrTagDocuments handles the background auto-tagging of OCR documents
|
||||
func (app *App) processAutoOcrTagDocuments() (int, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
documents, err := app.Client.GetDocumentsByTags(ctx, []string{autoOcrTag})
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error fetching documents with autoOcrTag: %w", err)
|
||||
}
|
||||
|
||||
if len(documents) == 0 {
|
||||
log.Debugf("No documents with tag %s found", autoOcrTag)
|
||||
return 0, nil // No documents to process
|
||||
}
|
||||
|
||||
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)
|
||||
|
||||
documents = documents[:1] // Process only one document at a time
|
||||
|
||||
ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error processing document OCR: %w", err)
|
||||
}
|
||||
log.Debugf("OCR content for document %d: %s", documents[0].ID, ocrContent)
|
||||
|
||||
err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
|
||||
{
|
||||
ID: documents[0].ID,
|
||||
OriginalDocument: documents[0],
|
||||
SuggestedContent: ocrContent,
|
||||
},
|
||||
}, app.Database, false)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error updating documents: %w", err)
|
||||
}
|
||||
|
||||
return 1, nil // Processed one document
|
||||
}
|
||||
|
||||
// removeTagFromList removes a specific tag from a list of tags
|
||||
func removeTagFromList(tags []string, tagToRemove string) []string {
|
||||
filteredTags := []string{}
|
||||
|
|
39
ocr.go
Normal file
39
ocr.go
Normal file
|
@ -0,0 +1,39 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ProcessDocumentOCR processes a document through OCR and returns the combined text
|
||||
func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
|
||||
imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID, limitOcrPages)
|
||||
defer func() {
|
||||
for _, imagePath := range imagePaths {
|
||||
os.Remove(imagePath)
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error downloading document images: %w", err)
|
||||
}
|
||||
|
||||
var ocrTexts []string
|
||||
for _, imagePath := range imagePaths {
|
||||
imageContent, err := os.ReadFile(imagePath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error reading image file: %w", err)
|
||||
}
|
||||
|
||||
ocrText, err := app.doOCRViaLLM(ctx, imageContent)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error performing OCR: %w", err)
|
||||
}
|
||||
log.Debugf("OCR text: %s", ocrText)
|
||||
|
||||
ocrTexts = append(ocrTexts, ocrText)
|
||||
}
|
||||
|
||||
return strings.Join(ocrTexts, "\n\n"), nil
|
||||
}
|
Binary file not shown.
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 92 KiB |
14
paperless.go
14
paperless.go
|
@ -273,6 +273,7 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum
|
|||
|
||||
// remove autoTag and autoOcrTag to prevent an infinite loop (even if they are in the original tags)
|
||||
originalTags = removeTagFromList(originalTags, autoTag)
|
||||
originalTags = removeTagFromList(originalTags, autoOcrTag)
|
||||
|
||||
if len(tags) == 0 {
|
||||
tags = originalTags
|
||||
|
@ -390,7 +391,8 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum
|
|||
}
|
||||
|
||||
// DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images
|
||||
func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int) ([]string, error) {
|
||||
// If limitPages > 0, only the first N pages will be processed
|
||||
func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int, limitPages int) ([]string, error) {
|
||||
// Create a directory named after the document ID
|
||||
docDir := filepath.Join(c.GetCacheFolder(), fmt.Sprintf("/document-%d", documentId))
|
||||
if _, err := os.Stat(docDir); os.IsNotExist(err) {
|
||||
|
@ -403,6 +405,9 @@ func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, document
|
|||
// Check if images already exist
|
||||
var imagePaths []string
|
||||
for n := 0; ; n++ {
|
||||
if limitPages > 0 && n >= limitPages {
|
||||
break
|
||||
}
|
||||
imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n))
|
||||
if _, err := os.Stat(imagePath); os.IsNotExist(err) {
|
||||
break
|
||||
|
@ -451,10 +456,15 @@ func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, document
|
|||
}
|
||||
defer doc.Close()
|
||||
|
||||
totalPages := doc.NumPage()
|
||||
if limitPages > 0 && limitPages < totalPages {
|
||||
totalPages = limitPages
|
||||
}
|
||||
|
||||
var mu sync.Mutex
|
||||
var g errgroup.Group
|
||||
|
||||
for n := 0; n < doc.NumPage(); n++ {
|
||||
for n := 0; n < totalPages; n++ {
|
||||
n := n // capture loop variable
|
||||
g.Go(func() error {
|
||||
mu.Lock()
|
||||
|
|
|
@ -385,7 +385,7 @@ func TestDownloadDocumentAsImages(t *testing.T) {
|
|||
})
|
||||
|
||||
ctx := context.Background()
|
||||
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID)
|
||||
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify that exactly one page was extracted
|
||||
|
@ -422,11 +422,11 @@ func TestDownloadDocumentAsImages_ManyPages(t *testing.T) {
|
|||
env.client.CacheFolder = "tests/tmp"
|
||||
// Clean the cache folder
|
||||
os.RemoveAll(env.client.CacheFolder)
|
||||
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID)
|
||||
imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 50)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify that exactly 52 pages were extracted
|
||||
assert.Len(t, imagePaths, 52)
|
||||
// Verify that exactly 50 pages were extracted - the original doc contains 52 pages
|
||||
assert.Len(t, imagePaths, 50)
|
||||
// The path shall end with tests/tmp/document-321/page000.jpg
|
||||
for _, imagePath := range imagePaths {
|
||||
_, err := os.Stat(imagePath)
|
||||
|
|
17
renovate.json
Normal file
17
renovate.json
Normal file
|
@ -0,0 +1,17 @@
|
|||
{
  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
  "extends": [
    "config:recommended"
  ],
  "regexManagers": [
    {
      "description": "Update VERSION variables in Dockerfiles",
      "fileMatch": ["^Dockerfile$"],
      "matchStrings": [
        "# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) .+?_VERSION=\"?(?<currentValue>.+?)\"?\\s",
        "# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) VERSION=\"?(?<currentValue>.+?)\"?\\s"
      ],
      "versioningTemplate": "{{#if versioning}}{{versioning}}{{else}}semver{{/if}}"
    }
  ]
}
|
7
version.go
Normal file
7
version.go
Normal file
|
@ -0,0 +1,7 @@
|
|||
package main
|
||||
|
||||
var (
|
||||
version = "devVersion"
|
||||
buildDate = "devBuildDate"
|
||||
commit = "devCommit"
|
||||
)
|
114
web-app/package-lock.json
generated
114
web-app/package-lock.json
generated
|
@ -1351,10 +1351,11 @@
|
|||
"dev": true
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "22.10.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz",
|
||||
"integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==",
|
||||
"version": "22.10.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.5.tgz",
|
||||
"integrity": "sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.20.0"
|
||||
}
|
||||
|
@ -1608,15 +1609,16 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@vitejs/plugin-react-swc": {
|
||||
"version": "3.7.0",
|
||||
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.0.tgz",
|
||||
"integrity": "sha512-yrknSb3Dci6svCd/qhHqhFPDSw0QtjumcqdKMoNNzmOl5lMXTTiqzjWtG4Qask2HdvvzaNgSunbQGet8/GrKdA==",
|
||||
"version": "3.7.2",
|
||||
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.2.tgz",
|
||||
"integrity": "sha512-y0byko2b2tSVVf5Gpng1eEhX1OvPC7x8yns1Fx8jDzlJp4LS6CMkCPfLw47cjyoMrshQDoQw4qcgjsU9VvlCew==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@swc/core": "^1.5.7"
|
||||
"@swc/core": "^1.7.26"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"vite": "^4 || ^5"
|
||||
"vite": "^4 || ^5 || ^6"
|
||||
}
|
||||
},
|
||||
"node_modules/acorn": {
|
||||
|
@ -2264,12 +2266,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-react-refresh": {
|
||||
"version": "0.4.12",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.12.tgz",
|
||||
"integrity": "sha512-9neVjoGv20FwYtCP6CB1dzR1vr57ZDNOXst21wd2xJ/cTlM2xLq0GWVlSNTdMn/4BtP6cHYBMCSp1wFBJ9jBsg==",
|
||||
"version": "0.4.16",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.16.tgz",
|
||||
"integrity": "sha512-slterMlxAhov/DZO8NScf6mEeMBBXodFUolijDvrtTxyezyLoTQaa73FyYus/VbTdftd8wBgBxPMRk3poleXNQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"eslint": ">=7"
|
||||
"eslint": ">=8.40"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-scope": {
|
||||
|
@ -2624,10 +2627,11 @@
|
|||
}
|
||||
},
|
||||
"node_modules/globals": {
|
||||
"version": "15.9.0",
|
||||
"resolved": "https://registry.npmjs.org/globals/-/globals-15.9.0.tgz",
|
||||
"integrity": "sha512-SmSKyLLKFbSr6rptvP8izbyxJL4ILwqO9Jg23UA0sDlGlu58V59D1//I3vlc0KJphVdUR7vMjHIplYnzBxorQA==",
|
||||
"version": "15.14.0",
|
||||
"resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz",
|
||||
"integrity": "sha512-OkToC372DtlQeje9/zHIo5CT8lRP/FUgEOKBEhU4e0abL7J7CD24fD9ohiLN5hagG/kWCYj4K5oaxxtj2Z0Dig==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
|
@ -2885,12 +2889,16 @@
|
|||
}
|
||||
},
|
||||
"node_modules/lilconfig": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz",
|
||||
"integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==",
|
||||
"version": "3.1.3",
|
||||
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
|
||||
"integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/antonk52"
|
||||
}
|
||||
},
|
||||
"node_modules/lines-and-columns": {
|
||||
|
@ -3210,10 +3218,11 @@
|
|||
}
|
||||
},
|
||||
"node_modules/picocolors": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.0.tgz",
|
||||
"integrity": "sha512-TQ92mBOW0l3LeMeyLV6mzy/kWr8lkd/hp3mTg7wYK7zJhuBStmGMBG0BdeDZS/dZx1IukaX6Bk11zcln25o1Aw==",
|
||||
"dev": true
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
|
||||
"integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/picomatch": {
|
||||
"version": "2.3.1",
|
||||
|
@ -3246,9 +3255,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/postcss": {
|
||||
"version": "8.4.47",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz",
|
||||
"integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==",
|
||||
"version": "8.4.49",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz",
|
||||
"integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
|
@ -3264,9 +3273,10 @@
|
|||
"url": "https://github.com/sponsors/ai"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nanoid": "^3.3.7",
|
||||
"picocolors": "^1.1.0",
|
||||
"picocolors": "^1.1.1",
|
||||
"source-map-js": "^1.2.1"
|
||||
},
|
||||
"engines": {
|
||||
|
@ -3344,18 +3354,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/postcss-load-config/node_modules/lilconfig": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz",
|
||||
"integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/antonk52"
|
||||
}
|
||||
},
|
||||
"node_modules/postcss-nested": {
|
||||
"version": "6.2.0",
|
||||
"resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
|
||||
|
@ -3938,33 +3936,34 @@
|
|||
"integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew=="
|
||||
},
|
||||
"node_modules/tailwindcss": {
|
||||
"version": "3.4.12",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.12.tgz",
|
||||
"integrity": "sha512-Htf/gHj2+soPb9UayUNci/Ja3d8pTmu9ONTfh4QY8r3MATTZOzmv6UYWF7ZwikEIC8okpfqmGqrmDehua8mF8w==",
|
||||
"version": "3.4.17",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz",
|
||||
"integrity": "sha512-w33E2aCvSDP0tW9RZuNXadXlkHXqFzSkQew/aIa2i/Sj8fThxwovwlXHSPXTbAHwEIhBFXAedUhP2tueAKP8Og==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@alloc/quick-lru": "^5.2.0",
|
||||
"arg": "^5.0.2",
|
||||
"chokidar": "^3.5.3",
|
||||
"chokidar": "^3.6.0",
|
||||
"didyoumean": "^1.2.2",
|
||||
"dlv": "^1.1.3",
|
||||
"fast-glob": "^3.3.0",
|
||||
"fast-glob": "^3.3.2",
|
||||
"glob-parent": "^6.0.2",
|
||||
"is-glob": "^4.0.3",
|
||||
"jiti": "^1.21.0",
|
||||
"lilconfig": "^2.1.0",
|
||||
"micromatch": "^4.0.5",
|
||||
"jiti": "^1.21.6",
|
||||
"lilconfig": "^3.1.3",
|
||||
"micromatch": "^4.0.8",
|
||||
"normalize-path": "^3.0.0",
|
||||
"object-hash": "^3.0.0",
|
||||
"picocolors": "^1.0.0",
|
||||
"postcss": "^8.4.23",
|
||||
"picocolors": "^1.1.1",
|
||||
"postcss": "^8.4.47",
|
||||
"postcss-import": "^15.1.0",
|
||||
"postcss-js": "^4.0.1",
|
||||
"postcss-load-config": "^4.0.1",
|
||||
"postcss-nested": "^6.0.1",
|
||||
"postcss-selector-parser": "^6.0.11",
|
||||
"resolve": "^1.22.2",
|
||||
"sucrase": "^3.32.0"
|
||||
"postcss-load-config": "^4.0.2",
|
||||
"postcss-nested": "^6.2.0",
|
||||
"postcss-selector-parser": "^6.1.2",
|
||||
"resolve": "^1.22.8",
|
||||
"sucrase": "^3.35.0"
|
||||
},
|
||||
"bin": {
|
||||
"tailwind": "lib/cli.js",
|
||||
|
@ -4062,10 +4061,11 @@
|
|||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.6.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz",
|
||||
"integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==",
|
||||
"version": "5.7.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
|
||||
"integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
|
|
Loading…
Reference in a new issue