Mirror of https://github.com/icereed/paperless-gpt.git (synced 2025-03-13 13:18:02 -05:00)
Compare commits
67 commits
53bfc7d252
14567480a8
72a23ac51e
60c141e815
3788abc9cb
d477e26048
184c1a8600
cbd9c5438c
360663b05b
7c7449e197
a5a5afe276
3bb1415b25
df396dac78
c514914b4e
5ac6bb7532
6c03fca89c
0b5b367b0a
9b0204180f
5c6f50a1a3
74e4419b7c
fc1d69a93b
1d6cea481f
c95487e834
3bcee9c1c5
ad74e28473
b5fb1cb040
992b3b824b
474fde659c
80365f95a0
bfe1e00392
c1b4f8344f
b6b8948fe3
b7fab1af8a
1647219fa8
40d699e777
d5bd23828d
7995e98e96
57bd178670
e547596518
5d316a2187
16e478687e
d99f92f65f
29c2bb6d04
8936e5ea89
d61337fed3
e964fdb147
cf7c93c314
18ba388af1
fef7da9cdc
c8c0dd75ff
b1a7b9992d
0fcc4fcf30
0bc7a25e44
24b0b30b14
7c7c593faa
4e71f5f841
abeab949a1
712ed53c1c
87b0b4fc07
caa27c4d53
47b0f07b15
46922d8899
93773725f4
5b98ca1e3c
b3c94f6196
0b46cbf3b5
56b3497253
30 changed files with 2483 additions and 629 deletions
.github/workflows/docker-build-and-push.yml (vendored): 1 change
@@ -241,6 +241,7 @@ jobs:
         run: npm run test:e2e
         env:
           CI: true
+          DEBUG: testcontainers:containers
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           PAPERLESS_GPT_IMAGE: ${{ env.PAPERLESS_GPT_IMAGE }}
       - name: Upload Playwright Report
@@ -91,9 +91,20 @@ We welcome pull requests (PRs). Please follow these guidelines:
 4. **Run the backend server**:
 
    ```bash
-   go run main.go
+   mkdir dist
+   touch dist/index.html
+   go build
+   ./paperless-gpt
+   ```
+
+5. **Run the backend server with frontend built in**:
+
+   ```bash
+   cd web-app && npm install && npm run build && cp -r dist ..
+   go build
+   ./paperless-gpt
    ```
 
 ### Frontend Setup
 
 1. **Navigate to the frontend directory**:
Dockerfile: 17 changes
@@ -25,22 +25,22 @@ COPY web-app /app/
 RUN npm run build
 
 # Stage 2: Build the Go binary
-FROM golang:1.23.5-alpine3.21 AS builder
+FROM golang:1.24.1-alpine3.21 AS builder
 
 # Set the working directory inside the container
 WORKDIR /app
 
 # Package versions for Renovate
 # renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
-ENV GCC_VERSION=14.2.0-r4
+ENV GCC_VERSION="14.2.0-r4"
 # renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
-ENV MUSL_DEV_VERSION=1.2.5-r8
+ENV MUSL_DEV_VERSION="1.2.5-r9"
 # renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
-ENV MUPDF_VERSION=1.24.10-r0
+ENV MUPDF_VERSION="1.24.10-r0"
 # renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
-ENV MUPDF_DEV_VERSION=1.24.10-r0
+ENV MUPDF_DEV_VERSION="1.24.10-r0"
 # renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
-ENV SED_VERSION=4.9-r2
+ENV SED_VERSION="4.9-r2"
 
 # Install necessary packages with pinned versions
 RUN apk add --no-cache \
@@ -60,10 +60,11 @@ RUN go mod download
 RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3
 
 # Copy the frontend build
-COPY --from=frontend /app/dist /app/dist
+COPY --from=frontend /app/dist /app/web-app/dist
 
 # Copy the Go source files
 COPY *.go .
+COPY ocr ./ocr
 
 # Import ARGs from top level
 ARG VERSION
@@ -81,7 +82,7 @@ RUN sed -i \
 RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
 
 # Stage 3: Create a lightweight image with just the binary
-FROM alpine:latest
+FROM alpine:3.21.3
 
 ENV GIN_MODE=release
 
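For a quick local check of this multi-stage build, a command along these lines should work from the repository root; the `dev` version string and the `paperless-gpt:local` tag are placeholders, and the build argument simply feeds the `ARG VERSION` declared above.

```bash
docker build --build-arg VERSION=dev -t paperless-gpt:local .
```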
README.md: 270 changes
@@ -1,4 +1,5 @@
 # paperless-gpt
 
 [](LICENSE)
 [](https://discord.gg/fJQppDH2J7)
+[](https://hub.docker.com/r/icereed/paperless-gpt)
@@ -6,7 +7,7 @@
 
 
-**paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**—ensuring high accuracy, even with tricky scans. If you’re craving next-level text extraction and effortless document organization, this is your solution.
+**paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**-ensuring high accuracy, even with tricky scans. If you’re craving next-level text extraction and effortless document organization, this is your solution.
 
 https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
 
@@ -17,35 +18,50 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
 1. **LLM-Enhanced OCR**
    Harness Large Language Models (OpenAI or Ollama) for **better-than-traditional** OCR—turn messy or low-quality scans into context-aware, high-fidelity text.
 
-2. **Automatic Title & Tag Generation**
+2. **Use specialized AI OCR services**
+
+   - **LLM OCR**: Use OpenAI or Ollama to extract text from images.
+   - **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
+   - **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
+
+3. **Automatic Title & Tag Generation**
    No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
 
-3. **Automatic Correspondent Generation**
+4. **Supports DeepSeek reasoning models in Ollama**
+   Greatly enhance accuracy by using a reasoning model like `deepseek-r1:8b`. The perfect tradeoff between privacy and performance! Of course, if you got enough GPUs or NPUs, a bigger model will enhance the experience.
+
+5. **Automatic Correspondent Generation**
    Automatically identify and generate correspondents from your documents, making it easier to track and organize your communications.
 
-4. **Extensive Customization**
+6. **Extensive Customization**
 
    - **Prompt Templates**: Tweak your AI prompts to reflect your domain, style, or preference.
   - **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
 
-5. **Simple Docker Deployment**
-   A few environment variables, and you’re off! Compose it alongside paperless-ngx with minimal fuss.
+7. **Simple Docker Deployment**
+   A few environment variables, and you're off! Compose it alongside paperless-ngx with minimal fuss.
 
-6. **Unified Web UI**
-   - **Manual Review**: Approve or tweak AI’s suggestions.
+8. **Unified Web UI**
+
+   - **Manual Review**: Approve or tweak AI's suggestions.
    - **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
 
-7. **Opt-In LLM-based OCR**
-   If you opt in, your images get read by a Vision LLM, pushing boundaries beyond standard OCR tools.
-
 ---
 
 ## Table of Contents
 
 - [Key Highlights](#key-highlights)
 - [Getting Started](#getting-started)
 - [Prerequisites](#prerequisites)
 - [Installation](#installation)
 - [Docker Compose](#docker-compose)
 - [Manual Setup](#manual-setup)
+- [OCR Providers](#ocr-providers)
+- [LLM-based OCR](#1-llm-based-ocr-default)
+- [Azure Document Intelligence](#2-azure-document-intelligence)
+- [Google Document AI](#3-google-document-ai)
+- [Comparing OCR Providers](#comparing-ocr-providers)
+- [Choosing the Right Provider](#choosing-the-right-provider)
 - [Configuration](#configuration)
 - [Environment Variables](#environment-variables)
 - [Custom Prompt Templates](#custom-prompt-templates)
@@ -65,20 +81,20 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
 ## Getting Started
 
 ### Prerequisites
 
 - [Docker][docker-install] installed.
 - A running instance of [paperless-ngx][paperless-ngx].
 - Access to an LLM provider:
   - **OpenAI**: An API key with models like `gpt-4o` or `gpt-3.5-turbo`.
-  - **Ollama**: A running Ollama server with models like `llama2`.
+  - **Ollama**: A running Ollama server with models like `deepseek-r1:8b`.
 
 ### Installation
 
 #### Docker Compose
 
-Here’s an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
+Here's an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
 
 ```yaml
-version: "3.7"
+
 services:
   paperless-ngx:
     image: ghcr.io/paperless-ngx/paperless-ngx:latest
@@ -87,24 +103,47 @@ services:
   paperless-gpt:
     image: icereed/paperless-gpt:latest
     environment:
-      PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
-      PAPERLESS_API_TOKEN: 'your_paperless_api_token'
-      PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional
-      MANUAL_TAG: 'paperless-gpt' # Optional, default: paperless-gpt
-      AUTO_TAG: 'paperless-gpt-auto' # Optional, default: paperless-gpt-auto
-      LLM_PROVIDER: 'openai' # or 'ollama'
-      LLM_MODEL: 'gpt-4o' # or 'llama2'
-      OPENAI_API_KEY: 'your_openai_api_key'
+      PAPERLESS_BASE_URL: "http://paperless-ngx:8000"
+      PAPERLESS_API_TOKEN: "your_paperless_api_token"
+      PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com" # Optional
+      MANUAL_TAG: "paperless-gpt" # Optional, default: paperless-gpt
+      AUTO_TAG: "paperless-gpt-auto" # Optional, default: paperless-gpt-auto
+      LLM_PROVIDER: "openai" # or 'ollama'
+      LLM_MODEL: "gpt-4o" # or 'deepseek-r1:8b'
+      # Optional, but recommended for Ollama
+      TOKEN_LIMIT: 1000
+      OPENAI_API_KEY: "your_openai_api_key"
       # Optional - OPENAI_BASE_URL: 'https://litellm.yourinstallationof.it.com/v1'
-      LLM_LANGUAGE: 'English' # Optional, default: English
-      OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
-      VISION_LLM_PROVIDER: 'ollama' # (for OCR) - openai or ollama
-      VISION_LLM_MODEL: 'minicpm-v' # (for OCR) - minicpm-v (ollama example), gpt-4o (for openai), etc.
-      AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default: paperless-gpt-ocr-auto
-      OCR_LIMIT_PAGES: '5' # Optional, default: 5. Set to 0 for no limit.
-      LOG_LEVEL: 'info' # Optional: debug, warn, error
+      LLM_LANGUAGE: "English" # Optional, default: English
+
+      # OCR Configuration - Choose one:
+      # Option 1: LLM-based OCR
+      OCR_PROVIDER: "llm" # Default OCR provider
+      VISION_LLM_PROVIDER: "ollama" # openai or ollama
+      VISION_LLM_MODEL: "minicpm-v" # minicpm-v (ollama) or gpt-4v (openai)
+      OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama
+
+      # Option 2: Google Document AI
+      # OCR_PROVIDER: 'google_docai' # Use Google Document AI
+      # GOOGLE_PROJECT_ID: 'your-project' # Your GCP project ID
+      # GOOGLE_LOCATION: 'us' # Document AI region
+      # GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
+      # GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key
+
+      # Option 3: Azure Document Intelligence
+      # OCR_PROVIDER: 'azure' # Use Azure Document Intelligence
+      # AZURE_DOCAI_ENDPOINT: 'your-endpoint' # Your Azure endpoint URL
+      # AZURE_DOCAI_KEY: 'your-key' # Your Azure API key
+      # AZURE_DOCAI_MODEL_ID: 'prebuilt-read' # Optional, defaults to prebuilt-read
+      # AZURE_DOCAI_TIMEOUT_SECONDS: '120' # Optional, defaults to 120 seconds
+
+      AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
+      OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
+      LOG_LEVEL: "info" # Optional: debug, warn, error
     volumes:
       - ./prompts:/app/prompts # Mount the prompts directory
+      # For Google Document AI:
+      - ${HOME}/.config/gcloud/application_default_credentials.json:/app/credentials.json
     ports:
       - "8080:8080"
     depends_on:
@@ -114,6 +153,7 @@ services:
 **Pro Tip**: Replace placeholders with real values and read the logs if something looks off.
 
 #### Manual Setup
 
 1. **Clone the Repository**
    ```bash
    git clone https://github.com/icereed/paperless-gpt.git
@@ -145,41 +185,107 @@
    ```
 
 ---
+
+## OCR Providers
+
+paperless-gpt supports three different OCR providers, each with unique strengths and capabilities:
+
+### 1. LLM-based OCR (Default)
+
+- **Key Features**:
+  - Uses vision-capable LLMs like GPT-4V or MiniCPM-V
+  - High accuracy with complex layouts and difficult scans
+  - Context-aware text recognition
+  - Self-correcting capabilities for OCR errors
+- **Best For**:
+  - Complex or unusual document layouts
+  - Poor quality scans
+  - Documents with mixed languages
+- **Configuration**:
+  ```yaml
+  OCR_PROVIDER: "llm"
+  VISION_LLM_PROVIDER: "openai" # or "ollama"
+  VISION_LLM_MODEL: "gpt-4v" # or "minicpm-v"
+  ```
+
+### 2. Azure Document Intelligence
+
+- **Key Features**:
+  - Enterprise-grade OCR solution
+  - Prebuilt models for common document types
+  - Layout preservation and table detection
+  - Fast processing speeds
+- **Best For**:
+  - Business documents and forms
+  - High-volume processing
+  - Documents requiring layout analysis
+- **Configuration**:
+  ```yaml
+  OCR_PROVIDER: "azure"
+  AZURE_DOCAI_ENDPOINT: "https://your-endpoint.cognitiveservices.azure.com/"
+  AZURE_DOCAI_KEY: "your-key"
+  AZURE_DOCAI_MODEL_ID: "prebuilt-read" # optional
+  AZURE_DOCAI_TIMEOUT_SECONDS: "120" # optional
+  ```
+
+### 3. Google Document AI
+
+- **Key Features**:
+  - Specialized document processors
+  - Strong form field detection
+  - Multi-language support
+  - High accuracy on structured documents
+- **Best For**:
+  - Forms and structured documents
+  - Documents with tables
+  - Multi-language documents
+- **Configuration**:
+  ```yaml
+  OCR_PROVIDER: "google_docai"
+  GOOGLE_PROJECT_ID: "your-project"
+  GOOGLE_LOCATION: "us"
+  GOOGLE_PROCESSOR_ID: "processor-id"
+  ```
 
 ## Configuration
 
 ### Environment Variables
 
-**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
-=======
+# **Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
+
-| Variable | Description | Required |
-|------------------------|------------------------------------------------------------------------|----------|
-| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes |
-| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes |
-| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No |
-| `MANUAL_TAG` | Tag for manual processing. Default: `paperless-gpt`. | No |
-| `AUTO_TAG` | Tag for auto processing. Default: `paperless-gpt-auto`. | No |
-| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes |
-| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `llama2`. | Yes |
-| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. |
-| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No |
-| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). Default: `English`. | No |
-| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No |
-| `VISION_LLM_PROVIDER` | AI backend for OCR (`openai` or `ollama`). | No |
-| `VISION_LLM_MODEL` | Model name for OCR (e.g. `minicpm-v`). | No |
-| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. Default: `paperless-gpt-ocr-auto`. | No |
-| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). Default: `info`. | No |
-| `LISTEN_INTERFACE` | Network interface to listen on. Default: `:8080`. | No |
-| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
-| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
-| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
-| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. Default: `5`. | No |
-| `TOKEN_LIMIT` | Maximum tokens allowed for prompts/content. Set to `0` to disable limit. Useful for smaller LLMs. | No |
-| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`.
+| Variable | Description | Required | Default |
+| -------- | ----------- | -------- | ------- |
+| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes | |
+| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes | |
+| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No | |
+| `MANUAL_TAG` | Tag for manual processing. | No | paperless-gpt |
+| `AUTO_TAG` | Tag for auto processing. | No | paperless-gpt-auto |
+| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes | |
+| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `deepseek-r1:8b`. | Yes | |
+| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. | |
+| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
+| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
+| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
+| `OCR_PROVIDER` | OCR provider to use (`llm`, `azure`, or `google_docai`). | No | llm |
+| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
+| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
+| `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | |
+| `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | |
+| `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional if using `azure` provider. | No | prebuilt-read |
+| `AZURE_DOCAI_TIMEOUT_SECONDS` | Azure Document Intelligence timeout in seconds. | No | 120 |
+| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
+| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
+| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
+| `GOOGLE_APPLICATION_CREDENTIALS` | Path to the mounted Google service account key. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
+| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. | No | paperless-gpt-ocr-auto |
+| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). | No | info |
+| `LISTEN_INTERFACE` | Network interface to listen on. | No | 8080 |
+| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. | No | true |
+| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. | No | true |
+| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. | No | true |
+| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. | No | 5 |
+| `TOKEN_LIMIT` | Maximum tokens allowed for prompts/content. Set to `0` to disable limit. Useful for smaller LLMs. | No | |
+| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No | |
 
 ### Custom Prompt Templates
 
-paperless-gpt’s flexible **prompt templates** let you shape how AI responds:
+paperless-gpt's flexible **prompt templates** let you shape how AI responds:
 
 1. **`title_prompt.tmpl`**: For document titles.
 2. **`tag_prompt.tmpl`**: For tagging logic.
|
||||||
Mount them into your container via:
|
Mount them into your container via:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
volumes:
|
volumes:
|
||||||
- ./prompts:/app/prompts
|
- ./prompts:/app/prompts
|
||||||
```
|
```
|
||||||
|
|
||||||
Then tweak at will—**paperless-gpt** reloads them automatically on startup!
|
Then tweak at will—**paperless-gpt** reloads them automatically on startup!
|
||||||
|
@ -228,21 +334,26 @@ The templates use Go's text/template syntax. paperless-gpt automatically reloads
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
1. **Tag Documents**
|
1. **Tag Documents**
|
||||||
- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.
|
|
||||||
|
- Add `paperless-gpt` tag to documents for manual processing
|
||||||
|
- Add `paperless-gpt-auto` for automatic processing
|
||||||
|
- Add `paperless-gpt-ocr-auto` for automatic OCR processing
|
||||||
|
|
||||||
2. **Visit Web UI**
|
2. **Visit Web UI**
|
||||||
- Go to `http://localhost:8080` (or your host) in your browser.
|
|
||||||
|
- Go to `http://localhost:8080` (or your host) in your browser
|
||||||
|
- Review documents tagged for processing
|
||||||
|
|
||||||
3. **Generate & Apply Suggestions**
|
3. **Generate & Apply Suggestions**
|
||||||
- Click “Generate Suggestions” to see AI-proposed titles/tags/correspondents.
|
|
||||||
- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
|
|
||||||
|
|
||||||
4. **Try LLM-Based OCR (Experimental)**
|
- Click "Generate Suggestions" to see AI-proposed titles/tags/correspondents
|
||||||
- If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
|
- Review and approve or edit suggestions
|
||||||
- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
|
- Click "Apply" to save changes to paperless-ngx
|
||||||
|
|
||||||
**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
|
|
||||||
|
|
||||||
|
4. **OCR Processing**
|
||||||
|
- Tag documents with appropriate OCR tag to process them
|
||||||
|
- Monitor progress in the Web UI
|
||||||
|
- Review results and apply changes
|
||||||
---
|
---
|
||||||
|
|
||||||
## LLM-Based OCR: Compare for Yourself
|
## LLM-Based OCR: Compare for Yourself
|
||||||
|
@ -257,6 +368,7 @@ The templates use Go's text/template syntax. paperless-gpt automatically reloads
|
||||||

|

|
||||||
|
|
||||||
**Vanilla Paperless-ngx OCR**:
|
**Vanilla Paperless-ngx OCR**:
|
||||||
|
|
||||||
```
|
```
|
||||||
La Grande Recre
|
La Grande Recre
|
||||||
|
|
||||||
|
@ -274,6 +386,7 @@ HERET ET A BIENTOT
|
||||||
```
|
```
|
||||||
|
|
||||||
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
||||||
|
|
||||||
```
|
```
|
||||||
La Grande Récré
|
La Grande Récré
|
||||||
Centre Commercial l'Esplanade
|
Centre Commercial l'Esplanade
|
||||||
|
@ -298,6 +411,7 @@ MERCI ET A BIENTOT
|
||||||

|

|
||||||
|
|
||||||
**Vanilla Paperless-ngx OCR**:
|
**Vanilla Paperless-ngx OCR**:
|
||||||
|
|
||||||
```
|
```
|
||||||
Invoice Number: 1-996-84199
|
Invoice Number: 1-996-84199
|
||||||
|
|
||||||
|
@ -359,6 +473,7 @@ PALATINE IL 60094-4515
|
||||||
```
|
```
|
||||||
|
|
||||||
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
**LLM-Powered OCR (OpenAI gpt-4o)**:
|
||||||
|
|
||||||
```
|
```
|
||||||
FedEx. Invoice Number: 1-996-84199
|
FedEx. Invoice Number: 1-996-84199
|
||||||
Invoice Date: Sep 01, 2014
|
Invoice Date: Sep 01, 2014
|
||||||
|
@ -429,16 +544,15 @@ P.O. Box 94515
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
**Why Does It Matter?**
|
**Why Does It Matter?**
|
||||||
|
|
||||||
- Traditional OCR often jumbles text from complex or low-quality scans.
|
- Traditional OCR often jumbles text from complex or low-quality scans.
|
||||||
- Large Language Models interpret context and correct likely errors, producing results that are more precise and readable.
|
- Large Language Models interpret context and correct likely errors, producing results that are more precise and readable.
|
||||||
- You can integrate these cleaned-up texts into your **paperless-ngx** pipeline for better tagging, searching, and archiving.
|
- You can integrate these cleaned-up texts into your **paperless-ngx** pipeline for better tagging, searching, and archiving.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### How It Works
|
### How It Works
|
||||||
|
|
||||||
- **Vanilla OCR** typically uses classical methods or Tesseract-like engines to extract text, which can result in garbled outputs for complex fonts or poor-quality scans.
|
- **Vanilla OCR** typically uses classical methods or Tesseract-like engines to extract text, which can result in garbled outputs for complex fonts or poor-quality scans.
|
||||||
|
@ -453,20 +567,23 @@ P.O. Box 94515
|
||||||
When using local LLMs (like those through Ollama), you might need to adjust certain settings to optimize performance:
|
When using local LLMs (like those through Ollama), you might need to adjust certain settings to optimize performance:
|
||||||
|
|
||||||
#### Token Management
|
#### Token Management
|
||||||
|
|
||||||
- Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM
|
- Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM
|
||||||
- Smaller models might truncate content unexpectedly if given too much text
|
- Smaller models might truncate content unexpectedly if given too much text
|
||||||
- Start with a conservative limit (e.g., 2000 tokens) and adjust based on your model's capabilities
|
- Start with a conservative limit (e.g., 1000 tokens) and adjust based on your model's capabilities
|
||||||
- Set to `0` to disable the limit (use with caution)
|
- Set to `0` to disable the limit (use with caution)
|
||||||
|
|
||||||
Example configuration for smaller models:
|
Example configuration for smaller models:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
environment:
|
environment:
|
||||||
TOKEN_LIMIT: '2000' # Adjust based on your model's context window
|
TOKEN_LIMIT: "2000" # Adjust based on your model's context window
|
||||||
LLM_PROVIDER: 'ollama'
|
LLM_PROVIDER: "ollama"
|
||||||
LLM_MODEL: 'llama2' # Or other local model
|
LLM_MODEL: "deepseek-r1:8b" # Or other local model
|
||||||
```
|
```
|
||||||
|
|
||||||
Common issues and solutions:
|
Common issues and solutions:
|
||||||
|
|
||||||
- If you see truncated or incomplete responses, try lowering the `TOKEN_LIMIT`
|
- If you see truncated or incomplete responses, try lowering the `TOKEN_LIMIT`
|
||||||
- If processing is too limited, gradually increase the limit while monitoring performance
|
- If processing is too limited, gradually increase the limit while monitoring performance
|
||||||
- For models with larger context windows, you can increase the limit or disable it entirely
|
- For models with larger context windows, you can increase the limit or disable it entirely
|
||||||
|
@ -474,6 +591,7 @@ Common issues and solutions:
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
**Pull requests** and **issues** are welcome!
|
**Pull requests** and **issues** are welcome!
|
||||||
|
|
||||||
1. Fork the repo
|
1. Fork the repo
|
||||||
2. Create a branch (`feature/my-awesome-update`)
|
2. Create a branch (`feature/my-awesome-update`)
|
||||||
3. Commit changes (`git commit -m "Improve X"`)
|
3. Commit changes (`git commit -m "Improve X"`)
|
||||||
|
@ -490,11 +608,13 @@ paperless-gpt is licensed under the [MIT License](LICENSE). Feel free to adapt a
|
||||||
---
|
---
|
||||||
|
|
||||||
## Star History
|
## Star History
|
||||||
|
|
||||||
[](https://star-history.com/#icereed/paperless-gpt&Date)
|
[](https://star-history.com/#icereed/paperless-gpt&Date)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Disclaimer
|
## Disclaimer
|
||||||
|
|
||||||
This project is **not** officially affiliated with [paperless-ngx][paperless-ngx]. Use at your own risk.
|
This project is **not** officially affiliated with [paperless-ngx][paperless-ngx]. Use at your own risk.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
app_llm.go: 25 changes
@@ -67,7 +67,7 @@ func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, s
 		return "", fmt.Errorf("error getting response from LLM: %v", err)
 	}
 
-	response := strings.TrimSpace(completion.Choices[0].Content)
+	response := stripReasoning(strings.TrimSpace(completion.Choices[0].Content))
 	return response, nil
 }
 
@@ -137,7 +137,8 @@ func (app *App) getSuggestedTags(
 		return nil, fmt.Errorf("error getting response from LLM: %v", err)
 	}
 
-	response := strings.TrimSpace(completion.Choices[0].Content)
+	response := stripReasoning(completion.Choices[0].Content)
+
 	suggestedTags := strings.Split(response, ",")
 	for i, tag := range suggestedTags {
 		suggestedTags[i] = strings.TrimSpace(tag)
@@ -273,8 +274,8 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string, originalT
 	if err != nil {
 		return "", fmt.Errorf("error getting response from LLM: %v", err)
 	}
-
-	return strings.TrimSpace(strings.Trim(completion.Choices[0].Content, "\"")), nil
+	result := stripReasoning(completion.Choices[0].Content)
+	return strings.TrimSpace(strings.Trim(result, "\"")), nil
 }
 
 // generateDocumentSuggestions generates suggestions for a set of documents
@@ -404,3 +405,19 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
 
 	return documentSuggestions, nil
 }
+
+// stripReasoning removes the reasoning from the content indicated by <think> and </think> tags.
+func stripReasoning(content string) string {
+	// Remove reasoning from the content
+	reasoningStart := strings.Index(content, "<think>")
+	if reasoningStart != -1 {
+		reasoningEnd := strings.Index(content, "</think>")
+		if reasoningEnd != -1 {
+			content = content[:reasoningStart] + content[reasoningEnd+len("</think>"):]
+		}
+	}
+
+	// Trim whitespace
+	content = strings.TrimSpace(content)
+	return content
+}
@@ -266,3 +266,28 @@ func TestTokenLimitInTitleGeneration(t *testing.T) {
 	// Final prompt should be within token limit
 	assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
 }
+
+func TestStripReasoning(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "No reasoning tags",
+			input:    "This is a test content without reasoning tags.",
+			expected: "This is a test content without reasoning tags.",
+		},
+		{
+			name:     "Reasoning tags at the start",
+			input:    "<think>Start reasoning</think>\n\nContent \n\n",
+			expected: "Content",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			result := stripReasoning(tc.input)
+			assert.Equal(t, tc.expected, result)
+		})
+	}
+}
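To exercise the new helper locally, the standard Go test filter can target just the added test; nothing project-specific is assumed beyond the test name shown above.

```bash
go test -run TestStripReasoning ./...
```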
build-and-run.sh (new executable file): 7 changes
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -o allexport
+source .env
+set +o allexport
+
+go build
+./paperless-gpt
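The script sources a `.env` file with `allexport`, so every assignment becomes an environment variable for the binary. A minimal sketch of such a file is shown below; the variable names come from the README's configuration table, and the values are placeholders rather than anything taken from this commit.

```bash
# Hypothetical local development values - adjust to your own setup.
PAPERLESS_BASE_URL=http://localhost:8000
PAPERLESS_API_TOKEN=your_paperless_api_token
LLM_PROVIDER=ollama
LLM_MODEL=deepseek-r1:8b
OLLAMA_HOST=http://localhost:11434
LOG_LEVEL=debug
```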
@@ -92,3 +92,88 @@
 - E2E tests for web interface
 - Test fixtures and mocks
 - Playwright for frontend testing
+
+## OCR System Patterns
+
+### OCR Provider Architecture
+
+#### 1. Provider Interface
+- Common interface for all OCR implementations
+- Methods for image processing
+- Configuration through standardized Config struct
+- Resource management patterns
+
+#### 2. LLM Provider Implementation
+- Supports OpenAI and Ollama vision models
+- Base64 encoding for OpenAI requests
+- Binary format for Ollama requests
+- Template-based OCR prompts
+
+#### 3. Google Document AI Provider
+- Enterprise-grade OCR processing
+- MIME type validation
+- Processor configuration via environment
+- Regional endpoint support
+
+### Logging Patterns
+
+#### 1. Provider Initialization
+```
+[INFO] Initializing OCR provider: llm
+[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)
+```
+
+#### 2. Processing Logs
+```
+[DEBUG] Starting OCR processing
+[DEBUG] Image dimensions (width=800, height=1200)
+[DEBUG] Using binary image format for non-OpenAI provider
+[DEBUG] Sending request to vision model
+[INFO] Successfully processed image (content_length=1536)
+```
+
+#### 3. Error Logging
+```
+[ERROR] Failed to decode image: invalid format
+[ERROR] Unsupported file type: image/webp
+[ERROR] Failed to get response from vision model
+```
+
+### Error Handling Patterns
+
+#### 1. Configuration Validation
+- Required parameter checks
+- Environment variable validation
+- Provider-specific configuration
+- Connection testing
+
+#### 2. Processing Errors
+- Image format validation
+- MIME type checking
+- Content processing errors
+- Provider-specific error handling
+
+#### 3. Error Propagation
+- Detailed error contexts
+- Original error wrapping
+- Logging with error context
+- Recovery mechanisms
+
+### Processing Flow
+
+#### 1. Document Processing
+```
+Document Tagged → OCR Provider Selected → Image Processing → Text Extraction → Content Update
+```
+
+#### 2. Provider Selection
+```
+Config Check → Provider Initialization → Resource Setup → Provider Ready
+```
+
+#### 3. Error Recovery
+```
+Error Detection → Logging → Cleanup → Error Propagation
+```
+
+These patterns ensure consistent behavior across OCR providers while maintaining proper logging and error handling throughout the system.
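The notes above describe a common provider interface and a standardized Config struct, but the actual Go definitions are not part of this diff. The sketch below is a hypothetical illustration of that pattern only; the type names, field names, and method signature are assumptions, not the project's real API.

```go
// Package ocr: hypothetical sketch of the "common interface + Config struct"
// pattern described in the notes above. Not taken from the repository.
package ocr

import "context"

// Config collects provider selection plus provider-specific settings
// (credentials and endpoints would typically come from environment variables).
type Config struct {
	Provider          string // "llm", "azure", or "google_docai"
	VisionLLMProvider string // used when Provider == "llm"
	VisionLLMModel    string
	AzureEndpoint     string
	AzureAPIKey       string
	GoogleProjectID   string
	GoogleLocation    string
	GoogleProcessorID string
}

// Provider is the shared surface every OCR backend would implement:
// one page image in, plain text out.
type Provider interface {
	ProcessImage(ctx context.Context, imageContent []byte) (string, error)
}
```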
@@ -1,5 +1,3 @@
-version: '3.8'
-
 services:
   app:
     build:
@@ -11,13 +11,13 @@ import (
 	"github.com/gin-gonic/gin"
 )
 
-//go:embed dist/*
+//go:embed web-app/dist/*
 var webappContent embed.FS
 
 // CreateEmbeddedFileServer creates a http.FileSystem from our embedded files
 func createEmbeddedFileServer() http.FileSystem {
 	// Strip the "dist" prefix from the embedded files
-	stripped, err := fs.Sub(webappContent, "dist")
+	stripped, err := fs.Sub(webappContent, "web-app/dist")
 	if err != nil {
 		panic(err)
 	}
@@ -32,7 +32,7 @@ func serveEmbeddedFile(c *gin.Context, prefix string, filepath string) {
 	}
 
 	// Try to open the file from our embedded filesystem
-	fullPath := path.Join("dist", prefix, filepath)
+	fullPath := path.Join("web-app/dist", prefix, filepath)
 	f, err := webappContent.Open(fullPath)
 	if err != nil {
 		// If file not found, serve 404
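The change above moves the embedded frontend from `dist/*` to `web-app/dist/*` and adjusts the `fs.Sub` prefix to match. As a self-contained illustration of that standard-library pattern (not the project's actual server code), a minimal sketch might look like this, assuming a `web-app/dist/` directory with built assets exists next to the source file at compile time:

```go
// Minimal sketch of embedding a built SPA and serving it over HTTP.
// The listen address and package layout are illustrative only.
package main

import (
	"embed"
	"io/fs"
	"net/http"
)

//go:embed web-app/dist/*
var webappContent embed.FS

func main() {
	// Strip the "web-app/dist" prefix so files are served from the site root.
	stripped, err := fs.Sub(webappContent, "web-app/dist")
	if err != nil {
		panic(err)
	}
	http.Handle("/", http.FileServer(http.FS(stripped)))
	_ = http.ListenAndServe(":8080", nil)
}
```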
go.mod: 47 changes
@@ -1,24 +1,33 @@
 module paperless-gpt
 
-go 1.22.0
+go 1.23.0
 
-toolchain go1.23.5
+toolchain go1.24.1
 
 require (
+	cloud.google.com/go/documentai v1.35.2
 	github.com/Masterminds/sprig/v3 v3.3.0
 	github.com/fatih/color v1.18.0
+	github.com/gabriel-vasile/mimetype v1.4.8
 	github.com/gen2brain/go-fitz v1.24.14
 	github.com/gin-gonic/gin v1.10.0
 	github.com/google/uuid v1.6.0
+	github.com/hashicorp/go-retryablehttp v0.7.7
 	github.com/sirupsen/logrus v1.9.3
 	github.com/stretchr/testify v1.10.0
-	github.com/tmc/langchaingo v0.1.13-pre.0.0.20250202074804-0672790bb23a
-	golang.org/x/sync v0.10.0
+	github.com/tmc/langchaingo v0.1.13
+	golang.org/x/sync v0.12.0
+	google.golang.org/api v0.225.0
 	gorm.io/driver/sqlite v1.5.7
 	gorm.io/gorm v1.25.12
 )
 
 require (
+	cloud.google.com/go v0.118.1 // indirect
+	cloud.google.com/go/auth v0.15.0 // indirect
+	cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
+	cloud.google.com/go/compute/metadata v0.6.0 // indirect
+	cloud.google.com/go/longrunning v0.6.4 // indirect
 	dario.cat/mergo v1.0.1 // indirect
 	github.com/Masterminds/goutils v1.1.1 // indirect
 	github.com/Masterminds/semver/v3 v3.3.0 // indirect
@@ -29,12 +38,18 @@ require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/dlclark/regexp2 v1.10.0 // indirect
 	github.com/ebitengine/purego v0.8.0 // indirect
-	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-logr/logr v1.4.2 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-playground/locales v0.14.1 // indirect
 	github.com/go-playground/universal-translator v0.18.1 // indirect
 	github.com/go-playground/validator/v10 v10.20.0 // indirect
 	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/google/s2a-go v0.1.9 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.3.5 // indirect
+	github.com/googleapis/gax-go/v2 v2.14.1 // indirect
+	github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
 	github.com/huandu/xstrings v1.5.0 // indirect
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/jinzhu/now v1.1.5 // indirect
@@ -61,11 +76,23 @@ require (
 	gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect
 	gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
 	gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
+	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
+	go.opentelemetry.io/otel v1.34.0 // indirect
+	go.opentelemetry.io/otel/metric v1.34.0 // indirect
+	go.opentelemetry.io/otel/trace v1.34.0 // indirect
 	golang.org/x/arch v0.8.0 // indirect
-	golang.org/x/crypto v0.29.0 // indirect
-	golang.org/x/net v0.25.0 // indirect
-	golang.org/x/sys v0.27.0 // indirect
-	golang.org/x/text v0.20.0 // indirect
-	google.golang.org/protobuf v1.34.1 // indirect
+	golang.org/x/crypto v0.36.0 // indirect
+	golang.org/x/net v0.37.0 // indirect
+	golang.org/x/oauth2 v0.28.0 // indirect
+	golang.org/x/sys v0.31.0 // indirect
+	golang.org/x/text v0.23.0 // indirect
+	golang.org/x/time v0.11.0 // indirect
+	google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
+	google.golang.org/grpc v1.71.0 // indirect
+	google.golang.org/protobuf v1.36.5 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
go.sum: 140 changes
@@ -1,3 +1,15 @@
+cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
+cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
+cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
+cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
+cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
+cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
+cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
+cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
+cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
+cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
+cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
+cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
 dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
 dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
 github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
@@ -23,16 +35,23 @@ github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+Gv
 github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
 github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
 github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
-github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
+github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
 github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
 github.com/gen2brain/go-fitz v1.24.14/go.mod h1:0KaZeQgASc20Yp5R/pFzyy7SmP01XcoHKNF842U2/S4=
 github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
 github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
 github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
 github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
 github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
 github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
@@ -43,11 +62,27 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
 github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
 github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
-github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
+github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
+github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
+github.com/googleapis/enterprise-certificate-proxy v0.3.5 h1:VgzTY2jogw3xt39CusEnFJWm7rlsq5yL5q9XdLOuP5g=
+github.com/googleapis/enterprise-certificate-proxy v0.3.5/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
+github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
+github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
+github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
+github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
+github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
+github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
+github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
+github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
 github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
 github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
 github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
@@ -90,8 +125,8 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
 github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||||
|
@ -113,10 +148,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE=
|
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
|
||||||
github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4=
|
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
|
||||||
github.com/tmc/langchaingo v0.1.13-pre.0.0.20250202074804-0672790bb23a h1:uEmyBuBfueLWqdvxHYi9/smSb1BfHfXJpDjJAGI38A4=
|
|
||||||
github.com/tmc/langchaingo v0.1.13-pre.0.0.20250202074804-0672790bb23a/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
|
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||||
|
@ -133,33 +166,86 @@ gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f h1:Wku8eEde
|
||||||
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
|
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
|
||||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
|
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
|
||||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
||||||
|
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||||
|
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
|
||||||
|
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
|
||||||
|
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
|
||||||
|
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
|
||||||
|
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
|
||||||
|
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
|
||||||
|
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
|
||||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
||||||
golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ=
|
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
|
||||||
golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg=
|
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
|
||||||
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
|
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
|
||||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
|
||||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
|
||||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
|
||||||
|
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
|
||||||
|
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
|
||||||
|
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
|
||||||
|
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||||
|
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
|
||||||
|
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
||||||
|
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
|
||||||
|
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
||||||
|
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
|
||||||
|
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
|
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||||
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
|
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
|
||||||
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||||
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
|
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||||
|
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
|
||||||
|
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
|
||||||
|
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
|
||||||
|
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||||
|
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
|
||||||
|
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
|
||||||
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU=
|
||||||
|
google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ=
|
||||||
|
google.golang.org/api v0.225.0 h1:+4/IVqBQm0MV5S+JW3kdEGC1WtOmM2mXN1LKH1LdNlw=
|
||||||
|
google.golang.org/api v0.225.0/go.mod h1:WP/0Xm4LVvMOCldfvOISnWquSRWbG2kArDZcg+W2DbY=
|
||||||
|
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
|
||||||
|
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e h1:YA5lmSs3zc/5w+xsRcHqpETkaYyK63ivEPzNTcUUlSA=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
|
||||||
|
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
|
||||||
|
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
|
||||||
|
google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
|
||||||
|
google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
|
||||||
|
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
||||||
|
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|
203
main.go
203
main.go
|
@ -5,8 +5,10 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"paperless-gpt/ocr"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -30,26 +32,30 @@ var (
|
||||||
log = logrus.New()
|
log = logrus.New()
|
||||||
|
|
||||||
// Environment Variables
|
// Environment Variables
|
||||||
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
|
paperlessInsecureSkipVerify = os.Getenv("PAPERLESS_INSECURE_SKIP_VERIFY") == "true"
|
||||||
|
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
|
||||||
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
||||||
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
||||||
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
azureDocAIEndpoint = os.Getenv("AZURE_DOCAI_ENDPOINT")
|
||||||
manualTag = os.Getenv("MANUAL_TAG")
|
azureDocAIKey = os.Getenv("AZURE_DOCAI_KEY")
|
||||||
autoTag = os.Getenv("AUTO_TAG")
|
azureDocAIModelID = os.Getenv("AZURE_DOCAI_MODEL_ID")
|
||||||
manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet
|
azureDocAITimeout = os.Getenv("AZURE_DOCAI_TIMEOUT_SECONDS")
|
||||||
autoOcrTag = os.Getenv("AUTO_OCR_TAG")
|
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
||||||
llmProvider = os.Getenv("LLM_PROVIDER")
|
manualTag = os.Getenv("MANUAL_TAG")
|
||||||
llmModel = os.Getenv("LLM_MODEL")
|
autoTag = os.Getenv("AUTO_TAG")
|
||||||
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
|
manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet
|
||||||
visionLlmModel = os.Getenv("VISION_LLM_MODEL")
|
autoOcrTag = os.Getenv("AUTO_OCR_TAG")
|
||||||
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
|
llmProvider = os.Getenv("LLM_PROVIDER")
|
||||||
listenInterface = os.Getenv("LISTEN_INTERFACE")
|
llmModel = os.Getenv("LLM_MODEL")
|
||||||
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
|
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
|
||||||
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
|
visionLlmModel = os.Getenv("VISION_LLM_MODEL")
|
||||||
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
|
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
|
||||||
limitOcrPages int // Will be read from OCR_LIMIT_PAGES
|
listenInterface = os.Getenv("LISTEN_INTERFACE")
|
||||||
tokenLimit = 0 // Will be read from TOKEN_LIMIT
|
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
|
||||||
|
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
|
||||||
|
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
|
||||||
|
limitOcrPages int // Will be read from OCR_LIMIT_PAGES
|
||||||
|
tokenLimit = 0 // Will be read from TOKEN_LIMIT
|
||||||
|
|
||||||
// Templates
|
// Templates
|
||||||
titleTemplate *template.Template
|
titleTemplate *template.Template
|
||||||
|
@ -113,10 +119,11 @@ Document Content:
|
||||||
|
|
||||||
// App struct to hold dependencies
|
// App struct to hold dependencies
|
||||||
type App struct {
|
type App struct {
|
||||||
Client *PaperlessClient
|
Client *PaperlessClient
|
||||||
Database *gorm.DB
|
Database *gorm.DB
|
||||||
LLM llms.Model
|
LLM llms.Model
|
||||||
VisionLLM llms.Model
|
VisionLLM llms.Model
|
||||||
|
ocrProvider ocr.Provider // OCR provider interface
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -150,12 +157,66 @@ func main() {
|
||||||
log.Fatalf("Failed to create Vision LLM client: %v", err)
|
log.Fatalf("Failed to create Vision LLM client: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize OCR provider
|
||||||
|
var ocrProvider ocr.Provider
|
||||||
|
providerType := os.Getenv("OCR_PROVIDER")
|
||||||
|
if providerType == "" {
|
||||||
|
providerType = "llm" // Default to LLM provider
|
||||||
|
}
|
||||||
|
|
||||||
|
ocrConfig := ocr.Config{
|
||||||
|
Provider: providerType,
|
||||||
|
GoogleProjectID: os.Getenv("GOOGLE_PROJECT_ID"),
|
||||||
|
GoogleLocation: os.Getenv("GOOGLE_LOCATION"),
|
||||||
|
GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"),
|
||||||
|
VisionLLMProvider: visionLlmProvider,
|
||||||
|
VisionLLMModel: visionLlmModel,
|
||||||
|
AzureEndpoint: azureDocAIEndpoint,
|
||||||
|
AzureAPIKey: azureDocAIKey,
|
||||||
|
AzureModelID: azureDocAIModelID,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse Azure timeout if set
|
||||||
|
if azureDocAITimeout != "" {
|
||||||
|
if timeout, err := strconv.Atoi(azureDocAITimeout); err == nil {
|
||||||
|
ocrConfig.AzureTimeout = timeout
|
||||||
|
} else {
|
||||||
|
log.Warnf("Invalid AZURE_DOCAI_TIMEOUT_SECONDS value: %v, using default", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider
|
||||||
|
if providerType == "llm" && visionLlmProvider == "" {
|
||||||
|
log.Warn("OCR provider is set to LLM, but no VISION_LLM_PROVIDER is set. Disabling OCR.")
|
||||||
|
} else {
|
||||||
|
ocrProvider, err = ocr.NewProvider(ocrConfig)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Failed to initialize OCR provider: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize App with dependencies
|
// Initialize App with dependencies
|
||||||
app := &App{
|
app := &App{
|
||||||
Client: client,
|
Client: client,
|
||||||
Database: database,
|
Database: database,
|
||||||
LLM: llm,
|
LLM: llm,
|
||||||
VisionLLM: visionLlm,
|
VisionLLM: visionLlm,
|
||||||
|
ocrProvider: ocrProvider,
|
||||||
|
}
|
||||||
|
|
||||||
|
if app.isOcrEnabled() {
|
||||||
|
fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag)
|
||||||
|
fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag)
|
||||||
|
rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
|
||||||
|
if rawLimitOcrPages == "" {
|
||||||
|
limitOcrPages = 5
|
||||||
|
} else {
|
||||||
|
var err error
|
||||||
|
limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start background process for auto-tagging
|
// Start background process for auto-tagging
|
||||||
|
@ -168,7 +229,7 @@ func main() {
|
||||||
for {
|
for {
|
||||||
processedCount, err := func() (int, error) {
|
processedCount, err := func() (int, error) {
|
||||||
count := 0
|
count := 0
|
||||||
if isOcrEnabled() {
|
if app.isOcrEnabled() {
|
||||||
ocrCount, err := app.processAutoOcrTagDocuments()
|
ocrCount, err := app.processAutoOcrTagDocuments()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err)
|
return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err)
|
||||||
|
@ -227,7 +288,7 @@ func main() {
|
||||||
|
|
||||||
// Endpoint to see if user enabled OCR
|
// Endpoint to see if user enabled OCR
|
||||||
api.GET("/experimental/ocr", func(c *gin.Context) {
|
api.GET("/experimental/ocr", func(c *gin.Context) {
|
||||||
enabled := isOcrEnabled()
|
enabled := app.isOcrEnabled()
|
||||||
c.JSON(http.StatusOK, gin.H{"enabled": enabled})
|
c.JSON(http.StatusOK, gin.H{"enabled": enabled})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -337,8 +398,8 @@ func initLogger() {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func isOcrEnabled() bool {
|
func (app *App) isOcrEnabled() bool {
|
||||||
return visionLlmModel != "" && visionLlmProvider != ""
|
return app.ocrProvider != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// validateOrDefaultEnvVars ensures all necessary environment variables are set
|
// validateOrDefaultEnvVars ensures all necessary environment variables are set
|
||||||
|
@ -356,16 +417,10 @@ func validateOrDefaultEnvVars() {
|
||||||
if manualOcrTag == "" {
|
if manualOcrTag == "" {
|
||||||
manualOcrTag = "paperless-gpt-ocr"
|
manualOcrTag = "paperless-gpt-ocr"
|
||||||
}
|
}
|
||||||
if isOcrEnabled() {
|
|
||||||
fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag)
|
|
||||||
}
|
|
||||||
|
|
||||||
if autoOcrTag == "" {
|
if autoOcrTag == "" {
|
||||||
autoOcrTag = "paperless-gpt-ocr-auto"
|
autoOcrTag = "paperless-gpt-ocr-auto"
|
||||||
}
|
}
|
||||||
if isOcrEnabled() {
|
|
||||||
fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag)
|
|
||||||
}
|
|
||||||
|
|
||||||
if paperlessBaseURL == "" {
|
if paperlessBaseURL == "" {
|
||||||
log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.")
|
log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.")
|
||||||
|
@ -383,6 +438,17 @@ func validateOrDefaultEnvVars() {
|
||||||
log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
|
log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate OCR provider if set
|
||||||
|
ocrProvider := os.Getenv("OCR_PROVIDER")
|
||||||
|
if ocrProvider == "azure" {
|
||||||
|
if azureDocAIEndpoint == "" {
|
||||||
|
log.Fatal("Please set the AZURE_DOCAI_ENDPOINT environment variable for Azure provider")
|
||||||
|
}
|
||||||
|
if azureDocAIKey == "" {
|
||||||
|
log.Fatal("Please set the AZURE_DOCAI_KEY environment variable for Azure provider")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if llmModel == "" {
|
if llmModel == "" {
|
||||||
log.Fatal("Please set the LLM_MODEL environment variable.")
|
log.Fatal("Please set the LLM_MODEL environment variable.")
|
||||||
}
|
}
|
||||||
|
@ -391,19 +457,6 @@ func validateOrDefaultEnvVars() {
|
||||||
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
|
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
|
||||||
}
|
}
|
||||||
|
|
||||||
if isOcrEnabled() {
|
|
||||||
rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
|
|
||||||
if rawLimitOcrPages == "" {
|
|
||||||
limitOcrPages = 5
|
|
||||||
} else {
|
|
||||||
var err error
|
|
||||||
limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize token limit from environment variable
|
// Initialize token limit from environment variable
|
||||||
if limit := os.Getenv("TOKEN_LIMIT"); limit != "" {
|
if limit := os.Getenv("TOKEN_LIMIT"); limit != "" {
|
||||||
if parsed, err := strconv.Atoi(limit); err == nil {
|
if parsed, err := strconv.Atoi(limit); err == nil {
|
||||||
|
@ -437,7 +490,14 @@ func (app *App) processAutoTagDocuments() (int, error) {
|
||||||
|
|
||||||
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
|
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
|
||||||
|
|
||||||
|
processedCount := 0
|
||||||
for _, document := range documents {
|
for _, document := range documents {
|
||||||
|
// Skip documents that have the autoOcrTag
|
||||||
|
if slices.Contains(document.Tags, autoOcrTag) {
|
||||||
|
log.Debugf("Skipping document %d as it has the OCR tag %s", document.ID, autoOcrTag)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
docLogger := documentLogger(document.ID)
|
docLogger := documentLogger(document.ID)
|
||||||
docLogger.Info("Processing document for auto-tagging")
|
docLogger.Info("Processing document for auto-tagging")
|
||||||
|
|
||||||
|
@ -450,17 +510,18 @@ func (app *App) processAutoTagDocuments() (int, error) {
|
||||||
|
|
||||||
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
|
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
|
return processedCount, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
|
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error updating document %d: %w", document.ID, err)
|
return processedCount, fmt.Errorf("error updating document %d: %w", document.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
docLogger.Info("Successfully processed document")
|
docLogger.Info("Successfully processed document")
|
||||||
|
processedCount++
|
||||||
}
|
}
|
||||||
return len(documents), nil
|
return processedCount, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// processAutoOcrTagDocuments handles the background auto-tagging of OCR documents
|
// processAutoOcrTagDocuments handles the background auto-tagging of OCR documents
|
||||||
|
@ -605,9 +666,11 @@ func createLLM() (llms.Model, error) {
|
||||||
if openaiAPIKey == "" {
|
if openaiAPIKey == "" {
|
||||||
return nil, fmt.Errorf("OpenAI API key is not set")
|
return nil, fmt.Errorf("OpenAI API key is not set")
|
||||||
}
|
}
|
||||||
|
|
||||||
return openai.New(
|
return openai.New(
|
||||||
openai.WithModel(llmModel),
|
openai.WithModel(llmModel),
|
||||||
openai.WithToken(openaiAPIKey),
|
openai.WithToken(openaiAPIKey),
|
||||||
|
openai.WithHTTPClient(createCustomHTTPClient()),
|
||||||
)
|
)
|
||||||
case "ollama":
|
case "ollama":
|
||||||
host := os.Getenv("OLLAMA_HOST")
|
host := os.Getenv("OLLAMA_HOST")
|
||||||
|
@ -629,9 +692,11 @@ func createVisionLLM() (llms.Model, error) {
|
||||||
if openaiAPIKey == "" {
|
if openaiAPIKey == "" {
|
||||||
return nil, fmt.Errorf("OpenAI API key is not set")
|
return nil, fmt.Errorf("OpenAI API key is not set")
|
||||||
}
|
}
|
||||||
|
|
||||||
return openai.New(
|
return openai.New(
|
||||||
openai.WithModel(visionLlmModel),
|
openai.WithModel(visionLlmModel),
|
||||||
openai.WithToken(openaiAPIKey),
|
openai.WithToken(openaiAPIKey),
|
||||||
|
openai.WithHTTPClient(createCustomHTTPClient()),
|
||||||
)
|
)
|
||||||
case "ollama":
|
case "ollama":
|
||||||
host := os.Getenv("OLLAMA_HOST")
|
host := os.Getenv("OLLAMA_HOST")
|
||||||
|
@ -647,3 +712,33 @@ func createVisionLLM() (llms.Model, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func createCustomHTTPClient() *http.Client {
|
||||||
|
// Create custom transport that adds headers
|
||||||
|
customTransport := &headerTransport{
|
||||||
|
transport: http.DefaultTransport,
|
||||||
|
headers: map[string]string{
|
||||||
|
"X-Title": "paperless-gpt",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create custom client with the transport
|
||||||
|
httpClient := http.DefaultClient
|
||||||
|
httpClient.Transport = customTransport
|
||||||
|
|
||||||
|
return httpClient
|
||||||
|
}
|
||||||
|
|
||||||
|
// headerTransport is a custom http.RoundTripper that adds custom headers to requests
|
||||||
|
type headerTransport struct {
|
||||||
|
transport http.RoundTripper
|
||||||
|
headers map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
// RoundTrip implements the http.RoundTripper interface
|
||||||
|
func (t *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||||
|
for key, value := range t.headers {
|
||||||
|
req.Header.Add(key, value)
|
||||||
|
}
|
||||||
|
return t.transport.RoundTrip(req)
|
||||||
|
}
|
||||||
|
|
199
main_test.go
Normal file
199
main_test.go
Normal file
|
@ -0,0 +1,199 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"slices"
|
||||||
|
"testing"
|
||||||
|
"text/template"
|
||||||
|
|
||||||
|
"github.com/Masterminds/sprig/v3"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestProcessAutoTagDocuments(t *testing.T) {
|
||||||
|
// Initialize required global variables
|
||||||
|
autoTag = "paperless-gpt-auto"
|
||||||
|
autoOcrTag = "paperless-gpt-ocr-auto"
|
||||||
|
|
||||||
|
// Initialize templates
|
||||||
|
var err error
|
||||||
|
titleTemplate, err = template.New("title").Funcs(sprig.FuncMap()).Parse("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
tagTemplate, err = template.New("tag").Funcs(sprig.FuncMap()).Parse("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
correspondentTemplate, err = template.New("correspondent").Funcs(sprig.FuncMap()).Parse("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Create test environment
|
||||||
|
env := newTestEnv(t)
|
||||||
|
defer env.teardown()
|
||||||
|
|
||||||
|
// Set up test cases
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
documents []Document
|
||||||
|
expectedCount int
|
||||||
|
expectedError string
|
||||||
|
updateResponse int // HTTP status code for update response
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Skip document with autoOcrTag",
|
||||||
|
documents: []Document{
|
||||||
|
{
|
||||||
|
ID: 1,
|
||||||
|
Title: "Doc with OCR tag",
|
||||||
|
Tags: []string{autoTag, autoOcrTag},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: 2,
|
||||||
|
Title: "Doc without OCR tag",
|
||||||
|
Tags: []string{autoTag},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: 3,
|
||||||
|
Title: "Doc with OCR tag",
|
||||||
|
Tags: []string{autoTag, autoOcrTag},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedCount: 1,
|
||||||
|
updateResponse: http.StatusOK,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "No documents to process",
|
||||||
|
documents: []Document{},
|
||||||
|
expectedCount: 0,
|
||||||
|
updateResponse: http.StatusOK,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
// Mock the GetAllTags response
|
||||||
|
env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
response := map[string]interface{}{
|
||||||
|
"results": []map[string]interface{}{
|
||||||
|
{"id": 1, "name": autoTag},
|
||||||
|
{"id": 2, "name": autoOcrTag},
|
||||||
|
{"id": 3, "name": "other-tag"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(w).Encode(response)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Mock the GetDocumentsByTags response
|
||||||
|
env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
response := GetDocumentsApiResponse{
|
||||||
|
Results: make([]GetDocumentApiResponseResult, len(tc.documents)),
|
||||||
|
}
|
||||||
|
for i, doc := range tc.documents {
|
||||||
|
tagIds := make([]int, len(doc.Tags))
|
||||||
|
for j, tagName := range doc.Tags {
|
||||||
|
switch tagName {
|
||||||
|
case autoTag:
|
||||||
|
tagIds[j] = 1
|
||||||
|
case autoOcrTag:
|
||||||
|
tagIds[j] = 2
|
||||||
|
default:
|
||||||
|
tagIds[j] = 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response.Results[i] = GetDocumentApiResponseResult{
|
||||||
|
ID: doc.ID,
|
||||||
|
Title: doc.Title,
|
||||||
|
Tags: tagIds,
|
||||||
|
Content: "Test content",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(w).Encode(response)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Mock the correspondent creation endpoint
|
||||||
|
env.setMockResponse("/api/correspondents/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method == "POST" {
|
||||||
|
// Mock successful correspondent creation
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||||
|
"id": 3,
|
||||||
|
"name": "test response",
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
// Mock GET response for existing correspondents
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||||
|
"results": []map[string]interface{}{
|
||||||
|
{"id": 1, "name": "Alpha"},
|
||||||
|
{"id": 2, "name": "Beta"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Create test app
|
||||||
|
app := &App{
|
||||||
|
Client: env.client,
|
||||||
|
Database: env.db,
|
||||||
|
LLM: &mockLLM{}, // Use mock LLM from app_llm_test.go
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set auto-generate flags
|
||||||
|
autoGenerateTitle = "true"
|
||||||
|
autoGenerateTags = "true"
|
||||||
|
autoGenerateCorrespondents = "true"
|
||||||
|
|
||||||
|
// Mock the document update responses
|
||||||
|
for _, doc := range tc.documents {
|
||||||
|
if !slices.Contains(doc.Tags, autoOcrTag) {
|
||||||
|
updatePath := fmt.Sprintf("/api/documents/%d/", doc.ID)
|
||||||
|
env.setMockResponse(updatePath, func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(tc.updateResponse)
|
||||||
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||||
|
"id": doc.ID,
|
||||||
|
"title": "Updated " + doc.Title,
|
||||||
|
"tags": []int{1, 3}, // Mock updated tag IDs
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the test
|
||||||
|
count, err := app.processAutoTagDocuments()
|
||||||
|
|
||||||
|
// Verify results
|
||||||
|
if tc.expectedError != "" {
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), tc.expectedError)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, tc.expectedCount, count)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateCustomHTTPClient(t *testing.T) {
|
||||||
|
// Create a test server
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
// Verify custom header
|
||||||
|
assert.Equal(t, "paperless-gpt", r.Header.Get("X-Title"), "Expected X-Title header")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
// Get custom client
|
||||||
|
client := createCustomHTTPClient()
|
||||||
|
require.NotNil(t, client, "HTTP client should not be nil")
|
||||||
|
|
||||||
|
// Make a request
|
||||||
|
resp, err := client.Get(server.URL)
|
||||||
|
require.NoError(t, err, "Request should not fail")
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, resp.StatusCode, "Expected 200 OK response")
|
||||||
|
}
|
13
ocr.go
13
ocr.go
|
@ -36,13 +36,20 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string,
|
||||||
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
|
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ocrText, err := app.doOCRViaLLM(ctx, imageContent, pageLogger)
|
result, err := app.ocrProvider.ProcessImage(ctx, imageContent)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
|
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
|
||||||
}
|
}
|
||||||
pageLogger.Debug("OCR completed for page")
|
if result == nil {
|
||||||
|
pageLogger.Error("Got nil result from OCR provider")
|
||||||
|
return "", fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1)
|
||||||
|
}
|
||||||
|
|
||||||
ocrTexts = append(ocrTexts, ocrText)
|
pageLogger.WithField("has_hocr", result.HOCR != "").
|
||||||
|
WithField("metadata", result.Metadata).
|
||||||
|
Debug("OCR completed for page")
|
||||||
|
|
||||||
|
ocrTexts = append(ocrTexts, result.Text)
|
||||||
}
|
}
|
||||||
|
|
||||||
docLogger.Info("OCR processing completed successfully")
|
docLogger.Info("OCR processing completed successfully")
|
||||||
|
|
224
ocr/azure_provider.go
Normal file
224
ocr/azure_provider.go
Normal file
|
@ -0,0 +1,224 @@
|
||||||
|
package ocr
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/gabriel-vasile/mimetype"
|
||||||
|
"github.com/hashicorp/go-retryablehttp"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
apiVersion = "2024-11-30"
|
||||||
|
defaultModelID = "prebuilt-read"
|
||||||
|
defaultTimeout = 120
|
||||||
|
pollingInterval = 2 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
// AzureProvider implements OCR using Azure Document Intelligence
|
||||||
|
type AzureProvider struct {
|
||||||
|
endpoint string
|
||||||
|
apiKey string
|
||||||
|
modelID string
|
||||||
|
timeout time.Duration
|
||||||
|
httpClient *retryablehttp.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// Request body for Azure Document Intelligence
|
||||||
|
type analyzeRequest struct {
|
||||||
|
Base64Source string `json:"base64Source"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func newAzureProvider(config Config) (*AzureProvider, error) {
|
||||||
|
logger := log.WithFields(logrus.Fields{
|
||||||
|
"endpoint": config.AzureEndpoint,
|
||||||
|
"model_id": config.AzureModelID,
|
||||||
|
})
|
||||||
|
logger.Info("Creating new Azure Document Intelligence provider")
|
||||||
|
|
||||||
|
// Validate required configuration
|
||||||
|
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
|
||||||
|
logger.Error("Missing required configuration")
|
||||||
|
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set defaults and create provider
|
||||||
|
modelID := defaultModelID
|
||||||
|
if config.AzureModelID != "" {
|
||||||
|
modelID = config.AzureModelID
|
||||||
|
}
|
||||||
|
|
||||||
|
timeout := defaultTimeout
|
||||||
|
if config.AzureTimeout > 0 {
|
||||||
|
timeout = config.AzureTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure retryablehttp client
|
||||||
|
client := retryablehttp.NewClient()
|
||||||
|
client.RetryMax = 3
|
||||||
|
client.RetryWaitMin = 1 * time.Second
|
||||||
|
client.RetryWaitMax = 5 * time.Second
|
||||||
|
client.Logger = logger
|
||||||
|
|
||||||
|
provider := &AzureProvider{
|
||||||
|
endpoint: config.AzureEndpoint,
|
||||||
|
apiKey: config.AzureAPIKey,
|
||||||
|
modelID: modelID,
|
||||||
|
timeout: time.Duration(timeout) * time.Second,
|
||||||
|
httpClient: client,
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("Successfully initialized Azure Document Intelligence provider")
|
||||||
|
return provider, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *AzureProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
|
||||||
|
logger := log.WithFields(logrus.Fields{
|
||||||
|
"model_id": p.modelID,
|
||||||
|
})
|
||||||
|
logger.Debug("Starting Azure Document Intelligence processing")
|
||||||
|
|
||||||
|
// Detect MIME type
|
||||||
|
mtype := mimetype.Detect(imageContent)
|
||||||
|
logger.WithField("mime_type", mtype.String()).Debug("Detected file type")
|
||||||
|
|
||||||
|
if !isImageMIMEType(mtype.String()) {
|
||||||
|
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
|
||||||
|
return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create context with timeout
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, p.timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Submit document for analysis
|
||||||
|
operationLocation, err := p.submitDocument(ctx, imageContent)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error submitting document: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Poll for results
|
||||||
|
result, err := p.pollForResults(ctx, operationLocation)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error polling for results: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to OCR result
|
||||||
|
ocrResult := &OCRResult{
|
||||||
|
Text: result.AnalyzeResult.Content,
|
||||||
|
Metadata: map[string]string{
|
||||||
|
"provider": "azure_docai",
|
||||||
|
"page_count": fmt.Sprintf("%d", len(result.AnalyzeResult.Pages)),
|
||||||
|
"api_version": result.AnalyzeResult.APIVersion,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.WithFields(logrus.Fields{
|
||||||
|
"content_length": len(ocrResult.Text),
|
||||||
|
"page_count": len(result.AnalyzeResult.Pages),
|
||||||
|
}).Info("Successfully processed document")
|
||||||
|
return ocrResult, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *AzureProvider) submitDocument(ctx context.Context, imageContent []byte) (string, error) {
|
||||||
|
requestURL := fmt.Sprintf("%s/documentintelligence/documentModels/%s:analyze?api-version=%s",
|
||||||
|
p.endpoint, p.modelID, apiVersion)
|
||||||
|
|
||||||
|
// Prepare request body
|
||||||
|
requestBody := analyzeRequest{
|
||||||
|
Base64Source: base64.StdEncoding.EncodeToString(imageContent),
|
||||||
|
}
|
||||||
|
requestBodyBytes, err := json.Marshal(requestBody)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error marshaling request body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := retryablehttp.NewRequestWithContext(ctx, "POST", requestURL, bytes.NewBuffer(requestBodyBytes))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error creating HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)
|
||||||
|
|
||||||
|
resp, err := p.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error sending HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusAccepted {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return "", fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
operationLocation := resp.Header.Get("Operation-Location")
|
||||||
|
if operationLocation == "" {
|
||||||
|
return "", fmt.Errorf("no Operation-Location header in response")
|
||||||
|
}
|
||||||
|
|
||||||
|
return operationLocation, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *AzureProvider) pollForResults(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
|
||||||
|
logger := log.WithField("operation_location", operationLocation)
|
||||||
|
logger.Debug("Starting to poll for results")
|
||||||
|
|
||||||
|
ticker := time.NewTicker(pollingInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil, fmt.Errorf("operation timed out after %v: %w", p.timeout, ctx.Err())
|
||||||
|
case <-ticker.C:
|
||||||
|
req, err := retryablehttp.NewRequestWithContext(ctx, "GET", operationLocation, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error creating poll request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)
|
||||||
|
|
||||||
|
resp, err := p.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error polling for results: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var result AzureDocumentResult
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||||
|
resp.Body.Close()
|
||||||
|
logger.WithError(err).Error("Failed to decode response")
|
||||||
|
return nil, fmt.Errorf("error decoding response: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
logger.WithFields(logrus.Fields{
|
||||||
|
"status_code": resp.StatusCode,
|
||||||
|
"content_length": len(result.AnalyzeResult.Content),
|
||||||
|
"page_count": len(result.AnalyzeResult.Pages),
|
||||||
|
"status": result.Status,
|
||||||
|
}).Debug("Poll response received")
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("unexpected status code %d while polling", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch result.Status {
|
||||||
|
case "succeeded":
|
||||||
|
return &result, nil
|
||||||
|
case "failed":
|
||||||
|
return nil, fmt.Errorf("document processing failed")
|
||||||
|
case "running":
|
||||||
|
// Continue polling
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unexpected status: %s", result.Status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
222
ocr/azure_provider_test.go
Normal file
222
ocr/azure_provider_test.go
Normal file
|
@ -0,0 +1,222 @@
|
||||||
|
package ocr
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/go-retryablehttp"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewAzureProvider(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
config Config
|
||||||
|
wantErr bool
|
||||||
|
errContains string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid config",
|
||||||
|
config: Config{
|
||||||
|
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
|
||||||
|
AzureAPIKey: "test-key",
|
||||||
|
},
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid config with custom model and timeout",
|
||||||
|
config: Config{
|
||||||
|
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
|
||||||
|
AzureAPIKey: "test-key",
|
||||||
|
AzureModelID: "custom-model",
|
||||||
|
AzureTimeout: 60,
|
||||||
|
},
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing endpoint",
|
||||||
|
config: Config{
|
||||||
|
AzureAPIKey: "test-key",
|
||||||
|
},
|
||||||
|
wantErr: true,
|
||||||
|
errContains: "missing required Azure Document Intelligence configuration",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing api key",
|
||||||
|
config: Config{
|
||||||
|
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
|
||||||
|
},
|
||||||
|
wantErr: true,
|
||||||
|
errContains: "missing required Azure Document Intelligence configuration",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
provider, err := newAzureProvider(tt.config)
|
||||||
|
if tt.wantErr {
|
||||||
|
assert.Error(t, err)
|
||||||
|
if tt.errContains != "" {
|
||||||
|
assert.Contains(t, err.Error(), tt.errContains)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, provider)
|
||||||
|
|
||||||
|
// Verify default values
|
||||||
|
if tt.config.AzureModelID == "" {
|
||||||
|
assert.Equal(t, defaultModelID, provider.modelID)
|
||||||
|
} else {
|
||||||
|
assert.Equal(t, tt.config.AzureModelID, provider.modelID)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.config.AzureTimeout == 0 {
|
||||||
|
assert.Equal(t, time.Duration(defaultTimeout)*time.Second, provider.timeout)
|
||||||
|
} else {
|
||||||
|
assert.Equal(t, time.Duration(tt.config.AzureTimeout)*time.Second, provider.timeout)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAzureProvider_ProcessImage(t *testing.T) {
|
||||||
|
// Sample success response
|
||||||
|
now := time.Now()
|
||||||
|
successResult := AzureDocumentResult{
|
||||||
|
Status: "succeeded",
|
||||||
|
CreatedDateTime: now,
|
||||||
|
LastUpdatedDateTime: now,
|
||||||
|
AnalyzeResult: AzureAnalyzeResult{
|
||||||
|
APIVersion: apiVersion,
|
||||||
|
ModelID: defaultModelID,
|
||||||
|
			StringIndexType: "utf-16",
			Content:         "Test document content",
			Pages: []AzurePage{
				{
					PageNumber: 1,
					Angle:      0.0,
					Width:      800,
					Height:     600,
					Unit:       "pixel",
					Lines: []AzureLine{
						{
							Content: "Test line",
							Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
							Spans:   []AzureSpan{{Offset: 0, Length: 9}},
						},
					},
					Spans: []AzureSpan{{Offset: 0, Length: 9}},
				},
			},
			Paragraphs: []AzureParagraph{
				{
					Content: "Test document content",
					Spans:   []AzureSpan{{Offset: 0, Length: 19}},
					BoundingRegions: []AzureBoundingBox{
						{
							PageNumber: 1,
							Polygon:    []int{0, 0, 100, 0, 100, 20, 0, 20},
						},
					},
				},
			},
			ContentFormat: "text",
		},
	}

	tests := []struct {
		name         string
		setupServer  func() *httptest.Server
		imageContent []byte
		wantErr      bool
		errContains  string
		expectedText string
	}{
		{
			name: "successful processing",
			setupServer: func() *httptest.Server {
				mux := http.NewServeMux()
				server := httptest.NewServer(mux)

				mux.HandleFunc("/documentintelligence/documentModels/prebuilt-read:analyze", func(w http.ResponseWriter, r *http.Request) {
					w.Header().Set("Operation-Location", fmt.Sprintf("%s/operations/123", server.URL))
					w.WriteHeader(http.StatusAccepted)
				})

				mux.HandleFunc("/operations/123", func(w http.ResponseWriter, r *http.Request) {
					json.NewEncoder(w).Encode(successResult)
				})

				return server
			},
			// Create minimal JPEG content with magic numbers
			imageContent: append([]byte{0xFF, 0xD8, 0xFF, 0xE0}, []byte("JFIF test content")...),
			expectedText: "Test document content",
		},
		{
			name: "invalid mime type",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					t.Log("Server should not be called with invalid mime type")
					w.WriteHeader(http.StatusBadRequest)
				}))
			},
			imageContent: []byte("invalid content"),
			wantErr:      true,
			errContains:  "unsupported file type",
		},
		{
			name: "submission error",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusBadRequest)
					fmt.Fprintln(w, "Invalid request")
				}))
			},
			imageContent: []byte{0xFF, 0xD8, 0xFF, 0xE0}, // JPEG magic numbers
			wantErr:      true,
			errContains:  "unexpected status code 400",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			server := tt.setupServer()
			defer server.Close()

			client := retryablehttp.NewClient()
			client.HTTPClient = server.Client()
			client.Logger = log

			provider := &AzureProvider{
				endpoint:   server.URL,
				apiKey:     "test-key",
				modelID:    defaultModelID,
				timeout:    5 * time.Second,
				httpClient: client,
			}

			result, err := provider.ProcessImage(context.Background(), tt.imageContent)
			if tt.wantErr {
				assert.Error(t, err)
				if tt.errContains != "" {
					assert.Contains(t, err.Error(), tt.errContains)
				}
				return
			}

			assert.NoError(t, err)
			assert.NotNil(t, result)
			assert.Equal(t, tt.expectedText, result.Text)
			assert.Equal(t, "azure_docai", result.Metadata["provider"])
			assert.Equal(t, apiVersion, result.Metadata["api_version"])
			assert.Equal(t, "1", result.Metadata["page_count"])
		})
	}
}
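The mocked endpoints above emulate the asynchronous Azure Document Intelligence flow: the analyze endpoint answers 202 Accepted with an Operation-Location header, and the operation URL later returns the analysis result. The sketch below illustrates that submit-then-poll pattern only; it is not the provider's actual implementation (which lives in the AzureProvider shown earlier in this diff), and the auth header name is an assumption.

```go
package ocr

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// analyzeAndPoll is an illustrative sketch of the submit-then-poll flow the
// test's mock server emulates. AzureDocumentResult comes from ocr/azure_types.go;
// the "Ocp-Apim-Subscription-Key" header name is an assumption.
func analyzeAndPoll(ctx context.Context, hc *http.Client, endpoint, apiKey string, image []byte) (*AzureDocumentResult, error) {
	submitURL := endpoint + "/documentintelligence/documentModels/prebuilt-read:analyze"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, submitURL, bytes.NewReader(image))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Ocp-Apim-Subscription-Key", apiKey) // assumed auth header
	req.Header.Set("Content-Type", "application/octet-stream")

	resp, err := hc.Do(req)
	if err != nil {
		return nil, err
	}
	resp.Body.Close()
	if resp.StatusCode != http.StatusAccepted {
		return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
	}

	// The service reports completion at the URL given in the Operation-Location header.
	opURL := resp.Header.Get("Operation-Location")
	for {
		pollReq, err := http.NewRequestWithContext(ctx, http.MethodGet, opURL, nil)
		if err != nil {
			return nil, err
		}
		pollReq.Header.Set("Ocp-Apim-Subscription-Key", apiKey)
		pollResp, err := hc.Do(pollReq)
		if err != nil {
			return nil, err
		}
		var result AzureDocumentResult
		err = json.NewDecoder(pollResp.Body).Decode(&result)
		pollResp.Body.Close()
		if err != nil {
			return nil, err
		}
		switch result.Status {
		case "succeeded":
			return &result, nil
		case "failed":
			return nil, fmt.Errorf("document analysis failed")
		}
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-time.After(time.Second): // a real implementation would back off and enforce a timeout
		}
	}
}
```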
ocr/azure_types.go (new file, 72 lines)

package ocr

import "time"

// AzureDocumentResult represents the root response from Azure Document Intelligence
type AzureDocumentResult struct {
	Status              string             `json:"status"`
	CreatedDateTime     time.Time          `json:"createdDateTime"`
	LastUpdatedDateTime time.Time          `json:"lastUpdatedDateTime"`
	AnalyzeResult       AzureAnalyzeResult `json:"analyzeResult"`
}

// AzureAnalyzeResult represents the analyze result part of the Azure Document Intelligence response
type AzureAnalyzeResult struct {
	APIVersion      string           `json:"apiVersion"`
	ModelID         string           `json:"modelId"`
	StringIndexType string           `json:"stringIndexType"`
	Content         string           `json:"content"`
	Pages           []AzurePage      `json:"pages"`
	Paragraphs      []AzureParagraph `json:"paragraphs"`
	Styles          []interface{}    `json:"styles"`
	ContentFormat   string           `json:"contentFormat"`
}

// AzurePage represents a single page in the document
type AzurePage struct {
	PageNumber int         `json:"pageNumber"`
	Angle      float64     `json:"angle"`
	Width      int         `json:"width"`
	Height     int         `json:"height"`
	Unit       string      `json:"unit"`
	Words      []AzureWord `json:"words"`
	Lines      []AzureLine `json:"lines"`
	Spans      []AzureSpan `json:"spans"`
}

// AzureWord represents a single word with its properties
type AzureWord struct {
	Content    string    `json:"content"`
	Polygon    []int     `json:"polygon"`
	Confidence float64   `json:"confidence"`
	Span       AzureSpan `json:"span"`
}

// AzureLine represents a line of text
type AzureLine struct {
	Content string      `json:"content"`
	Polygon []int       `json:"polygon"`
	Spans   []AzureSpan `json:"spans"`
}

// AzureSpan represents a span of text with offset and length
type AzureSpan struct {
	Offset int `json:"offset"`
	Length int `json:"length"`
}

// AzureParagraph represents a paragraph of text
type AzureParagraph struct {
	Content         string             `json:"content"`
	Spans           []AzureSpan        `json:"spans"`
	BoundingRegions []AzureBoundingBox `json:"boundingRegions"`
}

// AzureBoundingBox represents the location of content on a page
type AzureBoundingBox struct {
	PageNumber int   `json:"pageNumber"`
	Polygon    []int `json:"polygon"`
}

// AzureStyle represents style information for text segments - changed to interface{} as per input
type AzureStyle interface{}
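Because these structs mirror the analyze-result JSON via their tags, a poll response decodes directly with encoding/json. A minimal sketch follows; the JSON literal is a hand-written sample, not captured service output, and real payloads carry many more fields.

```go
package ocr

import (
	"encoding/json"
	"fmt"
)

// decodeSampleAzureResult is a small illustration (not part of the diff) of
// decoding an analyze-result payload into AzureDocumentResult.
func decodeSampleAzureResult() {
	const sample = `{
	  "status": "succeeded",
	  "analyzeResult": {
	    "modelId": "prebuilt-read",
	    "content": "Hello world",
	    "pages": [{"pageNumber": 1, "unit": "pixel", "width": 800, "height": 600}],
	    "contentFormat": "text"
	  }
	}`

	var result AzureDocumentResult
	if err := json.Unmarshal([]byte(sample), &result); err != nil {
		panic(err)
	}
	fmt.Println(result.Status, result.AnalyzeResult.Content, len(result.AnalyzeResult.Pages))
}
```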
ocr/google_docai_provider.go (new file, 232 lines)

package ocr

import (
	"context"
	"fmt"
	"html"
	"strings"

	documentai "cloud.google.com/go/documentai/apiv1"
	"cloud.google.com/go/documentai/apiv1/documentaipb"
	"github.com/gabriel-vasile/mimetype"
	"github.com/sirupsen/logrus"
	"google.golang.org/api/option"
)

// GoogleDocAIProvider implements OCR using Google Document AI
type GoogleDocAIProvider struct {
	projectID   string
	location    string
	processorID string
	client      *documentai.DocumentProcessorClient
}

func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
	logger := log.WithFields(logrus.Fields{
		"location":     config.GoogleLocation,
		"processor_id": config.GoogleProcessorID,
	})
	logger.Info("Creating new Google Document AI provider")

	ctx := context.Background()
	endpoint := fmt.Sprintf("%s-documentai.googleapis.com:443", config.GoogleLocation)

	client, err := documentai.NewDocumentProcessorClient(ctx, option.WithEndpoint(endpoint))
	if err != nil {
		logger.WithError(err).Error("Failed to create Document AI client")
		return nil, fmt.Errorf("error creating Document AI client: %w", err)
	}

	provider := &GoogleDocAIProvider{
		projectID:   config.GoogleProjectID,
		location:    config.GoogleLocation,
		processorID: config.GoogleProcessorID,
		client:      client,
	}

	logger.Info("Successfully initialized Google Document AI provider")
	return provider, nil
}

func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{
		"project_id":   p.projectID,
		"location":     p.location,
		"processor_id": p.processorID,
	})
	logger.Debug("Starting Document AI processing")

	// Detect MIME type
	mtype := mimetype.Detect(imageContent)
	logger.WithField("mime_type", mtype.String()).Debug("Detected file type")

	if !isImageMIMEType(mtype.String()) {
		logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
		return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
	}

	name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)

	req := &documentaipb.ProcessRequest{
		Name: name,
		Source: &documentaipb.ProcessRequest_RawDocument{
			RawDocument: &documentaipb.RawDocument{
				Content:  imageContent,
				MimeType: mtype.String(),
			},
		},
	}

	logger.Debug("Sending request to Document AI")
	resp, err := p.client.ProcessDocument(ctx, req)
	if err != nil {
		logger.WithError(err).Error("Failed to process document")
		return nil, fmt.Errorf("error processing document: %w", err)
	}

	if resp == nil || resp.Document == nil {
		logger.Error("Received nil response or document from Document AI")
		return nil, fmt.Errorf("received nil response or document from Document AI")
	}

	if resp.Document.Error != nil {
		logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
		return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
	}

	metadata := map[string]string{
		"provider":     "google_docai",
		"mime_type":    mtype.String(),
		"page_count":   fmt.Sprintf("%d", len(resp.Document.GetPages())),
		"processor_id": p.processorID,
	}

	// Safely add language code if available
	if pages := resp.Document.GetPages(); len(pages) > 0 {
		if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
			metadata["lang_code"] = langs[0].GetLanguageCode()
		}
	}

	result := &OCRResult{
		Text:     resp.Document.Text,
		Metadata: metadata,
	}

	// Add hOCR output if available
	if len(resp.Document.GetPages()) > 0 {
		var hocr string
		func() {
			defer func() {
				if r := recover(); r != nil {
					logger.WithField("error", r).Error("Panic during hOCR generation")
				}
			}()
			hocr = generateHOCR(resp.Document)
		}()
		if hocr != "" {
			result.HOCR = hocr
		}
	}

	logger.WithField("content_length", len(result.Text)).Info("Successfully processed document")
	return result, nil
}

// isImageMIMEType checks if the given MIME type is a supported image type
func isImageMIMEType(mimeType string) bool {
	supportedTypes := map[string]bool{
		"image/jpeg":      true,
		"image/jpg":       true,
		"image/png":       true,
		"image/tiff":      true,
		"image/bmp":       true,
		"application/pdf": true,
	}
	return supportedTypes[mimeType]
}

// generateHOCR converts Document AI response to hOCR format
func generateHOCR(doc *documentaipb.Document) string {
	if len(doc.GetPages()) == 0 {
		return ""
	}

	var hocr strings.Builder
	hocr.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>OCR Output</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name='ocr-system' content='google-docai' />
</head>
<body>`)

	for pageNum, page := range doc.GetPages() {
		pageWidth := page.GetDimension().GetWidth()
		pageHeight := page.GetDimension().GetHeight()
		// Validate dimensions
		if pageWidth <= 0 || pageHeight <= 0 {
			continue
		}

		hocr.WriteString(fmt.Sprintf(`
<div class='ocr_page' id='page_%d' title='image;bbox 0 0 %d %d'>`,
			pageNum+1, int(pageWidth), int(pageHeight)))

		// Process paragraphs
		for _, para := range page.GetParagraphs() {
			paraBox := para.GetLayout().GetBoundingPoly().GetNormalizedVertices()
			if len(paraBox) < 4 {
				continue
			}

			// Convert normalized coordinates to absolute
			// Use float64 for intermediate calculations to prevent overflow
			x1 := int(float64(paraBox[0].GetX()) * float64(pageWidth))
			y1 := int(float64(paraBox[0].GetY()) * float64(pageHeight))
			x2 := int(float64(paraBox[2].GetX()) * float64(pageWidth))
			y2 := int(float64(paraBox[2].GetY()) * float64(pageHeight))

			// Validate coordinates
			if x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0 ||
				x1 > int(pageWidth) || y1 > int(pageHeight) ||
				x2 > int(pageWidth) || y2 > int(pageHeight) {
				continue
			}

			hocr.WriteString(fmt.Sprintf(`
<p class='ocr_par' id='par_%d_%d' title='bbox %d %d %d %d'>`,
				pageNum+1, len(page.GetParagraphs()), x1, y1, x2, y2))

			// Process words within paragraph
			for _, token := range para.GetLayout().GetTextAnchor().GetTextSegments() {
				text := doc.Text[token.GetStartIndex():token.GetEndIndex()]
				if text == "" {
					continue
				}

				// Escape HTML special characters
				text = html.EscapeString(text)

				hocr.WriteString(fmt.Sprintf(`
<span class='ocrx_word'>%s</span>`, text))
			}

			hocr.WriteString("\n </p>")
		}
		hocr.WriteString("\n </div>")
	}

	hocr.WriteString("\n</body>\n</html>")
	return hocr.String()
}

// Close releases resources used by the provider
func (p *GoogleDocAIProvider) Close() error {
	if p.client != nil {
		return p.client.Close()
	}
	return nil
}
ocr/google_docai_provider_test.go (new file, 94 lines)

package ocr

import (
	"regexp"
	"strings"
	"testing"

	"cloud.google.com/go/documentai/apiv1/documentaipb"
)

func TestGenerateHOCR(t *testing.T) {
	tests := []struct {
		name     string
		doc      *documentaipb.Document
		expected string
	}{
		{
			name:     "empty document",
			doc:      &documentaipb.Document{},
			expected: "",
		},
		{
			name: "single page with one paragraph",
			doc: &documentaipb.Document{
				Text: "Hello World",
				Pages: []*documentaipb.Document_Page{
					{
						Dimension: &documentaipb.Document_Page_Dimension{
							Width:  800,
							Height: 600,
						},
						Paragraphs: []*documentaipb.Document_Page_Paragraph{
							{
								Layout: &documentaipb.Document_Page_Layout{
									BoundingPoly: &documentaipb.BoundingPoly{
										NormalizedVertices: []*documentaipb.NormalizedVertex{
											{X: 0.1, Y: 0.1},
											{X: 0.9, Y: 0.1},
											{X: 0.9, Y: 0.2},
											{X: 0.1, Y: 0.2},
										},
									},
									TextAnchor: &documentaipb.Document_TextAnchor{
										TextSegments: []*documentaipb.Document_TextAnchor_TextSegment{
											{
												StartIndex: 0,
												EndIndex:   11,
											},
										},
									},
								},
							},
						},
					},
				},
			},
			expected: "(?s).*<div class='ocr_page' id='page_1' title='image;bbox 0 0 800 600'>.*" +
				"<p class='ocr_par' id='par_1_1' title='bbox 80 60 719 120'>.*" +
				"<span class='ocrx_word'>Hello World</span>.*</p>.*</div>.*",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := generateHOCR(tt.doc)

			if tt.expected == "" {
				if result != "" {
					t.Errorf("expected empty string, got %v", result)
				}
				return
			}

			matched, err := regexp.MatchString(tt.expected, result)
			if err != nil {
				t.Fatalf("error matching regex: %v", err)
			}
			if !matched {
				t.Errorf("expected to match regex %v\ngot: %v", tt.expected, result)
			}

			// Verify basic hOCR structure
			if !strings.Contains(result, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") {
				t.Error("missing XML declaration")
			}
			if !strings.Contains(result, "<html xmlns=\"http://www.w3.org/1999/xhtml\"") {
				t.Error("missing HTML namespace")
			}
			if !strings.Contains(result, "<meta name='ocr-system' content='google-docai'") {
				t.Error("missing OCR system metadata")
			}
		})
	}
}
ocr/llm_provider.go (new file, 147 lines)

package ocr

import (
	"bytes"
	"context"
	"encoding/base64"
	"fmt"
	"image"
	"os"
	"strings"

	_ "image/jpeg"

	"github.com/sirupsen/logrus"
	"github.com/tmc/langchaingo/llms"
	"github.com/tmc/langchaingo/llms/ollama"
	"github.com/tmc/langchaingo/llms/openai"
)

// LLMProvider implements OCR using LLM vision models
type LLMProvider struct {
	provider string
	model    string
	llm      llms.Model
	template string // OCR prompt template
}

func newLLMProvider(config Config) (*LLMProvider, error) {
	logger := log.WithFields(logrus.Fields{
		"provider": config.VisionLLMProvider,
		"model":    config.VisionLLMModel,
	})
	logger.Info("Creating new LLM OCR provider")

	var model llms.Model
	var err error

	switch strings.ToLower(config.VisionLLMProvider) {
	case "openai":
		logger.Debug("Initializing OpenAI vision model")
		model, err = createOpenAIClient(config)
	case "ollama":
		logger.Debug("Initializing Ollama vision model")
		model, err = createOllamaClient(config)
	default:
		return nil, fmt.Errorf("unsupported vision LLM provider: %s", config.VisionLLMProvider)
	}

	if err != nil {
		logger.WithError(err).Error("Failed to create vision LLM client")
		return nil, fmt.Errorf("error creating vision LLM client: %w", err)
	}

	logger.Info("Successfully initialized LLM OCR provider")
	return &LLMProvider{
		provider: config.VisionLLMProvider,
		model:    config.VisionLLMModel,
		llm:      model,
		template: defaultOCRPrompt,
	}, nil
}

func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{
		"provider": p.provider,
		"model":    p.model,
	})
	logger.Debug("Starting OCR processing")

	// Log the image dimensions
	img, _, err := image.Decode(bytes.NewReader(imageContent))
	if err != nil {
		logger.WithError(err).Error("Failed to decode image")
		return nil, fmt.Errorf("error decoding image: %w", err)
	}
	bounds := img.Bounds()
	logger.WithFields(logrus.Fields{
		"width":  bounds.Dx(),
		"height": bounds.Dy(),
	}).Debug("Image dimensions")

	// Prepare content parts based on provider type
	var parts []llms.ContentPart
	if strings.ToLower(p.provider) != "openai" {
		logger.Debug("Using binary image format for non-OpenAI provider")
		parts = []llms.ContentPart{
			llms.BinaryPart("image/jpeg", imageContent),
			llms.TextPart(p.template),
		}
	} else {
		logger.Debug("Using base64 image format for OpenAI provider")
		base64Image := base64.StdEncoding.EncodeToString(imageContent)
		parts = []llms.ContentPart{
			llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
			llms.TextPart(p.template),
		}
	}

	// Convert the image to text
	logger.Debug("Sending request to vision model")
	completion, err := p.llm.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: parts,
			Role:  llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		logger.WithError(err).Error("Failed to get response from vision model")
		return nil, fmt.Errorf("error getting response from LLM: %w", err)
	}

	result := &OCRResult{
		Text: completion.Choices[0].Content,
		Metadata: map[string]string{
			"provider": p.provider,
			"model":    p.model,
		},
	}
	logger.WithField("content_length", len(result.Text)).Info("Successfully processed image")
	return result, nil
}

// createOpenAIClient creates a new OpenAI vision model client
func createOpenAIClient(config Config) (llms.Model, error) {
	apiKey := os.Getenv("OPENAI_API_KEY")
	if apiKey == "" {
		return nil, fmt.Errorf("OpenAI API key is not set")
	}
	return openai.New(
		openai.WithModel(config.VisionLLMModel),
		openai.WithToken(apiKey),
	)
}

// createOllamaClient creates a new Ollama vision model client
func createOllamaClient(config Config) (llms.Model, error) {
	host := os.Getenv("OLLAMA_HOST")
	if host == "" {
		host = "http://127.0.0.1:11434"
	}
	return ollama.New(
		ollama.WithModel(config.VisionLLMModel),
		ollama.WithServerURL(host),
	)
}

const defaultOCRPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format but without a code block.`
ocr/provider.go (new file, 92 lines)

package ocr

import (
	"context"
	"fmt"

	"github.com/sirupsen/logrus"
)

var log = logrus.New()

// OCRResult holds the output from OCR processing
type OCRResult struct {
	// Plain text output (required)
	Text string

	// hOCR output (optional, if provider supports it)
	HOCR string

	// Additional provider-specific metadata
	Metadata map[string]string
}

// Provider defines the interface for OCR processing
type Provider interface {
	ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
}

// Config holds the OCR provider configuration
type Config struct {
	// Provider type (e.g., "llm", "google_docai", "azure")
	Provider string

	// Google Document AI settings
	GoogleProjectID   string
	GoogleLocation    string
	GoogleProcessorID string

	// LLM settings (from existing config)
	VisionLLMProvider string
	VisionLLMModel    string

	// Azure Document Intelligence settings
	AzureEndpoint string
	AzureAPIKey   string
	AzureModelID  string // Optional, defaults to "prebuilt-read"
	AzureTimeout  int    // Optional, defaults to 120 seconds

	// OCR output options
	EnableHOCR bool // Whether to request hOCR output if supported by the provider
}

// NewProvider creates a new OCR provider based on configuration
func NewProvider(config Config) (Provider, error) {
	log.Info("Initializing OCR provider: ", config.Provider)

	switch config.Provider {
	case "google_docai":
		if config.GoogleProjectID == "" || config.GoogleLocation == "" || config.GoogleProcessorID == "" {
			return nil, fmt.Errorf("missing required Google Document AI configuration")
		}
		log.WithFields(logrus.Fields{
			"location":     config.GoogleLocation,
			"processor_id": config.GoogleProcessorID,
		}).Info("Using Google Document AI provider")
		return newGoogleDocAIProvider(config)

	case "llm":
		if config.VisionLLMProvider == "" || config.VisionLLMModel == "" {
			return nil, fmt.Errorf("missing required LLM configuration")
		}
		log.WithFields(logrus.Fields{
			"provider": config.VisionLLMProvider,
			"model":    config.VisionLLMModel,
		}).Info("Using LLM OCR provider")
		return newLLMProvider(config)

	case "azure":
		if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
			return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
		}
		return newAzureProvider(config)

	default:
		return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
	}
}

// SetLogLevel sets the logging level for the OCR package
func SetLogLevel(level logrus.Level) {
	log.SetLevel(level)
}
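A hedged usage sketch of the factory above, wiring an LLM-backed provider from environment variables. The import path and environment-variable names are assumptions for illustration; the actual wiring in main.go is not part of this diff.

```go
package main

import (
	"context"
	"fmt"
	"os"

	"paperless-gpt/ocr" // import path assumed for illustration
)

// runOCRExample shows how a caller might obtain a provider and OCR one image.
func runOCRExample(imageBytes []byte) error {
	provider, err := ocr.NewProvider(ocr.Config{
		Provider:          "llm",
		VisionLLMProvider: os.Getenv("VISION_LLM_PROVIDER"), // e.g. "ollama" or "openai" (assumed variable name)
		VisionLLMModel:    os.Getenv("VISION_LLM_MODEL"),    // e.g. "minicpm-v" (assumed variable name)
	})
	if err != nil {
		return err
	}

	result, err := provider.ProcessImage(context.Background(), imageBytes)
	if err != nil {
		return err
	}
	fmt.Println(result.Metadata["provider"], len(result.Text))
	return nil
}
```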
paperless.go (127 changed lines)

@@ -3,11 +3,13 @@ package main
 import (
 	"bytes"
 	"context"
+	"crypto/tls"
 	"encoding/json"
 	"fmt"
 	"image/jpeg"
 	"io"
 	"net/http"
+	"net/url"
 	"os"
 	"path/filepath"
 	"slices"

@@ -16,6 +18,7 @@ import (
 	"sync"

 	"github.com/gen2brain/go-fitz"
+	"github.com/sirupsen/logrus"
 	"golang.org/x/sync/errgroup"
 	"gorm.io/gorm"
 )

@@ -58,10 +61,18 @@ func hasSameTags(original, suggested []string) bool {
 func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
 	cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR")

+	// Create a custom HTTP transport with TLS configuration
+	tr := &http.Transport{
+		TLSClientConfig: &tls.Config{
+			InsecureSkipVerify: paperlessInsecureSkipVerify,
+		},
+	}
+	httpClient := &http.Client{Transport: tr}
+
 	return &PaperlessClient{
 		BaseURL:     strings.TrimRight(baseURL, "/"),
 		APIToken:    apiToken,
-		HTTPClient:  &http.Client{},
+		HTTPClient:  httpClient,
 		CacheFolder: cacheFolder,
 	}
 }

@@ -80,7 +91,52 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body
 		req.Header.Set("Content-Type", "application/json")
 	}

-	return client.HTTPClient.Do(req)
+	log.WithFields(logrus.Fields{
+		"method": method,
+		"url":    url,
+	}).Debug("Making HTTP request")
+
+	resp, err := client.HTTPClient.Do(req)
+	if err != nil {
+		log.WithError(err).WithFields(logrus.Fields{
+			"url":    url,
+			"method": method,
+			"error":  err,
+		}).Error("HTTP request failed")
+		return nil, fmt.Errorf("HTTP request failed: %w", err)
+	}
+
+	// Check if response is HTML instead of JSON for API endpoints
+	if strings.HasPrefix(path, "api/") {
+		contentType := resp.Header.Get("Content-Type")
+		if strings.Contains(contentType, "text/html") {
+			bodyBytes, _ := io.ReadAll(resp.Body)
+			resp.Body.Close()
+
+			// Create a new response with the same body for the caller
+			resp = &http.Response{
+				Status:     resp.Status,
+				StatusCode: resp.StatusCode,
+				Header:     resp.Header,
+				Body:       io.NopCloser(bytes.NewBuffer(bodyBytes)),
+			}
+
+			log.WithFields(logrus.Fields{
+				"url":          url,
+				"method":       method,
+				"content-type": contentType,
+				"status-code":  resp.StatusCode,
+				"response":     string(bodyBytes),
+				"base-url":     client.BaseURL,
+				"request-path": path,
+				"full-headers": resp.Header,
+			}).Error("Received HTML response for API request")
+
+			return nil, fmt.Errorf("received HTML response instead of JSON (status: %d). This often indicates an SSL/TLS issue or invalid authentication. Check your PAPERLESS_URL, PAPERLESS_TOKEN and PAPERLESS_INSECURE_SKIP_VERIFY settings. Full response: %s", resp.StatusCode, string(bodyBytes))
+		}
+	}
+
+	return resp, nil
 }

 // GetAllTags retrieves all tags from the Paperless-NGX API

@@ -120,10 +176,19 @@ func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int,
 		// Extract relative path from the Next URL
 		if tagsResponse.Next != "" {
 			nextURL := tagsResponse.Next
-			if strings.HasPrefix(nextURL, client.BaseURL) {
-				nextURL = strings.TrimPrefix(nextURL, client.BaseURL+"/")
+			if strings.HasPrefix(nextURL, "http") {
+				// Extract just the path portion from the full URL
+				if parsedURL, err := url.Parse(nextURL); err == nil {
+					path = strings.TrimPrefix(parsedURL.Path, "/")
+					if parsedURL.RawQuery != "" {
+						path += "?" + parsedURL.RawQuery
+					}
+				} else {
+					return nil, fmt.Errorf("failed to parse next URL: %v", err)
+				}
+			} else {
+				path = strings.TrimPrefix(nextURL, "/")
 			}
-			path = nextURL
 		} else {
 			path = ""
 		}

@@ -143,19 +208,34 @@ func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []st

 	resp, err := client.Do(ctx, "GET", path, nil)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("HTTP request failed in GetDocumentsByTags: %w", err)
 	}
 	defer resp.Body.Close()

+	// Read the response body
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
 	if resp.StatusCode != http.StatusOK {
-		bodyBytes, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("error searching documents: %d, %s", resp.StatusCode, string(bodyBytes))
+		log.WithFields(logrus.Fields{
+			"status_code": resp.StatusCode,
+			"path":        path,
+			"response":    string(bodyBytes),
+			"headers":     resp.Header,
+		}).Error("Error response from server in GetDocumentsByTags")
+		return nil, fmt.Errorf("error searching documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes))
 	}

 	var documentsResponse GetDocumentsApiResponse
-	err = json.NewDecoder(resp.Body).Decode(&documentsResponse)
+	err = json.Unmarshal(bodyBytes, &documentsResponse)
 	if err != nil {
-		return nil, err
+		log.WithFields(logrus.Fields{
+			"response_body": string(bodyBytes),
+			"error":         err,
+		}).Error("Failed to parse JSON response in GetDocumentsByTags")
+		return nil, fmt.Errorf("failed to parse JSON response: %w", err)
 	}

 	allTags, err := client.GetAllTags(ctx)

@@ -365,12 +445,12 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
 				updatedFields["correspondent"] = correspondentID
 			} else {
 				newCorrespondent := instantiateCorrespondent(document.SuggestedCorrespondent)
-				newCorrespondentID, err := client.CreateCorrespondent(context.Background(), newCorrespondent)
+				newCorrespondentID, err := client.CreateOrGetCorrespondent(context.Background(), newCorrespondent)
 				if err != nil {
-					log.Errorf("Error creating correspondent with name %s: %v\n", document.SuggestedCorrespondent, err)
+					log.Errorf("Error creating/getting correspondent with name %s: %v\n", document.SuggestedCorrespondent, err)
 					return err
 				}
-				log.Infof("Created correspondent with name %s and ID %d\n", document.SuggestedCorrespondent, newCorrespondentID)
+				log.Infof("Using correspondent with name %s and ID %d\n", document.SuggestedCorrespondent, newCorrespondentID)
 				updatedFields["correspondent"] = newCorrespondentID
 			}
 		}

@@ -612,17 +692,27 @@ func instantiateCorrespondent(name string) Correspondent {
 	}
 }

-// CreateCorrespondent creates a new correspondent in Paperless-NGX
-func (client *PaperlessClient) CreateCorrespondent(ctx context.Context, correspondent Correspondent) (int, error) {
-	url := "api/correspondents/"
+// CreateOrGetCorrespondent creates a new correspondent or returns existing one if name already exists
+func (client *PaperlessClient) CreateOrGetCorrespondent(ctx context.Context, correspondent Correspondent) (int, error) {
+	// First try to find existing correspondent
+	correspondents, err := client.GetAllCorrespondents(ctx)
+	if err != nil {
+		return 0, fmt.Errorf("error fetching correspondents: %w", err)
+	}

-	// Marshal the correspondent data to JSON
+	// Check if correspondent already exists
+	if id, exists := correspondents[correspondent.Name]; exists {
+		log.Infof("Using existing correspondent with name %s and ID %d", correspondent.Name, id)
+		return id, nil
+	}
+
+	// If not found, create new correspondent
+	url := "api/correspondents/"
 	jsonData, err := json.Marshal(correspondent)
 	if err != nil {
 		return 0, err
 	}

-	// Send the POST request
 	resp, err := client.Do(ctx, "POST", url, bytes.NewBuffer(jsonData))
 	if err != nil {
 		return 0, err

@@ -634,7 +724,6 @@ func (client *PaperlessClient) CreateOrGetCorrespondent(ctx context.Context, correspo
 		return 0, fmt.Errorf("error creating correspondent: %d, %s", resp.StatusCode, string(bodyBytes))
 	}

-	// Decode the response body to get the ID of the created correspondent
 	var createdCorrespondent struct {
 		ID int `json:"id"`
 	}
paperless_test.go (filename inferred from the hunk contents)

@@ -9,7 +9,6 @@ import (
 	"net/http/httptest"
 	"os"
 	"testing"
-	"time"

 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"

@@ -180,31 +179,7 @@ func TestGetDocumentsByTags(t *testing.T) {

 	// Mock data for documents
 	documentsResponse := GetDocumentsApiResponse{
-		Results: []struct {
-			ID                  int           `json:"id"`
-			Correspondent       int           `json:"correspondent"`
-			DocumentType        interface{}   `json:"document_type"`
-			StoragePath         interface{}   `json:"storage_path"`
-			Title               string        `json:"title"`
-			Content             string        `json:"content"`
-			Tags                []int         `json:"tags"`
-			Created             time.Time     `json:"created"`
-			CreatedDate         string        `json:"created_date"`
-			Modified            time.Time     `json:"modified"`
-			Added               time.Time     `json:"added"`
-			ArchiveSerialNumber interface{}   `json:"archive_serial_number"`
-			OriginalFileName    string        `json:"original_file_name"`
-			ArchivedFileName    string        `json:"archived_file_name"`
-			Owner               int           `json:"owner"`
-			UserCanChange       bool          `json:"user_can_change"`
-			Notes               []interface{} `json:"notes"`
-			SearchHit           struct {
-				Score          float64 `json:"score"`
-				Highlights     string  `json:"highlights"`
-				NoteHighlights string  `json:"note_highlights"`
-				Rank           int     `json:"rank"`
-			} `json:"__search_hit__"`
-		}{
+		Results: []GetDocumentApiResponseResult{
 			{
 				ID:    1,
 				Title: "Document 1",
types.go (104 changed lines)

@@ -1,59 +1,63 @@
 package main

-import (
-	"time"
-)
+// GetDocumentsApiResponse is the response payload for /documents endpoint.
+// But we are only interested in a subset of the fields.

 type GetDocumentsApiResponse struct {
 	Count int `json:"count"`
-	Next     interface{} `json:"next"`
-	Previous interface{} `json:"previous"`
+	// Next     interface{} `json:"next"`
+	// Previous interface{} `json:"previous"`
 	All []int `json:"all"`
-	Results []struct {
-		ID                  int           `json:"id"`
-		Correspondent       int           `json:"correspondent"`
-		DocumentType        interface{}   `json:"document_type"`
-		StoragePath         interface{}   `json:"storage_path"`
-		Title               string        `json:"title"`
-		Content             string        `json:"content"`
-		Tags                []int         `json:"tags"`
-		Created             time.Time     `json:"created"`
-		CreatedDate         string        `json:"created_date"`
-		Modified            time.Time     `json:"modified"`
-		Added               time.Time     `json:"added"`
-		ArchiveSerialNumber interface{}   `json:"archive_serial_number"`
-		OriginalFileName    string        `json:"original_file_name"`
-		ArchivedFileName    string        `json:"archived_file_name"`
-		Owner               int           `json:"owner"`
-		UserCanChange       bool          `json:"user_can_change"`
-		Notes               []interface{} `json:"notes"`
-		SearchHit           struct {
-			Score          float64 `json:"score"`
-			Highlights     string  `json:"highlights"`
-			NoteHighlights string  `json:"note_highlights"`
-			Rank           int     `json:"rank"`
-		} `json:"__search_hit__"`
-	} `json:"results"`
+	Results []GetDocumentApiResponseResult `json:"results"`
 }

+// GetDocumentApiResponseResult is a part of the response payload for /documents endpoint.
+// But we are only interested in a subset of the fields.
+type GetDocumentApiResponseResult struct {
+	ID            int `json:"id"`
+	Correspondent int `json:"correspondent"`
+	// DocumentType        interface{} `json:"document_type"`
+	// StoragePath         interface{} `json:"storage_path"`
+	Title   string `json:"title"`
+	Content string `json:"content"`
+	Tags    []int  `json:"tags"`
+	// Created             time.Time   `json:"created"`
+	// CreatedDate         string      `json:"created_date"`
+	// Modified            time.Time   `json:"modified"`
+	// Added               time.Time   `json:"added"`
+	// ArchiveSerialNumber interface{} `json:"archive_serial_number"`
+	// OriginalFileName    string      `json:"original_file_name"`
+	// ArchivedFileName    string      `json:"archived_file_name"`
+	// Owner               int         `json:"owner"`
+	// UserCanChange       bool        `json:"user_can_change"`
+	Notes []interface{} `json:"notes"`
+	// SearchHit struct {
+	// 	Score          float64 `json:"score"`
+	// 	Highlights     string  `json:"highlights"`
+	// 	NoteHighlights string  `json:"note_highlights"`
+	// 	Rank           int     `json:"rank"`
+	// } `json:"__search_hit__"`
+}
+
+// GetDocumentApiResponse is the response payload for /documents/{id} endpoint.
+// But we are only interested in a subset of the fields.
 type GetDocumentApiResponse struct {
 	ID            int `json:"id"`
 	Correspondent int `json:"correspondent"`
-	DocumentType        interface{} `json:"document_type"`
-	StoragePath         interface{} `json:"storage_path"`
+	// DocumentType        interface{} `json:"document_type"`
+	// StoragePath         interface{} `json:"storage_path"`
 	Title   string `json:"title"`
 	Content string `json:"content"`
 	Tags    []int  `json:"tags"`
-	Created             time.Time   `json:"created"`
-	CreatedDate         string      `json:"created_date"`
-	Modified            time.Time   `json:"modified"`
-	Added               time.Time   `json:"added"`
-	ArchiveSerialNumber interface{} `json:"archive_serial_number"`
-	OriginalFileName    string      `json:"original_file_name"`
-	ArchivedFileName    string      `json:"archived_file_name"`
-	Owner               int         `json:"owner"`
-	UserCanChange       bool        `json:"user_can_change"`
+	// Created             time.Time   `json:"created"`
+	// CreatedDate         string      `json:"created_date"`
+	// Modified            time.Time   `json:"modified"`
+	// Added               time.Time   `json:"added"`
+	// ArchiveSerialNumber interface{} `json:"archive_serial_number"`
+	// OriginalFileName    string      `json:"original_file_name"`
+	// ArchivedFileName    string      `json:"archived_file_name"`
+	// Owner               int         `json:"owner"`
+	// UserCanChange       bool        `json:"user_can_change"`
 	Notes []interface{} `json:"notes"`
 }

 // Document is a stripped down version of the document object from paperless-ngx.
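With Results now typed as []GetDocumentApiResponseResult, a documents search response decodes into named structs instead of an anonymous one. A minimal sketch follows; the JSON literal is a hand-written sample of the Paperless-NGX response shape, trimmed to the fields kept above.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// sampleDocumentsJSON is hand-written sample data (not captured API output);
// only fields retained in GetDocumentsApiResponse are included.
const sampleDocumentsJSON = `{
  "count": 1,
  "all": [1],
  "results": [
    {"id": 1, "correspondent": 3, "title": "Document 1", "content": "lorem", "tags": [2, 5], "notes": []}
  ]
}`

// decodeSampleDocuments shows the decoding path used by GetDocumentsByTags.
func decodeSampleDocuments() error {
	var resp GetDocumentsApiResponse
	if err := json.Unmarshal([]byte(sampleDocumentsJSON), &resp); err != nil {
		return err
	}
	for _, doc := range resp.Results {
		fmt.Println(doc.ID, doc.Title, doc.Tags)
	}
	return nil
}
```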
web-app/dist/.keep (new, vendored, empty file)

web-app/package-lock.json (generated, 678 changed lines; diff suppressed because it is too large)
web-app/package.json

@@ -43,7 +43,7 @@
 		"eslint": "^9.9.0",
 		"eslint-plugin-react-hooks": "^5.1.0-rc.0",
 		"eslint-plugin-react-refresh": "^0.4.9",
-		"globals": "^15.9.0",
+		"globals": "^16.0.0",
 		"node-fetch": "^3.3.0",
 		"postcss": "^8.4.47",
 		"tailwindcss": "^3.4.12",
@@ -1 +1 @@
-{"root":["./src/app.tsx","./src/documentprocessor.tsx","./src/experimentalocr.tsx","./src/history.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/documentcard.tsx","./src/components/documentstoprocess.tsx","./src/components/nodocuments.tsx","./src/components/sidebar.tsx","./src/components/successmodal.tsx","./src/components/suggestioncard.tsx","./src/components/suggestionsreview.tsx","./src/components/undocard.tsx"],"version":"5.7.2"}
+{"root":["./src/app.tsx","./src/documentprocessor.tsx","./src/experimentalocr.tsx","./src/history.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/documentcard.tsx","./src/components/documentstoprocess.tsx","./src/components/nodocuments.tsx","./src/components/sidebar.tsx","./src/components/successmodal.tsx","./src/components/suggestioncard.tsx","./src/components/suggestionsreview.tsx","./src/components/undocard.tsx"],"version":"5.7.3"}

@@ -1 +1 @@
-{"root":["./vite.config.ts"],"version":"5.7.2"}
+{"root":["./vite.config.ts"],"version":"5.7.3"}