Mirror of https://github.com/icereed/paperless-gpt.git, synced 2025-03-14 05:38:01 -05:00

Compare commits

No commits in common. "main" and "v0.12.0" have entirely different histories.

19 changed files with 494 additions and 1814 deletions
.github/workflows/docker-build-and-push.yml (vendored, 2 changes)

@@ -230,7 +230,7 @@ jobs:
 - name: Setup Node.js
 uses: actions/setup-node@v4
 with:
-node-version: '22'
+node-version: '20'
 cache: 'npm'
 cache-dependency-path: './web-app/package-lock.json'
 - name: Install dependencies

Dockerfile (14 changes)

@@ -25,22 +25,22 @@ COPY web-app /app/
 RUN npm run build

 # Stage 2: Build the Go binary
-FROM golang:1.24.1-alpine3.21 AS builder
+FROM golang:1.23.6-alpine3.21 AS builder

 # Set the working directory inside the container
 WORKDIR /app

 # Package versions for Renovate
 # renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
-ENV GCC_VERSION="14.2.0-r4"
+ENV GCC_VERSION=14.2.0-r4
 # renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
-ENV MUSL_DEV_VERSION="1.2.5-r9"
+ENV MUSL_DEV_VERSION=1.2.5-r8
 # renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
-ENV MUPDF_VERSION="1.24.10-r0"
+ENV MUPDF_VERSION=1.24.10-r0
 # renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
-ENV MUPDF_DEV_VERSION="1.24.10-r0"
+ENV MUPDF_DEV_VERSION=1.24.10-r0
 # renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
-ENV SED_VERSION="4.9-r2"
+ENV SED_VERSION=4.9-r2

 # Install necessary packages with pinned versions
 RUN apk add --no-cache \

@@ -82,7 +82,7 @@ RUN sed -i \
 RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .

 # Stage 3: Create a lightweight image with just the binary
-FROM alpine:3.21.3
+FROM alpine:latest

 ENV GIN_MODE=release

README.md (170 changes)

@@ -22,7 +22,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4

 - **LLM OCR**: Use OpenAI or Ollama to extract text from images.
 - **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
-- **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
+- **More to come**: Stay tuned for more OCR providers!

 3. **Automatic Title & Tag Generation**
 No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.

@@ -39,11 +39,11 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
 - **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.

 7. **Simple Docker Deployment**
-A few environment variables, and you're off! Compose it alongside paperless-ngx with minimal fuss.
+A few environment variables, and you’re off! Compose it alongside paperless-ngx with minimal fuss.

 8. **Unified Web UI**

-- **Manual Review**: Approve or tweak AI's suggestions.
+- **Manual Review**: Approve or tweak AI’s suggestions.
 - **Auto Processing**: Focus only on edge cases while the rest is sorted for you.

 ---

@@ -56,12 +56,6 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
 - [Installation](#installation)
 - [Docker Compose](#docker-compose)
 - [Manual Setup](#manual-setup)
-- [OCR Providers](#ocr-providers)
-- [LLM-based OCR](#1-llm-based-ocr-default)
-- [Azure Document Intelligence](#2-azure-document-intelligence)
-- [Google Document AI](#3-google-document-ai)
-- [Comparing OCR Providers](#comparing-ocr-providers)
-- [Choosing the Right Provider](#choosing-the-right-provider)
 - [Configuration](#configuration)
 - [Environment Variables](#environment-variables)
 - [Custom Prompt Templates](#custom-prompt-templates)

@@ -92,7 +86,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4

 #### Docker Compose

-Here's an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
+Here’s an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:

 ```yaml
 services:

@@ -120,7 +114,7 @@ services:
 # Option 1: LLM-based OCR
 OCR_PROVIDER: "llm" # Default OCR provider
 VISION_LLM_PROVIDER: "ollama" # openai or ollama
-VISION_LLM_MODEL: "minicpm-v" # minicpm-v (ollama) or gpt-4o (openai)
+VISION_LLM_MODEL: "minicpm-v" # minicpm-v (ollama) or gpt-4v (openai)
 OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama

 # Option 2: Google Document AI

@@ -130,13 +124,6 @@ services:
 # GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
 # GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key

-# Option 3: Azure Document Intelligence
-# OCR_PROVIDER: 'azure' # Use Azure Document Intelligence
-# AZURE_DOCAI_ENDPOINT: 'your-endpoint' # Your Azure endpoint URL
-# AZURE_DOCAI_KEY: 'your-key' # Your Azure API key
-# AZURE_DOCAI_MODEL_ID: 'prebuilt-read' # Optional, defaults to prebuilt-read
-# AZURE_DOCAI_TIMEOUT_SECONDS: '120' # Optional, defaults to 120 seconds
-
 AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
 OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
 LOG_LEVEL: "info" # Optional: debug, warn, error

@@ -185,63 +172,6 @@ services:
 ```
 
 ---

-## OCR Providers
-
-paperless-gpt supports three different OCR providers, each with unique strengths and capabilities:
-
-### 1. LLM-based OCR (Default)
-- **Key Features**:
-- Uses vision-capable LLMs like gpt-4o or MiniCPM-V
-- High accuracy with complex layouts and difficult scans
-- Context-aware text recognition
-- Self-correcting capabilities for OCR errors
-- **Best For**:
-- Complex or unusual document layouts
-- Poor quality scans
-- Documents with mixed languages
-- **Configuration**:
-```yaml
-OCR_PROVIDER: "llm"
-VISION_LLM_PROVIDER: "openai" # or "ollama"
-VISION_LLM_MODEL: "gpt-4o" # or "minicpm-v"
-```
-
-### 2. Azure Document Intelligence
-- **Key Features**:
-- Enterprise-grade OCR solution
-- Prebuilt models for common document types
-- Layout preservation and table detection
-- Fast processing speeds
-- **Best For**:
-- Business documents and forms
-- High-volume processing
-- Documents requiring layout analysis
-- **Configuration**:
-```yaml
-OCR_PROVIDER: "azure"
-AZURE_DOCAI_ENDPOINT: "https://your-endpoint.cognitiveservices.azure.com/"
-AZURE_DOCAI_KEY: "your-key"
-AZURE_DOCAI_MODEL_ID: "prebuilt-read" # optional
-AZURE_DOCAI_TIMEOUT_SECONDS: "120" # optional
-```
-
-### 3. Google Document AI
-- **Key Features**:
-- Specialized document processors
-- Strong form field detection
-- Multi-language support
-- High accuracy on structured documents
-- **Best For**:
-- Forms and structured documents
-- Documents with tables
-- Multi-language documents
-- **Configuration**:
-```yaml
-OCR_PROVIDER: "google_docai"
-GOOGLE_PROJECT_ID: "your-project"
-GOOGLE_LOCATION: "us"
-GOOGLE_PROCESSOR_ID: "processor-id"
-```
-
 ## Configuration

@@ -249,43 +179,39 @@ paperless-gpt supports three different OCR providers, each with unique strengths

 # **Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.

-| Variable | Description | Required | Default |
-| -------- | ----------- | -------- | ------- |
-| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes | |
-| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes | |
-| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No | |
-| `MANUAL_TAG` | Tag for manual processing. | No | paperless-gpt |
-| `AUTO_TAG` | Tag for auto processing. | No | paperless-gpt-auto |
-| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes | |
-| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `deepseek-r1:8b`. | Yes | |
-| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. | |
-| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
-| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
-| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
-| `OCR_PROVIDER` | OCR provider to use (`llm`, `azure`, or `google_docai`). | No | llm |
-| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
-| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
-| `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | |
-| `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | |
-| `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional if using `azure` provider. | No | prebuilt-read |
-| `AZURE_DOCAI_TIMEOUT_SECONDS` | Azure Document Intelligence timeout in seconds. | No | 120 |
-| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
-| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
-| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
-| `GOOGLE_APPLICATION_CREDENTIALS` | Path to the mounted Google service account key. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
-| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. | No | paperless-gpt-ocr-auto |
-| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). | No | info |
-| `LISTEN_INTERFACE` | Network interface to listen on. | No | 8080 |
-| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. | No | true |
-| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. | No | true |
-| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. | No | true |
-| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. | No | 5 |
-| `TOKEN_LIMIT` | Maximum tokens allowed for prompts/content. Set to `0` to disable limit. Useful for smaller LLMs. | No | |
-| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No | |
+| Variable | Description | Required |
+| -------- | ----------- | -------- |
+| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes |
+| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes |
+| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No |
+| `MANUAL_TAG` | Tag for manual processing. Default: `paperless-gpt`. | No |
+| `AUTO_TAG` | Tag for auto processing. Default: `paperless-gpt-auto`. | No |
+| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes |
+| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `deepseek-r1:8b`. | Yes |
+| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. |
+| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No |
+| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). Default: `English`. | No |
+| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No |
+| `OCR_PROVIDER` | OCR provider to use (`llm` or `google_docai`). Default: `llm`. | No |
+| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. |
+| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. |
+| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. |
+| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. |
+| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. |
+| `GOOGLE_APPLICATION_CREDENTIALS` | Path to the mounted Google service account key. Required if OCR_PROVIDER is `google_docai`. | Cond. |
+| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. Default: `paperless-gpt-ocr-auto`. | No |
+| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). Default: `info`. | No |
+| `LISTEN_INTERFACE` | Network interface to listen on. Default: `:8080`. | No |
+| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
+| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
+| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
+| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. Default: `5`. | No |
+| `TOKEN_LIMIT` | Maximum tokens allowed for prompts/content. Set to `0` to disable limit. Useful for smaller LLMs. | No |
+| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No |

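The Required/Default columns in the table above translate directly into a small configuration loader at startup. The following is a minimal, illustrative Go sketch only; the helper names (`mustEnv`, `getEnvDefault`) are hypothetical and this is not the project's actual startup code.

```go
package main

import (
	"fmt"
	"os"
)

// getEnvDefault returns the value of key, or fallback if the variable is unset or empty.
// (Hypothetical helper for illustration; the real code may differ.)
func getEnvDefault(key, fallback string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return fallback
}

// mustEnv exits with an error when a required variable such as PAPERLESS_BASE_URL is missing.
func mustEnv(key string) string {
	v := os.Getenv(key)
	if v == "" {
		fmt.Fprintf(os.Stderr, "missing required environment variable %s\n", key)
		os.Exit(1)
	}
	return v
}

func main() {
	baseURL := mustEnv("PAPERLESS_BASE_URL")                 // Required: Yes
	llmLanguage := getEnvDefault("LLM_LANGUAGE", "English")  // Default: English
	ocrProvider := getEnvDefault("OCR_PROVIDER", "llm")      // Default: llm
	fmt.Println(baseURL, llmLanguage, ocrProvider)
}
```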
 ### Custom Prompt Templates

-paperless-gpt's flexible **prompt templates** let you shape how AI responds:
+paperless-gpt’s flexible **prompt templates** let you shape how AI responds:

 1. **`title_prompt.tmpl`**: For document titles.
 2. **`tag_prompt.tmpl`**: For tagging logic.

@@ -306,11 +232,13 @@ Then tweak at will—**paperless-gpt** reloads them automatically on startup!
 Each template has access to specific variables:

 **title_prompt.tmpl**:

 - `{{.Language}}` - Target language (e.g., "English")
 - `{{.Content}}` - Document content text
 - `{{.Title}}` - Original document title

 **tag_prompt.tmpl**:

 - `{{.Language}}` - Target language
 - `{{.AvailableTags}}` - List of existing tags in paperless-ngx
 - `{{.OriginalTags}}` - Document's current tags

@@ -318,9 +246,11 @@ Each template has access to specific variables:
 - `{{.Content}}` - Document content text

 **ocr_prompt.tmpl**:

 - `{{.Language}}` - Target language

 **correspondent_prompt.tmpl**:

 - `{{.Language}}` - Target language
 - `{{.AvailableCorrespondents}}` - List of existing correspondents
 - `{{.BlackList}}` - List of blacklisted correspondent names

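To make the placeholder lists above concrete, here is a small Go text/template sketch that renders a toy prompt with the documented `{{.Language}}`, `{{.Content}}` and `{{.Title}}` fields. The template string and struct are invented for illustration and are not the shipped title_prompt.tmpl.

```go
package main

import (
	"os"
	"text/template"
)

// PromptData mirrors the documented variables available to title_prompt.tmpl.
type PromptData struct {
	Language string
	Content  string
	Title    string
}

func main() {
	// A toy template using the same placeholders as the documented title prompt.
	const tmpl = "Suggest a {{.Language}} title for this document (current title: {{.Title}}):\n{{.Content}}\n"

	t := template.Must(template.New("title_prompt").Parse(tmpl))
	data := PromptData{
		Language: "English",
		Content:  "Invoice from ACME Corp dated 2024-01-15 ...",
		Title:    "scan_0001.pdf",
	}
	// Render the prompt to stdout; the real application would send it to the LLM instead.
	if err := t.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}
```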
@@ -335,25 +265,23 @@ The templates use Go's text/template syntax. paperless-gpt automatically reloads

 1. **Tag Documents**

-- Add `paperless-gpt` tag to documents for manual processing
-- Add `paperless-gpt-auto` for automatic processing
-- Add `paperless-gpt-ocr-auto` for automatic OCR processing
+- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.

 2. **Visit Web UI**

-- Go to `http://localhost:8080` (or your host) in your browser
-- Review documents tagged for processing
+- Go to `http://localhost:8080` (or your host) in your browser.

 3. **Generate & Apply Suggestions**

-- Click "Generate Suggestions" to see AI-proposed titles/tags/correspondents
-- Review and approve or edit suggestions
-- Click "Apply" to save changes to paperless-ngx
+- Click “Generate Suggestions” to see AI-proposed titles/tags/correspondents.
+- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
+4. **Try LLM-Based OCR (Experimental)**
+- If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
+- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).

+**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.

-4. **OCR Processing**
-- Tag documents with appropriate OCR tag to process them
-- Monitor progress in the Web UI
-- Review results and apply changes
 ---

 ## LLM-Based OCR: Compare for Yourself

@@ -92,88 +92,3 @@
 - E2E tests for web interface
 - Test fixtures and mocks
 - Playwright for frontend testing

-## OCR System Patterns
-
-### OCR Provider Architecture
-
-#### 1. Provider Interface
-- Common interface for all OCR implementations
-- Methods for image processing
-- Configuration through standardized Config struct
-- Resource management patterns
-
-#### 2. LLM Provider Implementation
-- Supports OpenAI and Ollama vision models
-- Base64 encoding for OpenAI requests
-- Binary format for Ollama requests
-- Template-based OCR prompts
-
-#### 3. Google Document AI Provider
-- Enterprise-grade OCR processing
-- MIME type validation
-- Processor configuration via environment
-- Regional endpoint support
-
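The removed notes above describe a provider abstraction: a common interface, a standardized Config struct, and per-provider implementations. A minimal Go sketch of what such an interface could look like is shown below; the names (`Provider`, `Config`, `ProcessImage`, `fakeProvider`) are assumptions for illustration, not the repository's actual API.

```go
package main

import (
	"context"
	"fmt"
)

// Config is a hypothetical provider-agnostic configuration struct
// (field names are invented for illustration).
type Config struct {
	Provider    string // "llm", "google_docai", ...
	VisionModel string // e.g. "minicpm-v" or "gpt-4o"
}

// Provider sketches a common OCR interface: every backend turns raw
// image bytes into extracted text.
type Provider interface {
	ProcessImage(ctx context.Context, imageData []byte) (string, error)
}

// fakeProvider is a stand-in implementation used only to show the shape of the interface.
type fakeProvider struct{ cfg Config }

func (p fakeProvider) ProcessImage(ctx context.Context, imageData []byte) (string, error) {
	return fmt.Sprintf("[%s] extracted %d bytes of text", p.cfg.Provider, len(imageData)), nil
}

func main() {
	var p Provider = fakeProvider{cfg: Config{Provider: "llm", VisionModel: "minicpm-v"}}
	text, _ := p.ProcessImage(context.Background(), []byte{0xFF, 0xD8})
	fmt.Println(text)
}
```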
-### Logging Patterns
-
-#### 1. Provider Initialization
-```
-[INFO] Initializing OCR provider: llm
-[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)
-```
-
-#### 2. Processing Logs
-```
-[DEBUG] Starting OCR processing
-[DEBUG] Image dimensions (width=800, height=1200)
-[DEBUG] Using binary image format for non-OpenAI provider
-[DEBUG] Sending request to vision model
-[INFO] Successfully processed image (content_length=1536)
-```
-
-#### 3. Error Logging
-```
-[ERROR] Failed to decode image: invalid format
-[ERROR] Unsupported file type: image/webp
-[ERROR] Failed to get response from vision model
-```
-
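Log lines of the shape shown above are what a structured logger such as logrus produces when fields are attached to an entry. The snippet below is a generic illustration, not code taken from the repository; the field names simply mirror the example log output.

```go
package main

import log "github.com/sirupsen/logrus"

func main() {
	log.SetLevel(log.DebugLevel)

	// Roughly corresponds to "[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)".
	log.WithFields(log.Fields{
		"provider": "ollama",
		"model":    "minicpm-v",
	}).Info("Using LLM OCR provider")

	// Roughly corresponds to "[DEBUG] Image dimensions (width=800, height=1200)".
	log.WithFields(log.Fields{"width": 800, "height": 1200}).Debug("Image dimensions")
}
```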
-### Error Handling Patterns
-
-#### 1. Configuration Validation
-- Required parameter checks
-- Environment variable validation
-- Provider-specific configuration
-- Connection testing
-
-#### 2. Processing Errors
-- Image format validation
-- MIME type checking
-- Content processing errors
-- Provider-specific error handling
-
-#### 3. Error Propagation
-- Detailed error contexts
-- Original error wrapping
-- Logging with error context
-- Recovery mechanisms
-
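The "detailed error contexts" and "original error wrapping" points above correspond to Go's standard error-wrapping idiom. A generic sketch, assuming a hypothetical MIME-type check rather than the project's real validation code:

```go
package main

import (
	"errors"
	"fmt"
)

var errUnsupportedType = errors.New("unsupported file type")

// validateMIME wraps the underlying sentinel error with context about which document failed.
func validateMIME(docID int, mimeType string) error {
	if mimeType != "image/jpeg" && mimeType != "image/png" {
		return fmt.Errorf("document %d: mime type %q: %w", docID, mimeType, errUnsupportedType)
	}
	return nil
}

func main() {
	err := validateMIME(42, "image/webp")
	// errors.Is still recognizes the wrapped sentinel after context was added.
	fmt.Println(err, errors.Is(err, errUnsupportedType))
}
```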
-### Processing Flow
-
-#### 1. Document Processing
-```
-Document Tagged → OCR Provider Selected → Image Processing → Text Extraction → Content Update
-```
-
-#### 2. Provider Selection
-```
-Config Check → Provider Initialization → Resource Setup → Provider Ready
-```
-
-#### 3. Error Recovery
-```
-Error Detection → Logging → Cleanup → Error Propagation
-```
-
-These patterns ensure consistent behavior across OCR providers while maintaining proper logging and error handling throughout the system.

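The "Config Check → Provider Initialization" step above is, in effect, a switch on the configured OCR_PROVIDER value. A hedged sketch of such a selection step follows; the function is invented for illustration and returns plain strings instead of real provider constructors.

```go
package main

import (
	"fmt"
	"os"
)

// selectOCRProvider picks an OCR backend name from the OCR_PROVIDER environment
// variable, defaulting to the LLM-based provider. The accepted values match the
// documented ones; wiring up actual provider constructors is out of scope here.
func selectOCRProvider() (string, error) {
	switch provider := os.Getenv("OCR_PROVIDER"); provider {
	case "", "llm":
		return "llm", nil
	case "google_docai":
		return "google_docai", nil
	case "azure":
		return "azure", nil
	default:
		return "", fmt.Errorf("unknown OCR provider %q", provider)
	}
}

func main() {
	p, err := selectOCRProvider()
	if err != nil {
		fmt.Fprintln(os.Stderr, "config check failed:", err)
		os.Exit(1)
	}
	fmt.Println("provider ready:", p)
}
```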
go.mod (63 changes)

@@ -1,33 +1,32 @@
 module paperless-gpt

-go 1.23.0
+go 1.22.0

-toolchain go1.24.1
+toolchain go1.23.6

 require (
-cloud.google.com/go/documentai v1.35.2
+cloud.google.com/go/documentai v1.35.1
 github.com/Masterminds/sprig/v3 v3.3.0
 github.com/fatih/color v1.18.0
-github.com/gabriel-vasile/mimetype v1.4.8
+github.com/gabriel-vasile/mimetype v1.4.3
 github.com/gen2brain/go-fitz v1.24.14
 github.com/gin-gonic/gin v1.10.0
 github.com/google/uuid v1.6.0
-github.com/hashicorp/go-retryablehttp v0.7.7
 github.com/sirupsen/logrus v1.9.3
 github.com/stretchr/testify v1.10.0
-github.com/tmc/langchaingo v0.1.13
+github.com/tmc/langchaingo v0.1.13-pre.1
-golang.org/x/sync v0.12.0
+golang.org/x/sync v0.11.0
-google.golang.org/api v0.225.0
+google.golang.org/api v0.214.0
 gorm.io/driver/sqlite v1.5.7
 gorm.io/gorm v1.25.12
 )

 require (
-cloud.google.com/go v0.118.1 // indirect
+cloud.google.com/go v0.116.0 // indirect
-cloud.google.com/go/auth v0.15.0 // indirect
+cloud.google.com/go/auth v0.13.0 // indirect
-cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
+cloud.google.com/go/auth/oauth2adapt v0.2.6 // indirect
 cloud.google.com/go/compute/metadata v0.6.0 // indirect
-cloud.google.com/go/longrunning v0.6.4 // indirect
+cloud.google.com/go/longrunning v0.6.2 // indirect
 dario.cat/mergo v1.0.1 // indirect
 github.com/Masterminds/goutils v1.1.1 // indirect
 github.com/Masterminds/semver/v3 v3.3.0 // indirect

@@ -46,10 +45,9 @@ require (
 github.com/go-playground/universal-translator v0.18.1 // indirect
 github.com/go-playground/validator/v10 v10.20.0 // indirect
 github.com/goccy/go-json v0.10.2 // indirect
-github.com/google/s2a-go v0.1.9 // indirect
+github.com/google/s2a-go v0.1.8 // indirect
-github.com/googleapis/enterprise-certificate-proxy v0.3.5 // indirect
+github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
-github.com/googleapis/gax-go/v2 v2.14.1 // indirect
+github.com/googleapis/gax-go/v2 v2.14.0 // indirect
-github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
 github.com/huandu/xstrings v1.5.0 // indirect
 github.com/jinzhu/inflection v1.0.0 // indirect
 github.com/jinzhu/now v1.1.5 // indirect

@@ -76,23 +74,22 @@ require (
 gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect
 gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
 gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
-go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
-go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
+go.opentelemetry.io/otel v1.29.0 // indirect
-go.opentelemetry.io/otel v1.34.0 // indirect
+go.opentelemetry.io/otel/metric v1.29.0 // indirect
-go.opentelemetry.io/otel/metric v1.34.0 // indirect
+go.opentelemetry.io/otel/trace v1.29.0 // indirect
-go.opentelemetry.io/otel/trace v1.34.0 // indirect
 golang.org/x/arch v0.8.0 // indirect
-golang.org/x/crypto v0.36.0 // indirect
+golang.org/x/crypto v0.31.0 // indirect
-golang.org/x/net v0.37.0 // indirect
+golang.org/x/net v0.33.0 // indirect
-golang.org/x/oauth2 v0.28.0 // indirect
+golang.org/x/oauth2 v0.24.0 // indirect
-golang.org/x/sys v0.31.0 // indirect
+golang.org/x/sys v0.28.0 // indirect
-golang.org/x/text v0.23.0 // indirect
+golang.org/x/text v0.21.0 // indirect
-golang.org/x/time v0.11.0 // indirect
+golang.org/x/time v0.8.0 // indirect
-google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
+google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 // indirect
-google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
+google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 // indirect
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect
-google.golang.org/grpc v1.71.0 // indirect
+google.golang.org/grpc v1.67.3 // indirect
-google.golang.org/protobuf v1.36.5 // indirect
+google.golang.org/protobuf v1.35.2 // indirect
 gopkg.in/yaml.v3 v3.0.1 // indirect
 )

161
go.sum
161
go.sum
|
@ -1,15 +1,15 @@
|
||||||
cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
|
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
|
||||||
cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
|
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
|
||||||
cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
|
cloud.google.com/go/auth v0.13.0 h1:8Fu8TZy167JkW8Tj3q7dIkr2v4cndv41ouecJx0PAHs=
|
||||||
cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
|
cloud.google.com/go/auth v0.13.0/go.mod h1:COOjD9gwfKNKz+IIduatIhYJQIc0mG3H102r/EMxX6Q=
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
|
cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU=
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
|
cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8=
|
||||||
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
|
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
|
||||||
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
|
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
|
||||||
cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
|
cloud.google.com/go/documentai v1.35.1 h1:52RfiUsoblXcE57CfKJGnITWLxRM30BcqNk/BKZl2LI=
|
||||||
cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
|
cloud.google.com/go/documentai v1.35.1/go.mod h1:WJjwUAQfwQPJORW8fjz7RODprMULDzEGLA2E6WxenFw=
|
||||||
cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
|
cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0MK+hc=
|
||||||
cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
|
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
|
||||||
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
||||||
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||||
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
||||||
|
@ -39,8 +39,8 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2
|
||||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
|
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||||
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
||||||
github.com/gen2brain/go-fitz v1.24.14/go.mod h1:0KaZeQgASc20Yp5R/pFzyy7SmP01XcoHKNF842U2/S4=
|
github.com/gen2brain/go-fitz v1.24.14/go.mod h1:0KaZeQgASc20Yp5R/pFzyy7SmP01XcoHKNF842U2/S4=
|
||||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||||
|
@ -62,27 +62,17 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
|
||||||
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
|
||||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
|
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
|
||||||
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
|
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
|
||||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.5 h1:VgzTY2jogw3xt39CusEnFJWm7rlsq5yL5q9XdLOuP5g=
|
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o=
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.5/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
|
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=
|
||||||
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
|
|
||||||
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
|
|
||||||
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
|
|
||||||
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
|
|
||||||
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
|
|
||||||
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
|
|
||||||
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
|
|
||||||
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
|
|
||||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||||
|
@ -125,8 +115,8 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
|
||||||
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
|
||||||
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||||
|
@ -148,8 +138,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
|
github.com/tmc/langchaingo v0.1.13-pre.1 h1:r+ma9kl0NuFJGtIrnMPFjEn4RhXktwSI31fIpgiiMm4=
|
||||||
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
|
github.com/tmc/langchaingo v0.1.13-pre.1/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||||
|
@ -166,86 +156,53 @@ gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f h1:Wku8eEde
|
||||||
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
|
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
|
||||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
|
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
|
||||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
||||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc=
|
||||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
|
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
|
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
|
||||||
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
|
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
|
||||||
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
|
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
|
||||||
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
|
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
|
||||||
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
|
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
|
||||||
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
|
|
||||||
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
|
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
|
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
|
|
||||||
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
|
|
||||||
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
|
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||||
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
|
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||||
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||||
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
|
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
|
||||||
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
|
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||||
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
|
golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
|
||||||
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
|
golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||||
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
|
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
|
||||||
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
|
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
|
|
||||||
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
|
|
||||||
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
|
|
||||||
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
|
||||||
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
|
|
||||||
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
|
||||||
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
|
|
||||||
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
|
||||||
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
|
|
||||||
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
||||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
|
|
||||||
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
|
||||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
|
||||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||||
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
|
golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg=
|
||||||
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
|
golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||||
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
|
|
||||||
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
|
||||||
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
|
|
||||||
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
|
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
|
google.golang.org/api v0.214.0 h1:h2Gkq07OYi6kusGOaT/9rnNljuXmqPnaig7WGPmKbwA=
|
||||||
google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
|
google.golang.org/api v0.214.0/go.mod h1:bYPpLG8AyeMWwDU6NXoB00xC0DFkikVvd5MfwoxjLqE=
|
||||||
-google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU=
-google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ=
-google.golang.org/api v0.225.0 h1:+4/IVqBQm0MV5S+JW3kdEGC1WtOmM2mXN1LKH1LdNlw=
-google.golang.org/api v0.225.0/go.mod h1:WP/0Xm4LVvMOCldfvOISnWquSRWbG2kArDZcg+W2DbY=
-google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
-google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
-google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
-google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e h1:YA5lmSs3zc/5w+xsRcHqpETkaYyK63ivEPzNTcUUlSA=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
-google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
-google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
-google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
-google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
-google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
-google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
+google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
+google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc=
+google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 h1:pgr/4QbFyktUv9CtQ/Fq4gzEE6/Xs7iCXbktaGzLHbQ=
+google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697/go.mod h1:+D9ySVjN8nY8YCVjc5O7PZDIdZporIDY3KaGfJunh88=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
+google.golang.org/grpc v1.67.3 h1:OgPcDAFKHnH8X3O4WcO4XUc8GRDeKsKReqbQtiCj7N8=
+google.golang.org/grpc v1.67.3/go.mod h1:YGaHCc6Oap+FzBJTZLBzkGSYt/cvGPFTPxkn7QfSU8s=
+google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
+google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
main.go (158 lines changed)

@@ -8,7 +8,6 @@ import (
 	"paperless-gpt/ocr"
 	"path/filepath"
 	"runtime"
-	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -32,30 +31,26 @@ var (
 	log = logrus.New()

 	// Environment Variables
-	paperlessInsecureSkipVerify = os.Getenv("PAPERLESS_INSECURE_SKIP_VERIFY") == "true"
 	correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
 	paperlessBaseURL       = os.Getenv("PAPERLESS_BASE_URL")
 	paperlessAPIToken      = os.Getenv("PAPERLESS_API_TOKEN")
-	azureDocAIEndpoint     = os.Getenv("AZURE_DOCAI_ENDPOINT")
-	azureDocAIKey          = os.Getenv("AZURE_DOCAI_KEY")
-	azureDocAIModelID      = os.Getenv("AZURE_DOCAI_MODEL_ID")
-	azureDocAITimeout      = os.Getenv("AZURE_DOCAI_TIMEOUT_SECONDS")
 	openaiAPIKey           = os.Getenv("OPENAI_API_KEY")
 	manualTag              = os.Getenv("MANUAL_TAG")
 	autoTag                = os.Getenv("AUTO_TAG")
 	manualOcrTag           = os.Getenv("MANUAL_OCR_TAG") // Not used yet
 	autoOcrTag             = os.Getenv("AUTO_OCR_TAG")
 	llmProvider            = os.Getenv("LLM_PROVIDER")
 	llmModel               = os.Getenv("LLM_MODEL")
 	visionLlmProvider      = os.Getenv("VISION_LLM_PROVIDER")
 	visionLlmModel         = os.Getenv("VISION_LLM_MODEL")
 	logLevel               = strings.ToLower(os.Getenv("LOG_LEVEL"))
 	listenInterface        = os.Getenv("LISTEN_INTERFACE")
 	autoGenerateTitle      = os.Getenv("AUTO_GENERATE_TITLE")
 	autoGenerateTags       = os.Getenv("AUTO_GENERATE_TAGS")
 	autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
 	limitOcrPages          int // Will be read from OCR_LIMIT_PAGES
 	tokenLimit             = 0 // Will be read from TOKEN_LIMIT

 	// Templates
 	titleTemplate *template.Template
@@ -171,18 +166,6 @@ func main() {
 		GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"),
 		VisionLLMProvider: visionLlmProvider,
 		VisionLLMModel:    visionLlmModel,
-		AzureEndpoint:     azureDocAIEndpoint,
-		AzureAPIKey:       azureDocAIKey,
-		AzureModelID:      azureDocAIModelID,
 	}
-
-	// Parse Azure timeout if set
-	if azureDocAITimeout != "" {
-		if timeout, err := strconv.Atoi(azureDocAITimeout); err == nil {
-			ocrConfig.AzureTimeout = timeout
-		} else {
-			log.Warnf("Invalid AZURE_DOCAI_TIMEOUT_SECONDS value: %v, using default", err)
-		}
-	}

 	// If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider
@@ -204,21 +187,6 @@ func main() {
 		ocrProvider: ocrProvider,
 	}
-
-	if app.isOcrEnabled() {
-		fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag)
-		fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag)
-		rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
-		if rawLimitOcrPages == "" {
-			limitOcrPages = 5
-		} else {
-			var err error
-			limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
-			if err != nil {
-				log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
-			}
-		}
-	}

 	// Start background process for auto-tagging
 	go func() {
 		minBackoffDuration := 10 * time.Second
@@ -229,7 +197,7 @@ func main() {
 		for {
 			processedCount, err := func() (int, error) {
 				count := 0
-				if app.isOcrEnabled() {
+				if isOcrEnabled() {
 					ocrCount, err := app.processAutoOcrTagDocuments()
 					if err != nil {
 						return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err)
@@ -288,7 +256,7 @@ func main() {

 	// Endpoint to see if user enabled OCR
 	api.GET("/experimental/ocr", func(c *gin.Context) {
-		enabled := app.isOcrEnabled()
+		enabled := isOcrEnabled()
 		c.JSON(http.StatusOK, gin.H{"enabled": enabled})
 	})

@@ -398,8 +366,8 @@ func initLogger() {
 	})
 }

-func (app *App) isOcrEnabled() bool {
-	return app.ocrProvider != nil
+func isOcrEnabled() bool {
+	return visionLlmModel != "" && visionLlmProvider != ""
 }

 // validateOrDefaultEnvVars ensures all necessary environment variables are set
@@ -417,10 +385,16 @@ func validateOrDefaultEnvVars() {
 	if manualOcrTag == "" {
 		manualOcrTag = "paperless-gpt-ocr"
 	}
+	if isOcrEnabled() {
+		fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag)
+	}

 	if autoOcrTag == "" {
 		autoOcrTag = "paperless-gpt-ocr-auto"
 	}
+	if isOcrEnabled() {
+		fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag)
+	}

 	if paperlessBaseURL == "" {
 		log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.")
@@ -438,17 +412,6 @@ func validateOrDefaultEnvVars() {
 		log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
 	}
-
-	// Validate OCR provider if set
-	ocrProvider := os.Getenv("OCR_PROVIDER")
-	if ocrProvider == "azure" {
-		if azureDocAIEndpoint == "" {
-			log.Fatal("Please set the AZURE_DOCAI_ENDPOINT environment variable for Azure provider")
-		}
-		if azureDocAIKey == "" {
-			log.Fatal("Please set the AZURE_DOCAI_KEY environment variable for Azure provider")
-		}
-	}

 	if llmModel == "" {
 		log.Fatal("Please set the LLM_MODEL environment variable.")
 	}
@@ -457,6 +420,19 @@ func validateOrDefaultEnvVars() {
 		log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
 	}

+	if isOcrEnabled() {
+		rawLimitOcrPages := os.Getenv("OCR_LIMIT_PAGES")
+		if rawLimitOcrPages == "" {
+			limitOcrPages = 5
+		} else {
+			var err error
+			limitOcrPages, err = strconv.Atoi(rawLimitOcrPages)
+			if err != nil {
+				log.Fatalf("Invalid OCR_LIMIT_PAGES value: %v", err)
+			}
+		}
+	}
+
 	// Initialize token limit from environment variable
 	if limit := os.Getenv("TOKEN_LIMIT"); limit != "" {
 		if parsed, err := strconv.Atoi(limit); err == nil {
@@ -490,14 +466,7 @@ func (app *App) processAutoTagDocuments() (int, error) {

 	log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)

-	processedCount := 0
 	for _, document := range documents {
-		// Skip documents that have the autoOcrTag
-		if slices.Contains(document.Tags, autoOcrTag) {
-			log.Debugf("Skipping document %d as it has the OCR tag %s", document.ID, autoOcrTag)
-			continue
-		}
-
 		docLogger := documentLogger(document.ID)
 		docLogger.Info("Processing document for auto-tagging")

@@ -510,18 +479,17 @@ func (app *App) processAutoTagDocuments() (int, error) {

 		suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
 		if err != nil {
-			return processedCount, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
+			return 0, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
 		}

 		err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
 		if err != nil {
-			return processedCount, fmt.Errorf("error updating document %d: %w", document.ID, err)
+			return 0, fmt.Errorf("error updating document %d: %w", document.ID, err)
 		}

 		docLogger.Info("Successfully processed document")
-		processedCount++
 	}
-	return processedCount, nil
+	return len(documents), nil
 }

 // processAutoOcrTagDocuments handles the background auto-tagging of OCR documents
@@ -666,11 +634,9 @@ func createLLM() (llms.Model, error) {
 		if openaiAPIKey == "" {
 			return nil, fmt.Errorf("OpenAI API key is not set")
 		}
-
 		return openai.New(
 			openai.WithModel(llmModel),
 			openai.WithToken(openaiAPIKey),
-			openai.WithHTTPClient(createCustomHTTPClient()),
 		)
 	case "ollama":
 		host := os.Getenv("OLLAMA_HOST")
@@ -692,11 +658,9 @@ func createVisionLLM() (llms.Model, error) {
 		if openaiAPIKey == "" {
 			return nil, fmt.Errorf("OpenAI API key is not set")
 		}
-
 		return openai.New(
 			openai.WithModel(visionLlmModel),
 			openai.WithToken(openaiAPIKey),
-			openai.WithHTTPClient(createCustomHTTPClient()),
 		)
 	case "ollama":
 		host := os.Getenv("OLLAMA_HOST")
@@ -712,33 +676,3 @@ func createVisionLLM() (llms.Model, error) {
 		return nil, nil
 	}
 }
-
-func createCustomHTTPClient() *http.Client {
-	// Create custom transport that adds headers
-	customTransport := &headerTransport{
-		transport: http.DefaultTransport,
-		headers: map[string]string{
-			"X-Title": "paperless-gpt",
-		},
-	}
-
-	// Create custom client with the transport
-	httpClient := http.DefaultClient
-	httpClient.Transport = customTransport
-
-	return httpClient
-}
-
-// headerTransport is a custom http.RoundTripper that adds custom headers to requests
-type headerTransport struct {
-	transport http.RoundTripper
-	headers   map[string]string
-}
-
-// RoundTrip implements the http.RoundTripper interface
-func (t *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) {
-	for key, value := range t.headers {
-		req.Header.Add(key, value)
-	}
-	return t.transport.RoundTrip(req)
-}
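The isOcrEnabled change above is easy to miss in the noise: main asks whether an OCR provider was actually constructed, while v0.12.0 only checks the two vision env vars. A minimal sketch of the difference, assuming only the ocr.Config and NewProvider API that this comparison shows; the model name is a placeholder, not taken from the repository:

package main

import (
	"fmt"

	"paperless-gpt/ocr"
)

func main() {
	cfg := ocr.Config{
		Provider:          "llm",
		VisionLLMProvider: "ollama",
		VisionLLMModel:    "minicpm-v", // placeholder model name for illustration
	}

	provider, err := ocr.NewProvider(cfg)

	// v0.12.0-style gate: configuration alone decides.
	envGate := cfg.VisionLLMProvider != "" && cfg.VisionLLMModel != ""

	// main-style gate: OCR counts as enabled only if a provider was actually built.
	providerGate := err == nil && provider != nil

	fmt.Println(envGate, providerGate)
}

The practical effect is that on main a misconfigured provider silently disables the OCR endpoints instead of reporting OCR as enabled and failing later.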
main_test.go (199 lines): removed in v0.12.0; the content below exists only on the main branch.

@@ -1,199 +0,0 @@
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"slices"
	"testing"
	"text/template"

	"github.com/Masterminds/sprig/v3"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestProcessAutoTagDocuments(t *testing.T) {
	// Initialize required global variables
	autoTag = "paperless-gpt-auto"
	autoOcrTag = "paperless-gpt-ocr-auto"

	// Initialize templates
	var err error
	titleTemplate, err = template.New("title").Funcs(sprig.FuncMap()).Parse("")
	require.NoError(t, err)
	tagTemplate, err = template.New("tag").Funcs(sprig.FuncMap()).Parse("")
	require.NoError(t, err)
	correspondentTemplate, err = template.New("correspondent").Funcs(sprig.FuncMap()).Parse("")
	require.NoError(t, err)

	// Create test environment
	env := newTestEnv(t)
	defer env.teardown()

	// Set up test cases
	testCases := []struct {
		name           string
		documents      []Document
		expectedCount  int
		expectedError  string
		updateResponse int // HTTP status code for update response
	}{
		{
			name: "Skip document with autoOcrTag",
			documents: []Document{
				{
					ID:    1,
					Title: "Doc with OCR tag",
					Tags:  []string{autoTag, autoOcrTag},
				},
				{
					ID:    2,
					Title: "Doc without OCR tag",
					Tags:  []string{autoTag},
				},
				{
					ID:    3,
					Title: "Doc with OCR tag",
					Tags:  []string{autoTag, autoOcrTag},
				},
			},
			expectedCount:  1,
			updateResponse: http.StatusOK,
		},
		{
			name:           "No documents to process",
			documents:      []Document{},
			expectedCount:  0,
			updateResponse: http.StatusOK,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Mock the GetAllTags response
			env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
				response := map[string]interface{}{
					"results": []map[string]interface{}{
						{"id": 1, "name": autoTag},
						{"id": 2, "name": autoOcrTag},
						{"id": 3, "name": "other-tag"},
					},
				}
				w.WriteHeader(http.StatusOK)
				json.NewEncoder(w).Encode(response)
			})

			// Mock the GetDocumentsByTags response
			env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) {
				response := GetDocumentsApiResponse{
					Results: make([]GetDocumentApiResponseResult, len(tc.documents)),
				}
				for i, doc := range tc.documents {
					tagIds := make([]int, len(doc.Tags))
					for j, tagName := range doc.Tags {
						switch tagName {
						case autoTag:
							tagIds[j] = 1
						case autoOcrTag:
							tagIds[j] = 2
						default:
							tagIds[j] = 3
						}
					}
					response.Results[i] = GetDocumentApiResponseResult{
						ID:      doc.ID,
						Title:   doc.Title,
						Tags:    tagIds,
						Content: "Test content",
					}
				}
				w.WriteHeader(http.StatusOK)
				json.NewEncoder(w).Encode(response)
			})

			// Mock the correspondent creation endpoint
			env.setMockResponse("/api/correspondents/", func(w http.ResponseWriter, r *http.Request) {
				if r.Method == "POST" {
					// Mock successful correspondent creation
					w.WriteHeader(http.StatusCreated)
					json.NewEncoder(w).Encode(map[string]interface{}{
						"id":   3,
						"name": "test response",
					})
				} else {
					// Mock GET response for existing correspondents
					w.WriteHeader(http.StatusOK)
					json.NewEncoder(w).Encode(map[string]interface{}{
						"results": []map[string]interface{}{
							{"id": 1, "name": "Alpha"},
							{"id": 2, "name": "Beta"},
						},
					})
				}
			})

			// Create test app
			app := &App{
				Client:   env.client,
				Database: env.db,
				LLM:      &mockLLM{}, // Use mock LLM from app_llm_test.go
			}

			// Set auto-generate flags
			autoGenerateTitle = "true"
			autoGenerateTags = "true"
			autoGenerateCorrespondents = "true"

			// Mock the document update responses
			for _, doc := range tc.documents {
				if !slices.Contains(doc.Tags, autoOcrTag) {
					updatePath := fmt.Sprintf("/api/documents/%d/", doc.ID)
					env.setMockResponse(updatePath, func(w http.ResponseWriter, r *http.Request) {
						w.WriteHeader(tc.updateResponse)
						json.NewEncoder(w).Encode(map[string]interface{}{
							"id":    doc.ID,
							"title": "Updated " + doc.Title,
							"tags":  []int{1, 3}, // Mock updated tag IDs
						})
					})
				}
			}

			// Run the test
			count, err := app.processAutoTagDocuments()

			// Verify results
			if tc.expectedError != "" {
				require.Error(t, err)
				assert.Contains(t, err.Error(), tc.expectedError)
			} else {
				require.NoError(t, err)
				assert.Equal(t, tc.expectedCount, count)
			}
		})
	}
}

func TestCreateCustomHTTPClient(t *testing.T) {
	// Create a test server
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify custom header
		assert.Equal(t, "paperless-gpt", r.Header.Get("X-Title"), "Expected X-Title header")
		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()

	// Get custom client
	client := createCustomHTTPClient()
	require.NotNil(t, client, "HTTP client should not be nil")

	// Make a request
	resp, err := client.Get(server.URL)
	require.NoError(t, err, "Request should not fail")
	defer resp.Body.Close()

	assert.Equal(t, http.StatusOK, resp.StatusCode, "Expected 200 OK response")
}
ocr.go (13 lines changed)

@@ -36,20 +36,13 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
 			return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
 		}

-		result, err := app.ocrProvider.ProcessImage(ctx, imageContent)
+		ocrText, err := app.ocrProvider.ProcessImage(ctx, imageContent)
 		if err != nil {
 			return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
 		}
-		if result == nil {
-			pageLogger.Error("Got nil result from OCR provider")
-			return "", fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1)
-		}
-
-		pageLogger.WithField("has_hocr", result.HOCR != "").
-			WithField("metadata", result.Metadata).
-			Debug("OCR completed for page")
-
-		ocrTexts = append(ocrTexts, result.Text)
+		pageLogger.Debug("OCR completed for page")
+		ocrTexts = append(ocrTexts, ocrText)
 	}

 	docLogger.Info("OCR processing completed successfully")
Azure Document Intelligence OCR provider (package ocr; the file-name header is not preserved in this view): removed in v0.12.0; the content below exists only on the main branch.

@@ -1,224 +0,0 @@
package ocr

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"

	"github.com/gabriel-vasile/mimetype"
	"github.com/hashicorp/go-retryablehttp"
	"github.com/sirupsen/logrus"
)

const (
	apiVersion      = "2024-11-30"
	defaultModelID  = "prebuilt-read"
	defaultTimeout  = 120
	pollingInterval = 2 * time.Second
)

// AzureProvider implements OCR using Azure Document Intelligence
type AzureProvider struct {
	endpoint   string
	apiKey     string
	modelID    string
	timeout    time.Duration
	httpClient *retryablehttp.Client
}

// Request body for Azure Document Intelligence
type analyzeRequest struct {
	Base64Source string `json:"base64Source"`
}

func newAzureProvider(config Config) (*AzureProvider, error) {
	logger := log.WithFields(logrus.Fields{
		"endpoint": config.AzureEndpoint,
		"model_id": config.AzureModelID,
	})
	logger.Info("Creating new Azure Document Intelligence provider")

	// Validate required configuration
	if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
		logger.Error("Missing required configuration")
		return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
	}

	// Set defaults and create provider
	modelID := defaultModelID
	if config.AzureModelID != "" {
		modelID = config.AzureModelID
	}

	timeout := defaultTimeout
	if config.AzureTimeout > 0 {
		timeout = config.AzureTimeout
	}

	// Configure retryablehttp client
	client := retryablehttp.NewClient()
	client.RetryMax = 3
	client.RetryWaitMin = 1 * time.Second
	client.RetryWaitMax = 5 * time.Second
	client.Logger = logger

	provider := &AzureProvider{
		endpoint:   config.AzureEndpoint,
		apiKey:     config.AzureAPIKey,
		modelID:    modelID,
		timeout:    time.Duration(timeout) * time.Second,
		httpClient: client,
	}

	logger.Info("Successfully initialized Azure Document Intelligence provider")
	return provider, nil
}

func (p *AzureProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{
		"model_id": p.modelID,
	})
	logger.Debug("Starting Azure Document Intelligence processing")

	// Detect MIME type
	mtype := mimetype.Detect(imageContent)
	logger.WithField("mime_type", mtype.String()).Debug("Detected file type")

	if !isImageMIMEType(mtype.String()) {
		logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
		return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
	}

	// Create context with timeout
	ctx, cancel := context.WithTimeout(ctx, p.timeout)
	defer cancel()

	// Submit document for analysis
	operationLocation, err := p.submitDocument(ctx, imageContent)
	if err != nil {
		return nil, fmt.Errorf("error submitting document: %w", err)
	}

	// Poll for results
	result, err := p.pollForResults(ctx, operationLocation)
	if err != nil {
		return nil, fmt.Errorf("error polling for results: %w", err)
	}

	// Convert to OCR result
	ocrResult := &OCRResult{
		Text: result.AnalyzeResult.Content,
		Metadata: map[string]string{
			"provider":    "azure_docai",
			"page_count":  fmt.Sprintf("%d", len(result.AnalyzeResult.Pages)),
			"api_version": result.AnalyzeResult.APIVersion,
		},
	}

	logger.WithFields(logrus.Fields{
		"content_length": len(ocrResult.Text),
		"page_count":     len(result.AnalyzeResult.Pages),
	}).Info("Successfully processed document")
	return ocrResult, nil
}

func (p *AzureProvider) submitDocument(ctx context.Context, imageContent []byte) (string, error) {
	requestURL := fmt.Sprintf("%s/documentintelligence/documentModels/%s:analyze?api-version=%s",
		p.endpoint, p.modelID, apiVersion)

	// Prepare request body
	requestBody := analyzeRequest{
		Base64Source: base64.StdEncoding.EncodeToString(imageContent),
	}
	requestBodyBytes, err := json.Marshal(requestBody)
	if err != nil {
		return "", fmt.Errorf("error marshaling request body: %w", err)
	}

	req, err := retryablehttp.NewRequestWithContext(ctx, "POST", requestURL, bytes.NewBuffer(requestBodyBytes))
	if err != nil {
		return "", fmt.Errorf("error creating HTTP request: %w", err)
	}

	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("error sending HTTP request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusAccepted {
		body, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
	}

	operationLocation := resp.Header.Get("Operation-Location")
	if operationLocation == "" {
		return "", fmt.Errorf("no Operation-Location header in response")
	}

	return operationLocation, nil
}

func (p *AzureProvider) pollForResults(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
	logger := log.WithField("operation_location", operationLocation)
	logger.Debug("Starting to poll for results")

	ticker := time.NewTicker(pollingInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil, fmt.Errorf("operation timed out after %v: %w", p.timeout, ctx.Err())
		case <-ticker.C:
			req, err := retryablehttp.NewRequestWithContext(ctx, "GET", operationLocation, nil)
			if err != nil {
				return nil, fmt.Errorf("error creating poll request: %w", err)
			}
			req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)

			resp, err := p.httpClient.Do(req)
			if err != nil {
				return nil, fmt.Errorf("error polling for results: %w", err)
			}

			var result AzureDocumentResult
			if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
				resp.Body.Close()
				logger.WithError(err).Error("Failed to decode response")
				return nil, fmt.Errorf("error decoding response: %w", err)
			}
			defer resp.Body.Close()

			logger.WithFields(logrus.Fields{
				"status_code":    resp.StatusCode,
				"content_length": len(result.AnalyzeResult.Content),
				"page_count":     len(result.AnalyzeResult.Pages),
				"status":         result.Status,
			}).Debug("Poll response received")

			if resp.StatusCode != http.StatusOK {
				return nil, fmt.Errorf("unexpected status code %d while polling", resp.StatusCode)
			}

			switch result.Status {
			case "succeeded":
				return &result, nil
			case "failed":
				return nil, fmt.Errorf("document processing failed")
			case "running":
				// Continue polling
			default:
				return nil, fmt.Errorf("unexpected status: %s", result.Status)
			}
		}
	}
}
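For context on what main loses in v0.12.0 here, the sketch below shows roughly how this provider is selected through the generic factory. It is based only on the Config fields and NewProvider switch visible in this comparison; the endpoint, key, and the empty image bytes are placeholders.

package main

import (
	"context"
	"fmt"
	"os"

	"paperless-gpt/ocr"
)

func main() {
	cfg := ocr.Config{
		Provider:      "azure",
		AzureEndpoint: os.Getenv("AZURE_DOCAI_ENDPOINT"),
		AzureAPIKey:   os.Getenv("AZURE_DOCAI_KEY"),
		AzureModelID:  "prebuilt-read", // optional; matches the provider's default
		AzureTimeout:  120,             // optional; seconds, matches the provider's default
	}

	provider, err := ocr.NewProvider(cfg)
	if err != nil {
		fmt.Println("azure provider not available:", err)
		return
	}

	imageBytes := []byte{} // placeholder: real JPEG/PNG bytes are required here
	result, err := provider.ProcessImage(context.Background(), imageBytes)
	if err != nil {
		fmt.Println("ocr failed:", err)
		return
	}
	fmt.Println(result.Text, result.Metadata["page_count"])
}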
Tests for the Azure provider (package ocr): removed in v0.12.0; the content below exists only on the main branch.

@@ -1,222 +0,0 @@
package ocr

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/hashicorp/go-retryablehttp"
	"github.com/stretchr/testify/assert"
)

func TestNewAzureProvider(t *testing.T) {
	tests := []struct {
		name        string
		config      Config
		wantErr     bool
		errContains string
	}{
		{
			name: "valid config",
			config: Config{
				AzureEndpoint: "https://test.cognitiveservices.azure.com/",
				AzureAPIKey:   "test-key",
			},
			wantErr: false,
		},
		{
			name: "valid config with custom model and timeout",
			config: Config{
				AzureEndpoint: "https://test.cognitiveservices.azure.com/",
				AzureAPIKey:   "test-key",
				AzureModelID:  "custom-model",
				AzureTimeout:  60,
			},
			wantErr: false,
		},
		{
			name: "missing endpoint",
			config: Config{
				AzureAPIKey: "test-key",
			},
			wantErr:     true,
			errContains: "missing required Azure Document Intelligence configuration",
		},
		{
			name: "missing api key",
			config: Config{
				AzureEndpoint: "https://test.cognitiveservices.azure.com/",
			},
			wantErr:     true,
			errContains: "missing required Azure Document Intelligence configuration",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			provider, err := newAzureProvider(tt.config)
			if tt.wantErr {
				assert.Error(t, err)
				if tt.errContains != "" {
					assert.Contains(t, err.Error(), tt.errContains)
				}
				return
			}
			assert.NoError(t, err)
			assert.NotNil(t, provider)

			// Verify default values
			if tt.config.AzureModelID == "" {
				assert.Equal(t, defaultModelID, provider.modelID)
			} else {
				assert.Equal(t, tt.config.AzureModelID, provider.modelID)
			}

			if tt.config.AzureTimeout == 0 {
				assert.Equal(t, time.Duration(defaultTimeout)*time.Second, provider.timeout)
			} else {
				assert.Equal(t, time.Duration(tt.config.AzureTimeout)*time.Second, provider.timeout)
			}
		})
	}
}

func TestAzureProvider_ProcessImage(t *testing.T) {
	// Sample success response
	now := time.Now()
	successResult := AzureDocumentResult{
		Status:              "succeeded",
		CreatedDateTime:     now,
		LastUpdatedDateTime: now,
		AnalyzeResult: AzureAnalyzeResult{
			APIVersion:      apiVersion,
			ModelID:         defaultModelID,
			StringIndexType: "utf-16",
			Content:         "Test document content",
			Pages: []AzurePage{
				{
					PageNumber: 1,
					Angle:      0.0,
					Width:      800,
					Height:     600,
					Unit:       "pixel",
					Lines: []AzureLine{
						{
							Content: "Test line",
							Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
							Spans:   []AzureSpan{{Offset: 0, Length: 9}},
						},
					},
					Spans: []AzureSpan{{Offset: 0, Length: 9}},
				},
			},
			Paragraphs: []AzureParagraph{
				{
					Content: "Test document content",
					Spans:   []AzureSpan{{Offset: 0, Length: 19}},
					BoundingRegions: []AzureBoundingBox{
						{
							PageNumber: 1,
							Polygon:    []int{0, 0, 100, 0, 100, 20, 0, 20},
						},
					},
				},
			},
			ContentFormat: "text",
		},
	}

	tests := []struct {
		name         string
		setupServer  func() *httptest.Server
		imageContent []byte
		wantErr      bool
		errContains  string
		expectedText string
	}{
		{
			name: "successful processing",
			setupServer: func() *httptest.Server {
				mux := http.NewServeMux()
				server := httptest.NewServer(mux)

				mux.HandleFunc("/documentintelligence/documentModels/prebuilt-read:analyze", func(w http.ResponseWriter, r *http.Request) {
					w.Header().Set("Operation-Location", fmt.Sprintf("%s/operations/123", server.URL))
					w.WriteHeader(http.StatusAccepted)
				})

				mux.HandleFunc("/operations/123", func(w http.ResponseWriter, r *http.Request) {
					json.NewEncoder(w).Encode(successResult)
				})

				return server
			},
			// Create minimal JPEG content with magic numbers
			imageContent: append([]byte{0xFF, 0xD8, 0xFF, 0xE0}, []byte("JFIF test content")...),
			expectedText: "Test document content",
		},
		{
			name: "invalid mime type",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					t.Log("Server should not be called with invalid mime type")
					w.WriteHeader(http.StatusBadRequest)
				}))
			},
			imageContent: []byte("invalid content"),
			wantErr:      true,
			errContains:  "unsupported file type",
		},
		{
			name: "submission error",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusBadRequest)
					fmt.Fprintln(w, "Invalid request")
				}))
			},
			imageContent: []byte{0xFF, 0xD8, 0xFF, 0xE0}, // JPEG magic numbers
			wantErr:      true,
			errContains:  "unexpected status code 400",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			server := tt.setupServer()
			defer server.Close()

			client := retryablehttp.NewClient()
			client.HTTPClient = server.Client()
			client.Logger = log

			provider := &AzureProvider{
				endpoint:   server.URL,
				apiKey:     "test-key",
				modelID:    defaultModelID,
				timeout:    5 * time.Second,
				httpClient: client,
			}

			result, err := provider.ProcessImage(context.Background(), tt.imageContent)
			if tt.wantErr {
				assert.Error(t, err)
				if tt.errContains != "" {
					assert.Contains(t, err.Error(), tt.errContains)
				}
				return
			}

			assert.NoError(t, err)
			assert.NotNil(t, result)
			assert.Equal(t, tt.expectedText, result.Text)
			assert.Equal(t, "azure_docai", result.Metadata["provider"])
			assert.Equal(t, apiVersion, result.Metadata["api_version"])
			assert.Equal(t, "1", result.Metadata["page_count"])
		})
	}
}
Azure Document Intelligence response types (package ocr): removed in v0.12.0; the content below exists only on the main branch.

@@ -1,72 +0,0 @@
package ocr

import "time"

// AzureDocumentResult represents the root response from Azure Document Intelligence
type AzureDocumentResult struct {
	Status              string             `json:"status"`
	CreatedDateTime     time.Time          `json:"createdDateTime"`
	LastUpdatedDateTime time.Time          `json:"lastUpdatedDateTime"`
	AnalyzeResult       AzureAnalyzeResult `json:"analyzeResult"`
}

// AzureAnalyzeResult represents the analyze result part of the Azure Document Intelligence response
type AzureAnalyzeResult struct {
	APIVersion      string           `json:"apiVersion"`
	ModelID         string           `json:"modelId"`
	StringIndexType string           `json:"stringIndexType"`
	Content         string           `json:"content"`
	Pages           []AzurePage      `json:"pages"`
	Paragraphs      []AzureParagraph `json:"paragraphs"`
	Styles          []interface{}    `json:"styles"`
	ContentFormat   string           `json:"contentFormat"`
}

// AzurePage represents a single page in the document
type AzurePage struct {
	PageNumber int         `json:"pageNumber"`
	Angle      float64     `json:"angle"`
	Width      int         `json:"width"`
	Height     int         `json:"height"`
	Unit       string      `json:"unit"`
	Words      []AzureWord `json:"words"`
	Lines      []AzureLine `json:"lines"`
	Spans      []AzureSpan `json:"spans"`
}

// AzureWord represents a single word with its properties
type AzureWord struct {
	Content    string    `json:"content"`
	Polygon    []int     `json:"polygon"`
	Confidence float64   `json:"confidence"`
	Span       AzureSpan `json:"span"`
}

// AzureLine represents a line of text
type AzureLine struct {
	Content string      `json:"content"`
	Polygon []int       `json:"polygon"`
	Spans   []AzureSpan `json:"spans"`
}

// AzureSpan represents a span of text with offset and length
type AzureSpan struct {
	Offset int `json:"offset"`
	Length int `json:"length"`
}

// AzureParagraph represents a paragraph of text
type AzureParagraph struct {
	Content         string             `json:"content"`
	Spans           []AzureSpan        `json:"spans"`
	BoundingRegions []AzureBoundingBox `json:"boundingRegions"`
}

// AzureBoundingBox represents the location of content on a page
type AzureBoundingBox struct {
	PageNumber int   `json:"pageNumber"`
	Polygon    []int `json:"polygon"`
}

// AzureStyle represents style information for text segments - changed to interface{} as per input
type AzureStyle interface{}
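To make the struct tags above concrete, here is a small hypothetical example of decoding an invented, heavily shortened Azure poll response. Only status, apiVersion, and content are kept so the sketch stays self-contained; the real types also carry pages, lines, spans, and paragraphs as shown in the removed file.

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed, illustrative stand-in for AzureDocumentResult above.
type azureDocumentResult struct {
	Status        string `json:"status"`
	AnalyzeResult struct {
		APIVersion string `json:"apiVersion"`
		Content    string `json:"content"`
	} `json:"analyzeResult"`
}

func main() {
	// Invented payload for illustration; field names follow the struct tags.
	payload := []byte(`{
		"status": "succeeded",
		"analyzeResult": {"apiVersion": "2024-11-30", "content": "Hello from Azure"}
	}`)

	var result azureDocumentResult
	if err := json.Unmarshal(payload, &result); err != nil {
		panic(err)
	}
	fmt.Println(result.Status, result.AnalyzeResult.Content) // succeeded Hello from Azure
}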
Google Document AI provider (package ocr; the file-name header is not preserved in this view):

@@ -3,8 +3,6 @@ package ocr
 import (
 	"context"
 	"fmt"
-	"html"
-	"strings"

 	documentai "cloud.google.com/go/documentai/apiv1"
 	"cloud.google.com/go/documentai/apiv1/documentaipb"
@@ -48,7 +46,7 @@ func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
 	return provider, nil
 }

-func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
+func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
 	logger := log.WithFields(logrus.Fields{
 		"project_id": p.projectID,
 		"location":   p.location,
@@ -62,7 +60,7 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte)

 	if !isImageMIMEType(mtype.String()) {
 		logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
-		return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
+		return "", fmt.Errorf("unsupported file type: %s", mtype.String())
 	}

 	name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
@@ -81,56 +79,21 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte)
 	resp, err := p.client.ProcessDocument(ctx, req)
 	if err != nil {
 		logger.WithError(err).Error("Failed to process document")
-		return nil, fmt.Errorf("error processing document: %w", err)
+		return "", fmt.Errorf("error processing document: %w", err)
 	}

 	if resp == nil || resp.Document == nil {
 		logger.Error("Received nil response or document from Document AI")
-		return nil, fmt.Errorf("received nil response or document from Document AI")
+		return "", fmt.Errorf("received nil response or document from Document AI")
 	}

 	if resp.Document.Error != nil {
 		logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
-		return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
+		return "", fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
 	}

-	metadata := map[string]string{
-		"provider":     "google_docai",
-		"mime_type":    mtype.String(),
-		"page_count":   fmt.Sprintf("%d", len(resp.Document.GetPages())),
-		"processor_id": p.processorID,
-	}
-
-	// Safely add language code if available
-	if pages := resp.Document.GetPages(); len(pages) > 0 {
-		if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
-			metadata["lang_code"] = langs[0].GetLanguageCode()
-		}
-	}
-
-	result := &OCRResult{
-		Text:     resp.Document.Text,
-		Metadata: metadata,
-	}
-
-	// Add hOCR output if available
-	if len(resp.Document.GetPages()) > 0 {
-		var hocr string
-		func() {
-			defer func() {
-				if r := recover(); r != nil {
-					logger.WithField("error", r).Error("Panic during hOCR generation")
-				}
-			}()
-			hocr = generateHOCR(resp.Document)
-		}()
-		if hocr != "" {
-			result.HOCR = hocr
-		}
-	}
-
-	logger.WithField("content_length", len(result.Text)).Info("Successfully processed document")
-	return result, nil
+	logger.WithField("content_length", len(resp.Document.Text)).Info("Successfully processed document")
+	return resp.Document.Text, nil
 }

 // isImageMIMEType checks if the given MIME type is a supported image type
@@ -146,83 +109,6 @@ func isImageMIMEType(mimeType string) bool {
 	return supportedTypes[mimeType]
 }
-
-// generateHOCR converts Document AI response to hOCR format
-func generateHOCR(doc *documentaipb.Document) string {
-	if len(doc.GetPages()) == 0 {
-		return ""
-	}
-
-	var hocr strings.Builder
-	hocr.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
- <head>
-  <title>OCR Output</title>
-  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-  <meta name='ocr-system' content='google-docai' />
- </head>
- <body>`)
-
-	for pageNum, page := range doc.GetPages() {
-		pageWidth := page.GetDimension().GetWidth()
-		pageHeight := page.GetDimension().GetHeight()
-		// Validate dimensions
-		if pageWidth <= 0 || pageHeight <= 0 {
-			continue
-		}
-
-		hocr.WriteString(fmt.Sprintf(`
-  <div class='ocr_page' id='page_%d' title='image;bbox 0 0 %d %d'>`,
-			pageNum+1, int(pageWidth), int(pageHeight)))
-
-		// Process paragraphs
-		for _, para := range page.GetParagraphs() {
-			paraBox := para.GetLayout().GetBoundingPoly().GetNormalizedVertices()
-			if len(paraBox) < 4 {
-				continue
-			}
-
-			// Convert normalized coordinates to absolute
-			// Use float64 for intermediate calculations to prevent overflow
-			x1 := int(float64(paraBox[0].GetX()) * float64(pageWidth))
-			y1 := int(float64(paraBox[0].GetY()) * float64(pageHeight))
-			x2 := int(float64(paraBox[2].GetX()) * float64(pageWidth))
-			y2 := int(float64(paraBox[2].GetY()) * float64(pageHeight))
-
-			// Validate coordinates
-			if x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0 ||
-				x1 > int(pageWidth) || y1 > int(pageHeight) ||
-				x2 > int(pageWidth) || y2 > int(pageHeight) {
-				continue
-			}
-
-			hocr.WriteString(fmt.Sprintf(`
-   <p class='ocr_par' id='par_%d_%d' title='bbox %d %d %d %d'>`,
-				pageNum+1, len(page.GetParagraphs()), x1, y1, x2, y2))
-
-			// Process words within paragraph
-			for _, token := range para.GetLayout().GetTextAnchor().GetTextSegments() {
-				text := doc.Text[token.GetStartIndex():token.GetEndIndex()]
-				if text == "" {
-					continue
-				}
-
-				// Escape HTML special characters
-				text = html.EscapeString(text)
-
-				hocr.WriteString(fmt.Sprintf(`
-    <span class='ocrx_word'>%s</span>`, text))
-			}
-
-			hocr.WriteString("\n   </p>")
-		}
-		hocr.WriteString("\n  </div>")
-	}
-
-	hocr.WriteString("\n</body>\n</html>")
-	return hocr.String()
-}

 // Close releases resources used by the provider
 func (p *GoogleDocAIProvider) Close() error {
 	if p.client != nil {
Test for hOCR generation in the Google Document AI provider (package ocr): removed in v0.12.0; the content below exists only on the main branch.

@@ -1,94 +0,0 @@
package ocr

import (
	"regexp"
	"strings"
	"testing"

	"cloud.google.com/go/documentai/apiv1/documentaipb"
)

func TestGenerateHOCR(t *testing.T) {
	tests := []struct {
		name     string
		doc      *documentaipb.Document
		expected string
	}{
		{
			name:     "empty document",
			doc:      &documentaipb.Document{},
			expected: "",
		},
		{
			name: "single page with one paragraph",
			doc: &documentaipb.Document{
				Text: "Hello World",
				Pages: []*documentaipb.Document_Page{
					{
						Dimension: &documentaipb.Document_Page_Dimension{
							Width:  800,
							Height: 600,
						},
						Paragraphs: []*documentaipb.Document_Page_Paragraph{
							{
								Layout: &documentaipb.Document_Page_Layout{
									BoundingPoly: &documentaipb.BoundingPoly{
										NormalizedVertices: []*documentaipb.NormalizedVertex{
											{X: 0.1, Y: 0.1},
											{X: 0.9, Y: 0.1},
											{X: 0.9, Y: 0.2},
											{X: 0.1, Y: 0.2},
										},
									},
									TextAnchor: &documentaipb.Document_TextAnchor{
										TextSegments: []*documentaipb.Document_TextAnchor_TextSegment{
											{
												StartIndex: 0,
												EndIndex:   11,
											},
										},
									},
								},
							},
						},
					},
				},
			},
			expected: "(?s).*<div class='ocr_page' id='page_1' title='image;bbox 0 0 800 600'>.*" +
				"<p class='ocr_par' id='par_1_1' title='bbox 80 60 719 120'>.*" +
				"<span class='ocrx_word'>Hello World</span>.*</p>.*</div>.*",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := generateHOCR(tt.doc)

			if tt.expected == "" {
				if result != "" {
					t.Errorf("expected empty string, got %v", result)
				}
				return
			}

			matched, err := regexp.MatchString(tt.expected, result)
			if err != nil {
				t.Fatalf("error matching regex: %v", err)
			}
			if !matched {
				t.Errorf("expected to match regex %v\ngot: %v", tt.expected, result)
			}

			// Verify basic hOCR structure
			if !strings.Contains(result, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") {
				t.Error("missing XML declaration")
			}
			if !strings.Contains(result, "<html xmlns=\"http://www.w3.org/1999/xhtml\"") {
				t.Error("missing HTML namespace")
			}
			if !strings.Contains(result, "<meta name='ocr-system' content='google-docai'") {
				t.Error("missing OCR system metadata")
			}
		})
	}
}
LLM vision OCR provider (package ocr; the file-name header is not preserved in this view):

@@ -60,7 +60,7 @@ func newLLMProvider(config Config) (*LLMProvider, error) {
 	}, nil
 }

-func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
+func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
 	logger := log.WithFields(logrus.Fields{
 		"provider": p.provider,
 		"model":    p.model,
@@ -71,7 +71,7 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte)
 	img, _, err := image.Decode(bytes.NewReader(imageContent))
 	if err != nil {
 		logger.WithError(err).Error("Failed to decode image")
-		return nil, fmt.Errorf("error decoding image: %w", err)
+		return "", fmt.Errorf("error decoding image: %w", err)
 	}
 	bounds := img.Bounds()
 	logger.WithFields(logrus.Fields{
@@ -106,18 +106,11 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte)
 	})
 	if err != nil {
 		logger.WithError(err).Error("Failed to get response from vision model")
-		return nil, fmt.Errorf("error getting response from LLM: %w", err)
+		return "", fmt.Errorf("error getting response from LLM: %w", err)
 	}

-	result := &OCRResult{
-		Text: completion.Choices[0].Content,
-		Metadata: map[string]string{
-			"provider": p.provider,
-			"model":    p.model,
-		},
-	}
-	logger.WithField("content_length", len(result.Text)).Info("Successfully processed image")
-	return result, nil
+	logger.WithField("content_length", len(completion.Choices[0].Content)).Info("Successfully processed image")
+	return completion.Choices[0].Content, nil
 }

 // createOpenAIClient creates a new OpenAI vision model client
@@ -9,26 +9,14 @@ import (
 
 var log = logrus.New()
 
-// OCRResult holds the output from OCR processing
-type OCRResult struct {
-    // Plain text output (required)
-    Text string
-
-    // hOCR output (optional, if provider supports it)
-    HOCR string
-
-    // Additional provider-specific metadata
-    Metadata map[string]string
-}
-
 // Provider defines the interface for OCR processing
 type Provider interface {
-    ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
+    ProcessImage(ctx context.Context, imageContent []byte) (string, error)
 }
 
 // Config holds the OCR provider configuration
 type Config struct {
-    // Provider type (e.g., "llm", "google_docai", "azure")
+    // Provider type (e.g., "llm", "google_docai")
     Provider string
 
     // Google Document AI settings
@@ -39,15 +27,6 @@ type Config struct {
     // LLM settings (from existing config)
     VisionLLMProvider string
     VisionLLMModel    string
-
-    // Azure Document Intelligence settings
-    AzureEndpoint string
-    AzureAPIKey   string
-    AzureModelID  string // Optional, defaults to "prebuilt-read"
-    AzureTimeout  int    // Optional, defaults to 120 seconds
-
-    // OCR output options
-    EnableHOCR bool // Whether to request hOCR output if supported by the provider
 }
 
 // NewProvider creates a new OCR provider based on configuration
@@ -75,12 +54,6 @@ func NewProvider(config Config) (Provider, error) {
         }).Info("Using LLM OCR provider")
         return newLLMProvider(config)
-
-    case "azure":
-        if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
-            return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
-        }
-        return newAzureProvider(config)
 
     default:
         return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
     }
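Note: for orientation, a minimal, self-contained sketch of how the two ProcessImage signatures compared above differ for a caller. OCRResult, Provider and their field names are taken from the main-side hunks; the stubProvider type and the sample byte slice are illustrative assumptions, not code from either branch.

package main

import (
    "context"
    "fmt"
)

// Shapes copied (trimmed) from the main-side hunks above.
type OCRResult struct {
    Text     string
    Metadata map[string]string
}

type Provider interface {
    ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
}

// stubProvider is a hypothetical stand-in used only for this sketch;
// real providers are constructed through NewProvider(Config).
type stubProvider struct{}

func (stubProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
    return &OCRResult{
        Text:     "recognized text",
        Metadata: map[string]string{"provider": "stub"},
    }, nil
}

func main() {
    var p Provider = stubProvider{}
    res, err := p.ProcessImage(context.Background(), []byte("fake image bytes"))
    if err != nil {
        panic(err)
    }
    // On main the caller reads res.Text (plus metadata); on v0.12.0,
    // ProcessImage returns the plain text string directly.
    fmt.Println(res.Text, res.Metadata["provider"])
}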
100 paperless.go
@@ -3,13 +3,11 @@ package main
 import (
     "bytes"
     "context"
-    "crypto/tls"
     "encoding/json"
     "fmt"
     "image/jpeg"
     "io"
     "net/http"
-    "net/url"
     "os"
     "path/filepath"
     "slices"
@@ -18,7 +16,6 @@ import (
     "sync"
 
     "github.com/gen2brain/go-fitz"
-    "github.com/sirupsen/logrus"
     "golang.org/x/sync/errgroup"
     "gorm.io/gorm"
 )
@@ -61,18 +58,10 @@ func hasSameTags(original, suggested []string) bool {
 func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
     cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR")
 
-    // Create a custom HTTP transport with TLS configuration
-    tr := &http.Transport{
-        TLSClientConfig: &tls.Config{
-            InsecureSkipVerify: paperlessInsecureSkipVerify,
-        },
-    }
-    httpClient := &http.Client{Transport: tr}
-
     return &PaperlessClient{
         BaseURL:     strings.TrimRight(baseURL, "/"),
         APIToken:    apiToken,
-        HTTPClient:  httpClient,
+        HTTPClient:  &http.Client{},
         CacheFolder: cacheFolder,
     }
 }
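Note: the hunk above shows main wiring an http.Client whose transport can skip TLS verification into PaperlessClient, while v0.12.0 uses a bare &http.Client{}. Below is a minimal standard-library sketch of that construction; the skipVerify parameter stands in for the package-level paperlessInsecureSkipVerify flag referenced in the diff and is an assumption of this sketch.

package main

import (
    "crypto/tls"
    "net/http"
)

// newHTTPClient mirrors the transport setup on the main side of the hunk above:
// when skipVerify is true, the client accepts self-signed or otherwise
// unverifiable certificates (what PAPERLESS_INSECURE_SKIP_VERIFY is meant to allow).
func newHTTPClient(skipVerify bool) *http.Client {
    tr := &http.Transport{
        TLSClientConfig: &tls.Config{InsecureSkipVerify: skipVerify},
    }
    return &http.Client{Transport: tr}
}

func main() {
    client := newHTTPClient(false) // assumption: the flag normally comes from configuration
    _ = client
}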
@@ -91,52 +80,7 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body
         req.Header.Set("Content-Type", "application/json")
     }
 
-    log.WithFields(logrus.Fields{
-        "method": method,
-        "url":    url,
-    }).Debug("Making HTTP request")
-
-    resp, err := client.HTTPClient.Do(req)
-    if err != nil {
-        log.WithError(err).WithFields(logrus.Fields{
-            "url":    url,
-            "method": method,
-            "error":  err,
-        }).Error("HTTP request failed")
-        return nil, fmt.Errorf("HTTP request failed: %w", err)
-    }
-
-    // Check if response is HTML instead of JSON for API endpoints
-    if strings.HasPrefix(path, "api/") {
-        contentType := resp.Header.Get("Content-Type")
-        if strings.Contains(contentType, "text/html") {
-            bodyBytes, _ := io.ReadAll(resp.Body)
-            resp.Body.Close()
-
-            // Create a new response with the same body for the caller
-            resp = &http.Response{
-                Status:     resp.Status,
-                StatusCode: resp.StatusCode,
-                Header:     resp.Header,
-                Body:       io.NopCloser(bytes.NewBuffer(bodyBytes)),
-            }
-
-            log.WithFields(logrus.Fields{
-                "url":          url,
-                "method":       method,
-                "content-type": contentType,
-                "status-code":  resp.StatusCode,
-                "response":     string(bodyBytes),
-                "base-url":     client.BaseURL,
-                "request-path": path,
-                "full-headers": resp.Header,
-            }).Error("Received HTML response for API request")
-
-            return nil, fmt.Errorf("received HTML response instead of JSON (status: %d). This often indicates an SSL/TLS issue or invalid authentication. Check your PAPERLESS_URL, PAPERLESS_TOKEN and PAPERLESS_INSECURE_SKIP_VERIFY settings. Full response: %s", resp.StatusCode, string(bodyBytes))
-        }
-    }
-
-    return resp, nil
+    return client.HTTPClient.Do(req)
 }
 
 // GetAllTags retrieves all tags from the Paperless-NGX API
@@ -176,19 +120,10 @@ func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int,
     // Extract relative path from the Next URL
     if tagsResponse.Next != "" {
         nextURL := tagsResponse.Next
-        if strings.HasPrefix(nextURL, "http") {
-            // Extract just the path portion from the full URL
-            if parsedURL, err := url.Parse(nextURL); err == nil {
-                path = strings.TrimPrefix(parsedURL.Path, "/")
-                if parsedURL.RawQuery != "" {
-                    path += "?" + parsedURL.RawQuery
-                }
-            } else {
-                return nil, fmt.Errorf("failed to parse next URL: %v", err)
-            }
-        } else {
-            path = strings.TrimPrefix(nextURL, "/")
+        if strings.HasPrefix(nextURL, client.BaseURL) {
+            nextURL = strings.TrimPrefix(nextURL, client.BaseURL+"/")
         }
+        path = nextURL
     } else {
         path = ""
     }
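Note: a condensed sketch of the pagination handling removed in the hunk above — turning the absolute "next" URL returned by the tags API into a relative path (plus query) that can be fed back into client.Do. The nextPath helper name and the example URL are assumptions for illustration; the logic itself follows the main-side lines.

package main

import (
    "fmt"
    "net/url"
    "strings"
)

// nextPath condenses the main-side logic shown in the hunk above.
func nextPath(next string) (string, error) {
    if !strings.HasPrefix(next, "http") {
        return strings.TrimPrefix(next, "/"), nil
    }
    parsed, err := url.Parse(next)
    if err != nil {
        return "", fmt.Errorf("failed to parse next URL: %v", err)
    }
    path := strings.TrimPrefix(parsed.Path, "/")
    if parsed.RawQuery != "" {
        path += "?" + parsed.RawQuery
    }
    return path, nil
}

func main() {
    p, _ := nextPath("https://paperless.example.com/api/tags/?page=2") // hypothetical URL
    fmt.Println(p) // prints: api/tags/?page=2
}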
@@ -208,34 +143,19 @@ func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []st
 
     resp, err := client.Do(ctx, "GET", path, nil)
     if err != nil {
-        return nil, fmt.Errorf("HTTP request failed in GetDocumentsByTags: %w", err)
+        return nil, err
     }
     defer resp.Body.Close()
 
-    // Read the response body
-    bodyBytes, err := io.ReadAll(resp.Body)
-    if err != nil {
-        return nil, fmt.Errorf("failed to read response body: %w", err)
-    }
-
     if resp.StatusCode != http.StatusOK {
-        log.WithFields(logrus.Fields{
-            "status_code": resp.StatusCode,
-            "path":        path,
-            "response":    string(bodyBytes),
-            "headers":     resp.Header,
-        }).Error("Error response from server in GetDocumentsByTags")
-        return nil, fmt.Errorf("error searching documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes))
+        bodyBytes, _ := io.ReadAll(resp.Body)
+        return nil, fmt.Errorf("error searching documents: %d, %s", resp.StatusCode, string(bodyBytes))
     }
 
     var documentsResponse GetDocumentsApiResponse
-    err = json.Unmarshal(bodyBytes, &documentsResponse)
+    err = json.NewDecoder(resp.Body).Decode(&documentsResponse)
     if err != nil {
-        log.WithFields(logrus.Fields{
-            "response_body": string(bodyBytes),
-            "error":         err,
-        }).Error("Failed to parse JSON response in GetDocumentsByTags")
-        return nil, fmt.Errorf("failed to parse JSON response: %w", err)
+        return nil, err
     }
 
     allTags, err := client.GetAllTags(ctx)
553 web-app/package-lock.json (generated)
File diff suppressed because it is too large
@@ -43,7 +43,7 @@
     "eslint": "^9.9.0",
     "eslint-plugin-react-hooks": "^5.1.0-rc.0",
     "eslint-plugin-react-refresh": "^0.4.9",
-    "globals": "^16.0.0",
+    "globals": "^15.9.0",
     "node-fetch": "^3.3.0",
     "postcss": "^8.4.47",
     "tailwindcss": "^3.4.12",