Compare commits

...

32 commits

Author SHA1 Message Date
renovate[bot]
53bfc7d252
fix(deps): update module google.golang.org/api to v0.225.0 (#284)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-12 09:47:02 +01:00
renovate[bot]
14567480a8
fix(deps): update dependency axios to v1.8.3 (#285)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-12 09:03:30 +01:00
renovate[bot]
72a23ac51e
fix(deps): update dependency react-router-dom to v7.3.0 (#273)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:13:21 +01:00
renovate[bot]
60c141e815
chore(deps): update eslint monorepo to v9.22.0 (#277)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:13:10 +01:00
renovate[bot]
3788abc9cb
fix(deps): update dependency axios to v1.8.2 (#275)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:13:01 +01:00
renovate[bot]
d477e26048
fix(deps): update module google.golang.org/api to v0.224.0 (#276)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:12:47 +01:00
renovate[bot]
184c1a8600
chore(deps): update dependency autoprefixer to v10.4.21 (#278)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:12:37 +01:00
Icereed
cbd9c5438c
feat(ocr): add support for Azure Document Intelligence provider (#279) 2025-03-10 10:51:56 +01:00
Icereed
360663b05b
feat(ocr): enhance OCR processing with structured results and hOCR su… (#212)
* feat(ocr): enhance OCR processing with structured results and hOCR support

* Update ocr/google_docai_provider.go

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* Update ocr/google_docai_provider_test.go

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* refactor(tests): remove unused context import from google_docai_provider_test.go

* refactor: Add defensive checks for language code in Google DocAI provider (#226)

* Update ocr/google_docai_provider.go

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* Update ocr/google_docai_provider.go

Co-authored-by: gardar <gardar@users.noreply.github.com>

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: mkrinke <mad.krinke@googlemail.com>
Co-authored-by: gardar <gardar@users.noreply.github.com>
2025-03-10 08:43:50 +00:00
renovate[bot]
7c7449e197
chore(deps): update dependency go to v1.24.1 (#269)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:43:26 +01:00
renovate[bot]
a5a5afe276
chore(deps): update golang docker tag to v1.24.1 (#270)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:43:05 +01:00
renovate[bot]
3bb1415b25
fix(deps): update module golang.org/x/sync to v0.12.0 (#274)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:42:40 +01:00
renovate[bot]
df396dac78
chore(deps): update dependency vite to v6.2.1 (#271)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:42:12 +01:00
renovate[bot]
c514914b4e
chore(deps): update dependency @playwright/test to v1.51.0 (#272)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:41:45 +01:00
renovate[bot]
5ac6bb7532
fix(deps): update module google.golang.org/api to v0.223.0 (#264)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-03 09:26:25 +01:00
renovate[bot]
6c03fca89c
fix(deps): update dependency axios to v1.8.1 (#265)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-27 06:27:50 +00:00
renovate[bot]
0b5b367b0a
chore(deps): update eslint monorepo to v9.21.0 (#259)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-25 20:58:59 +01:00
renovate[bot]
9b0204180f
chore(deps): update dependency vite to v6.2.0 (#263)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-25 20:58:34 +01:00
renovate[bot]
5c6f50a1a3
chore(deps): update dependency postcss to v8.5.3 (#255)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-21 16:25:50 +01:00
renovate[bot]
74e4419b7c
fix(deps): update module google.golang.org/api to v0.222.0 (#256)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-21 16:25:41 +01:00
renovate[bot]
fc1d69a93b
chore(deps): update dependency globals to v16 (#257)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-20 21:37:29 +01:00
renovate[bot]
1d6cea481f
fix(deps): update dependency react-icons to v5.5.0 (#251)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-19 13:09:28 +01:00
renovate[bot]
c95487e834
chore(deps): update dependency vite to v6.1.1 (#253)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-19 13:09:15 +01:00
renovate[bot]
3bcee9c1c5
fix(deps): update dependency react-router-dom to v7.2.0 (#250)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-19 07:07:54 +01:00
renovate[bot]
ad74e28473
chore(deps): update dependency typescript-eslint to v8.24.1 (#249)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 19:42:01 +01:00
Icereed
b5fb1cb040
Potential fix for code scanning alert no. 3: Clear-text logging of sensitive information (#247)
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-02-17 12:05:00 +01:00
renovate[bot]
992b3b824b
chore(deps): update dependency postcss to v8.5.2 (#220)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:54:04 +01:00
renovate[bot]
474fde659c
chore(deps): update dependency @types/node to v22.13.4 (#238)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:54 +01:00
renovate[bot]
80365f95a0
chore(deps): update dependency go to v1.24.0 (#225)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:42 +01:00
renovate[bot]
bfe1e00392
chore(deps): update alpine docker tag to v3.21.3 (#240)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:33 +01:00
renovate[bot]
c1b4f8344f
chore(deps): update react monorepo to v19.0.10 (#242)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:24 +01:00
renovate[bot]
b6b8948fe3
fix(deps): update module github.com/tmc/langchaingo to v0.1.13 (#243)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:10 +01:00
17 changed files with 1343 additions and 413 deletions

View file

@ -25,7 +25,7 @@ COPY web-app /app/
RUN npm run build RUN npm run build
# Stage 2: Build the Go binary # Stage 2: Build the Go binary
FROM golang:1.24.0-alpine3.21 AS builder FROM golang:1.24.1-alpine3.21 AS builder
# Set the working directory inside the container # Set the working directory inside the container
WORKDIR /app WORKDIR /app
@ -82,7 +82,7 @@ RUN sed -i \
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt . RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
# Stage 3: Create a lightweight image with just the binary # Stage 3: Create a lightweight image with just the binary
FROM alpine:3.21.2 FROM alpine:3.21.3
ENV GIN_MODE=release ENV GIN_MODE=release

112
README.md
View file

@ -22,7 +22,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
- **LLM OCR**: Use OpenAI or Ollama to extract text from images. - **LLM OCR**: Use OpenAI or Ollama to extract text from images.
- **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks. - **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
- **More to come**: Stay tuned for more OCR providers! - **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
3. **Automatic Title & Tag Generation** 3. **Automatic Title & Tag Generation**
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed. No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
@ -39,11 +39,11 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows. - **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
7. **Simple Docker Deployment** 7. **Simple Docker Deployment**
A few environment variables, and youre off! Compose it alongside paperless-ngx with minimal fuss. A few environment variables, and you're off! Compose it alongside paperless-ngx with minimal fuss.
8. **Unified Web UI** 8. **Unified Web UI**
- **Manual Review**: Approve or tweak AIs suggestions. - **Manual Review**: Approve or tweak AI's suggestions.
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you. - **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
--- ---
@ -56,6 +56,12 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
- [Installation](#installation) - [Installation](#installation)
- [Docker Compose](#docker-compose) - [Docker Compose](#docker-compose)
- [Manual Setup](#manual-setup) - [Manual Setup](#manual-setup)
- [OCR Providers](#ocr-providers)
- [LLM-based OCR](#1-llm-based-ocr-default)
- [Azure Document Intelligence](#2-azure-document-intelligence)
- [Google Document AI](#3-google-document-ai)
- [Comparing OCR Providers](#comparing-ocr-providers)
- [Choosing the Right Provider](#choosing-the-right-provider)
- [Configuration](#configuration) - [Configuration](#configuration)
- [Environment Variables](#environment-variables) - [Environment Variables](#environment-variables)
- [Custom Prompt Templates](#custom-prompt-templates) - [Custom Prompt Templates](#custom-prompt-templates)
@ -86,7 +92,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
#### Docker Compose #### Docker Compose
Heres an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx: Here's an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
```yaml ```yaml
services: services:
@ -124,6 +130,13 @@ services:
# GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID # GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
# GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key # GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key
# Option 3: Azure Document Intelligence
# OCR_PROVIDER: 'azure' # Use Azure Document Intelligence
# AZURE_DOCAI_ENDPOINT: 'your-endpoint' # Your Azure endpoint URL
# AZURE_DOCAI_KEY: 'your-key' # Your Azure API key
# AZURE_DOCAI_MODEL_ID: 'prebuilt-read' # Optional, defaults to prebuilt-read
# AZURE_DOCAI_TIMEOUT_SECONDS: '120' # Optional, defaults to 120 seconds
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit. OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
LOG_LEVEL: "info" # Optional: debug, warn, error LOG_LEVEL: "info" # Optional: debug, warn, error
@ -172,6 +185,63 @@ services:
``` ```
--- ---
## OCR Providers
paperless-gpt supports three different OCR providers, each with unique strengths and capabilities:
### 1. LLM-based OCR (Default)
- **Key Features**:
- Uses vision-capable LLMs like GPT-4V or MiniCPM-V
- High accuracy with complex layouts and difficult scans
- Context-aware text recognition
- Self-correcting capabilities for OCR errors
- **Best For**:
- Complex or unusual document layouts
- Poor quality scans
- Documents with mixed languages
- **Configuration**:
```yaml
OCR_PROVIDER: "llm"
VISION_LLM_PROVIDER: "openai" # or "ollama"
VISION_LLM_MODEL: "gpt-4v" # or "minicpm-v"
```
### 2. Azure Document Intelligence
- **Key Features**:
- Enterprise-grade OCR solution
- Prebuilt models for common document types
- Layout preservation and table detection
- Fast processing speeds
- **Best For**:
- Business documents and forms
- High-volume processing
- Documents requiring layout analysis
- **Configuration**:
```yaml
OCR_PROVIDER: "azure"
AZURE_DOCAI_ENDPOINT: "https://your-endpoint.cognitiveservices.azure.com/"
AZURE_DOCAI_KEY: "your-key"
AZURE_DOCAI_MODEL_ID: "prebuilt-read" # optional
AZURE_DOCAI_TIMEOUT_SECONDS: "120" # optional
```
### 3. Google Document AI
- **Key Features**:
- Specialized document processors
- Strong form field detection
- Multi-language support
- High accuracy on structured documents
- **Best For**:
- Forms and structured documents
- Documents with tables
- Multi-language documents
- **Configuration**:
```yaml
OCR_PROVIDER: "google_docai"
GOOGLE_PROJECT_ID: "your-project"
GOOGLE_LOCATION: "us"
GOOGLE_PROCESSOR_ID: "processor-id"
```
## Configuration ## Configuration
@ -192,9 +262,13 @@ services:
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | | | `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English | | `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | | | `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
| `OCR_PROVIDER` | OCR provider to use (`llm` or `google_docai`). | No | llm | | `OCR_PROVIDER` | OCR provider to use (`llm`, `azure`, or `google_docai`). | No | llm |
| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | | | `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | | | `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
| `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | |
| `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | |
| `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional if using `azure` provider. | No | prebuilt-read |
| `AZURE_DOCAI_TIMEOUT_SECONDS` | Azure Document Intelligence timeout in seconds. | No | 120 |
| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | | | `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | | | `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | | | `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
@ -211,7 +285,7 @@ services:
### Custom Prompt Templates ### Custom Prompt Templates
paperless-gpts flexible **prompt templates** let you shape how AI responds: paperless-gpt's flexible **prompt templates** let you shape how AI responds:
1. **`title_prompt.tmpl`**: For document titles. 1. **`title_prompt.tmpl`**: For document titles.
2. **`tag_prompt.tmpl`**: For tagging logic. 2. **`tag_prompt.tmpl`**: For tagging logic.
@ -232,13 +306,11 @@ Then tweak at will—**paperless-gpt** reloads them automatically on startup!
Each template has access to specific variables: Each template has access to specific variables:
**title_prompt.tmpl**: **title_prompt.tmpl**:
- `{{.Language}}` - Target language (e.g., "English") - `{{.Language}}` - Target language (e.g., "English")
- `{{.Content}}` - Document content text - `{{.Content}}` - Document content text
- `{{.Title}}` - Original document title - `{{.Title}}` - Original document title
**tag_prompt.tmpl**: **tag_prompt.tmpl**:
- `{{.Language}}` - Target language - `{{.Language}}` - Target language
- `{{.AvailableTags}}` - List of existing tags in paperless-ngx - `{{.AvailableTags}}` - List of existing tags in paperless-ngx
- `{{.OriginalTags}}` - Document's current tags - `{{.OriginalTags}}` - Document's current tags
@ -246,11 +318,9 @@ Each template has access to specific variables:
- `{{.Content}}` - Document content text - `{{.Content}}` - Document content text
**ocr_prompt.tmpl**: **ocr_prompt.tmpl**:
- `{{.Language}}` - Target language - `{{.Language}}` - Target language
**correspondent_prompt.tmpl**: **correspondent_prompt.tmpl**:
- `{{.Language}}` - Target language - `{{.Language}}` - Target language
- `{{.AvailableCorrespondents}}` - List of existing correspondents - `{{.AvailableCorrespondents}}` - List of existing correspondents
- `{{.BlackList}}` - List of blacklisted correspondent names - `{{.BlackList}}` - List of blacklisted correspondent names
@ -265,23 +335,25 @@ The templates use Go's text/template syntax. paperless-gpt automatically reloads
1. **Tag Documents** 1. **Tag Documents**
- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify. - Add `paperless-gpt` tag to documents for manual processing
- Add `paperless-gpt-auto` for automatic processing
- Add `paperless-gpt-ocr-auto` for automatic OCR processing
2. **Visit Web UI** 2. **Visit Web UI**
- Go to `http://localhost:8080` (or your host) in your browser. - Go to `http://localhost:8080` (or your host) in your browser
- Review documents tagged for processing
3. **Generate & Apply Suggestions** 3. **Generate & Apply Suggestions**
- Click “Generate Suggestions” to see AI-proposed titles/tags/correspondents. - Click "Generate Suggestions" to see AI-proposed titles/tags/correspondents
- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx. - Review and approve or edit suggestions
- Click "Apply" to save changes to paperless-ngx
4. **Try LLM-Based OCR (Experimental)**
- If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
4. **OCR Processing**
- Tag documents with appropriate OCR tag to process them
- Monitor progress in the Web UI
- Review results and apply changes
--- ---
## LLM-Based OCR: Compare for Yourself ## LLM-Based OCR: Compare for Yourself

View file

@ -92,3 +92,88 @@
- E2E tests for web interface - E2E tests for web interface
- Test fixtures and mocks - Test fixtures and mocks
- Playwright for frontend testing - Playwright for frontend testing
## OCR System Patterns
### OCR Provider Architecture
#### 1. Provider Interface
- Common interface for all OCR implementations
- Methods for image processing
- Configuration through standardized Config struct
- Resource management patterns
#### 2. LLM Provider Implementation
- Supports OpenAI and Ollama vision models
- Base64 encoding for OpenAI requests
- Binary format for Ollama requests
- Template-based OCR prompts
#### 3. Google Document AI Provider
- Enterprise-grade OCR processing
- MIME type validation
- Processor configuration via environment
- Regional endpoint support
### Logging Patterns
#### 1. Provider Initialization
```
[INFO] Initializing OCR provider: llm
[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)
```
#### 2. Processing Logs
```
[DEBUG] Starting OCR processing
[DEBUG] Image dimensions (width=800, height=1200)
[DEBUG] Using binary image format for non-OpenAI provider
[DEBUG] Sending request to vision model
[INFO] Successfully processed image (content_length=1536)
```
#### 3. Error Logging
```
[ERROR] Failed to decode image: invalid format
[ERROR] Unsupported file type: image/webp
[ERROR] Failed to get response from vision model
```
### Error Handling Patterns
#### 1. Configuration Validation
- Required parameter checks
- Environment variable validation
- Provider-specific configuration
- Connection testing
#### 2. Processing Errors
- Image format validation
- MIME type checking
- Content processing errors
- Provider-specific error handling
#### 3. Error Propagation
- Detailed error contexts
- Original error wrapping
- Logging with error context
- Recovery mechanisms
### Processing Flow
#### 1. Document Processing
```
Document Tagged → OCR Provider Selected → Image Processing → Text Extraction → Content Update
```
#### 2. Provider Selection
```
Config Check → Provider Initialization → Resource Setup → Provider Ready
```
#### 3. Error Recovery
```
Error Detection → Logging → Cleanup → Error Propagation
```
These patterns ensure consistent behavior across OCR providers while maintaining proper logging and error handling throughout the system.

36
go.mod
View file

@ -1,8 +1,8 @@
module paperless-gpt module paperless-gpt
go 1.22.7 go 1.23.0
toolchain go1.23.6 toolchain go1.24.1
require ( require (
cloud.google.com/go/documentai v1.35.2 cloud.google.com/go/documentai v1.35.2
@ -12,18 +12,19 @@ require (
github.com/gen2brain/go-fitz v1.24.14 github.com/gen2brain/go-fitz v1.24.14
github.com/gin-gonic/gin v1.10.0 github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0 github.com/google/uuid v1.6.0
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.10.0 github.com/stretchr/testify v1.10.0
github.com/tmc/langchaingo v0.1.13-pre.1 github.com/tmc/langchaingo v0.1.13
golang.org/x/sync v0.11.0 golang.org/x/sync v0.12.0
google.golang.org/api v0.221.0 google.golang.org/api v0.225.0
gorm.io/driver/sqlite v1.5.7 gorm.io/driver/sqlite v1.5.7
gorm.io/gorm v1.25.12 gorm.io/gorm v1.25.12
) )
require ( require (
cloud.google.com/go v0.118.1 // indirect cloud.google.com/go v0.118.1 // indirect
cloud.google.com/go/auth v0.14.1 // indirect cloud.google.com/go/auth v0.15.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
cloud.google.com/go/compute/metadata v0.6.0 // indirect cloud.google.com/go/compute/metadata v0.6.0 // indirect
cloud.google.com/go/longrunning v0.6.4 // indirect cloud.google.com/go/longrunning v0.6.4 // indirect
@ -46,8 +47,9 @@ require (
github.com/go-playground/validator/v10 v10.20.0 // indirect github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect github.com/goccy/go-json v0.10.2 // indirect
github.com/google/s2a-go v0.1.9 // indirect github.com/google/s2a-go v0.1.9 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.5 // indirect
github.com/googleapis/gax-go/v2 v2.14.1 // indirect github.com/googleapis/gax-go/v2 v2.14.1 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/huandu/xstrings v1.5.0 // indirect github.com/huandu/xstrings v1.5.0 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect github.com/jinzhu/now v1.1.5 // indirect
@ -75,22 +77,22 @@ require (
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
go.opentelemetry.io/otel v1.34.0 // indirect go.opentelemetry.io/otel v1.34.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect go.opentelemetry.io/otel/metric v1.34.0 // indirect
go.opentelemetry.io/otel/trace v1.34.0 // indirect go.opentelemetry.io/otel/trace v1.34.0 // indirect
golang.org/x/arch v0.8.0 // indirect golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.33.0 // indirect golang.org/x/crypto v0.36.0 // indirect
golang.org/x/net v0.35.0 // indirect golang.org/x/net v0.37.0 // indirect
golang.org/x/oauth2 v0.26.0 // indirect golang.org/x/oauth2 v0.28.0 // indirect
golang.org/x/sys v0.30.0 // indirect golang.org/x/sys v0.31.0 // indirect
golang.org/x/text v0.22.0 // indirect golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.10.0 // indirect golang.org/x/time v0.11.0 // indirect
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/grpc v1.70.0 // indirect google.golang.org/grpc v1.71.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect google.golang.org/protobuf v1.36.5 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect
) )

147
go.sum
View file

@ -1,23 +1,13 @@
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E= cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M= cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
cloud.google.com/go/auth v0.13.0 h1:8Fu8TZy167JkW8Tj3q7dIkr2v4cndv41ouecJx0PAHs= cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
cloud.google.com/go/auth v0.13.0/go.mod h1:COOjD9gwfKNKz+IIduatIhYJQIc0mG3H102r/EMxX6Q= cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
cloud.google.com/go/auth v0.14.1 h1:AwoJbzUdxA/whv1qj3TLKwh3XX5sikny2fc40wUl+h0=
cloud.google.com/go/auth v0.14.1/go.mod h1:4JHUxlGXisL0AW8kXPtUF6ztuOksyfUQNFjfsOCXkPM=
cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU=
cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8=
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M= cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc= cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
cloud.google.com/go/documentai v1.35.1 h1:52RfiUsoblXcE57CfKJGnITWLxRM30BcqNk/BKZl2LI=
cloud.google.com/go/documentai v1.35.1/go.mod h1:WJjwUAQfwQPJORW8fjz7RODprMULDzEGLA2E6WxenFw=
cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik= cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI= cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0MK+hc=
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg= cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs= cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
@ -49,8 +39,6 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo= github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
@ -74,21 +62,27 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o= github.com/googleapis/enterprise-certificate-proxy v0.3.5 h1:VgzTY2jogw3xt39CusEnFJWm7rlsq5yL5q9XdLOuP5g=
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk= github.com/googleapis/enterprise-certificate-proxy v0.3.5/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q= github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA= github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
@ -131,8 +125,8 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg= github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
@ -154,8 +148,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tmc/langchaingo v0.1.13-pre.1 h1:r+ma9kl0NuFJGtIrnMPFjEn4RhXktwSI31fIpgiiMm4= github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
github.com/tmc/langchaingo v0.1.13-pre.1/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg= github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
@ -174,105 +168,84 @@ gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJW
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU= gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q=
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70=
golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE= golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4= golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/api v0.214.0 h1:h2Gkq07OYi6kusGOaT/9rnNljuXmqPnaig7WGPmKbwA= google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
google.golang.org/api v0.214.0/go.mod h1:bYPpLG8AyeMWwDU6NXoB00xC0DFkikVvd5MfwoxjLqE= google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
google.golang.org/api v0.220.0 h1:3oMI4gdBgB72WFVwE1nerDD8W3HUOS4kypK6rRLbGns= google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU=
google.golang.org/api v0.220.0/go.mod h1:26ZAlY6aN/8WgpCzjPNy18QpYaz7Zgg1h0qe1GkZEmY= google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ=
google.golang.org/api v0.221.0 h1:qzaJfLhDsbMeFee8zBRdt/Nc+xmOuafD/dbdgGfutOU= google.golang.org/api v0.225.0 h1:+4/IVqBQm0MV5S+JW3kdEGC1WtOmM2mXN1LKH1LdNlw=
google.golang.org/api v0.221.0/go.mod h1:7sOU2+TL4TxUTdbi0gWgAIg7tH5qBXxoyhtL+9x3biQ= google.golang.org/api v0.225.0/go.mod h1:WP/0Xm4LVvMOCldfvOISnWquSRWbG2kArDZcg+W2DbY=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc=
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s= google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE= google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 h1:pgr/4QbFyktUv9CtQ/Fq4gzEE6/Xs7iCXbktaGzLHbQ=
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697/go.mod h1:+D9ySVjN8nY8YCVjc5O7PZDIdZporIDY3KaGfJunh88=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk= google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4= google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY= google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU= google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 h1:J1H9f+LEdWAfHcez/4cvaVBox7cOYT+IU6rgqj5x++8= google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e h1:YA5lmSs3zc/5w+xsRcHqpETkaYyK63ivEPzNTcUUlSA=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk= google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 h1:2duwAxN2+k0xLNpjnHTXoMUgnv6VPSp5fiqTuwSxjmI= google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk= google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/grpc v1.67.3 h1:OgPcDAFKHnH8X3O4WcO4XUc8GRDeKsKReqbQtiCj7N8=
google.golang.org/grpc v1.67.3/go.mod h1:YGaHCc6Oap+FzBJTZLBzkGSYt/cvGPFTPxkn7QfSU8s=
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ= google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw= google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM=
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

27
main.go
View file

@ -36,6 +36,10 @@ var (
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",") correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
azureDocAIEndpoint = os.Getenv("AZURE_DOCAI_ENDPOINT")
azureDocAIKey = os.Getenv("AZURE_DOCAI_KEY")
azureDocAIModelID = os.Getenv("AZURE_DOCAI_MODEL_ID")
azureDocAITimeout = os.Getenv("AZURE_DOCAI_TIMEOUT_SECONDS")
openaiAPIKey = os.Getenv("OPENAI_API_KEY") openaiAPIKey = os.Getenv("OPENAI_API_KEY")
manualTag = os.Getenv("MANUAL_TAG") manualTag = os.Getenv("MANUAL_TAG")
autoTag = os.Getenv("AUTO_TAG") autoTag = os.Getenv("AUTO_TAG")
@ -167,6 +171,18 @@ func main() {
GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"), GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"),
VisionLLMProvider: visionLlmProvider, VisionLLMProvider: visionLlmProvider,
VisionLLMModel: visionLlmModel, VisionLLMModel: visionLlmModel,
AzureEndpoint: azureDocAIEndpoint,
AzureAPIKey: azureDocAIKey,
AzureModelID: azureDocAIModelID,
}
// Parse Azure timeout if set
if azureDocAITimeout != "" {
if timeout, err := strconv.Atoi(azureDocAITimeout); err == nil {
ocrConfig.AzureTimeout = timeout
} else {
log.Warnf("Invalid AZURE_DOCAI_TIMEOUT_SECONDS value: %v, using default", err)
}
} }
// If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider // If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider
@ -422,6 +438,17 @@ func validateOrDefaultEnvVars() {
log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.") log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
} }
// Validate OCR provider if set
ocrProvider := os.Getenv("OCR_PROVIDER")
if ocrProvider == "azure" {
if azureDocAIEndpoint == "" {
log.Fatal("Please set the AZURE_DOCAI_ENDPOINT environment variable for Azure provider")
}
if azureDocAIKey == "" {
log.Fatal("Please set the AZURE_DOCAI_KEY environment variable for Azure provider")
}
}
if llmModel == "" { if llmModel == "" {
log.Fatal("Please set the LLM_MODEL environment variable.") log.Fatal("Please set the LLM_MODEL environment variable.")
} }

13
ocr.go
View file

@ -36,13 +36,20 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string,
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err) return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
} }
ocrText, err := app.ocrProvider.ProcessImage(ctx, imageContent) result, err := app.ocrProvider.ProcessImage(ctx, imageContent)
if err != nil { if err != nil {
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err) return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
} }
pageLogger.Debug("OCR completed for page") if result == nil {
pageLogger.Error("Got nil result from OCR provider")
return "", fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1)
}
ocrTexts = append(ocrTexts, ocrText) pageLogger.WithField("has_hocr", result.HOCR != "").
WithField("metadata", result.Metadata).
Debug("OCR completed for page")
ocrTexts = append(ocrTexts, result.Text)
} }
docLogger.Info("OCR processing completed successfully") docLogger.Info("OCR processing completed successfully")

224
ocr/azure_provider.go Normal file
View file

@ -0,0 +1,224 @@
package ocr
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/gabriel-vasile/mimetype"
"github.com/hashicorp/go-retryablehttp"
"github.com/sirupsen/logrus"
)
const (
apiVersion = "2024-11-30"
defaultModelID = "prebuilt-read"
defaultTimeout = 120
pollingInterval = 2 * time.Second
)
// AzureProvider implements OCR using Azure Document Intelligence
type AzureProvider struct {
endpoint string
apiKey string
modelID string
timeout time.Duration
httpClient *retryablehttp.Client
}
// Request body for Azure Document Intelligence
type analyzeRequest struct {
Base64Source string `json:"base64Source"`
}
func newAzureProvider(config Config) (*AzureProvider, error) {
logger := log.WithFields(logrus.Fields{
"endpoint": config.AzureEndpoint,
"model_id": config.AzureModelID,
})
logger.Info("Creating new Azure Document Intelligence provider")
// Validate required configuration
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
logger.Error("Missing required configuration")
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
}
// Set defaults and create provider
modelID := defaultModelID
if config.AzureModelID != "" {
modelID = config.AzureModelID
}
timeout := defaultTimeout
if config.AzureTimeout > 0 {
timeout = config.AzureTimeout
}
// Configure retryablehttp client
client := retryablehttp.NewClient()
client.RetryMax = 3
client.RetryWaitMin = 1 * time.Second
client.RetryWaitMax = 5 * time.Second
client.Logger = logger
provider := &AzureProvider{
endpoint: config.AzureEndpoint,
apiKey: config.AzureAPIKey,
modelID: modelID,
timeout: time.Duration(timeout) * time.Second,
httpClient: client,
}
logger.Info("Successfully initialized Azure Document Intelligence provider")
return provider, nil
}
func (p *AzureProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
logger := log.WithFields(logrus.Fields{
"model_id": p.modelID,
})
logger.Debug("Starting Azure Document Intelligence processing")
// Detect MIME type
mtype := mimetype.Detect(imageContent)
logger.WithField("mime_type", mtype.String()).Debug("Detected file type")
if !isImageMIMEType(mtype.String()) {
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
}
// Create context with timeout
ctx, cancel := context.WithTimeout(ctx, p.timeout)
defer cancel()
// Submit document for analysis
operationLocation, err := p.submitDocument(ctx, imageContent)
if err != nil {
return nil, fmt.Errorf("error submitting document: %w", err)
}
// Poll for results
result, err := p.pollForResults(ctx, operationLocation)
if err != nil {
return nil, fmt.Errorf("error polling for results: %w", err)
}
// Convert to OCR result
ocrResult := &OCRResult{
Text: result.AnalyzeResult.Content,
Metadata: map[string]string{
"provider": "azure_docai",
"page_count": fmt.Sprintf("%d", len(result.AnalyzeResult.Pages)),
"api_version": result.AnalyzeResult.APIVersion,
},
}
logger.WithFields(logrus.Fields{
"content_length": len(ocrResult.Text),
"page_count": len(result.AnalyzeResult.Pages),
}).Info("Successfully processed document")
return ocrResult, nil
}
func (p *AzureProvider) submitDocument(ctx context.Context, imageContent []byte) (string, error) {
requestURL := fmt.Sprintf("%s/documentintelligence/documentModels/%s:analyze?api-version=%s",
p.endpoint, p.modelID, apiVersion)
// Prepare request body
requestBody := analyzeRequest{
Base64Source: base64.StdEncoding.EncodeToString(imageContent),
}
requestBodyBytes, err := json.Marshal(requestBody)
if err != nil {
return "", fmt.Errorf("error marshaling request body: %w", err)
}
req, err := retryablehttp.NewRequestWithContext(ctx, "POST", requestURL, bytes.NewBuffer(requestBodyBytes))
if err != nil {
return "", fmt.Errorf("error creating HTTP request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)
resp, err := p.httpClient.Do(req)
if err != nil {
return "", fmt.Errorf("error sending HTTP request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusAccepted {
body, _ := io.ReadAll(resp.Body)
return "", fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
}
operationLocation := resp.Header.Get("Operation-Location")
if operationLocation == "" {
return "", fmt.Errorf("no Operation-Location header in response")
}
return operationLocation, nil
}
func (p *AzureProvider) pollForResults(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
logger := log.WithField("operation_location", operationLocation)
logger.Debug("Starting to poll for results")
ticker := time.NewTicker(pollingInterval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return nil, fmt.Errorf("operation timed out after %v: %w", p.timeout, ctx.Err())
case <-ticker.C:
req, err := retryablehttp.NewRequestWithContext(ctx, "GET", operationLocation, nil)
if err != nil {
return nil, fmt.Errorf("error creating poll request: %w", err)
}
req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)
resp, err := p.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("error polling for results: %w", err)
}
var result AzureDocumentResult
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
resp.Body.Close()
logger.WithError(err).Error("Failed to decode response")
return nil, fmt.Errorf("error decoding response: %w", err)
}
defer resp.Body.Close()
logger.WithFields(logrus.Fields{
"status_code": resp.StatusCode,
"content_length": len(result.AnalyzeResult.Content),
"page_count": len(result.AnalyzeResult.Pages),
"status": result.Status,
}).Debug("Poll response received")
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code %d while polling", resp.StatusCode)
}
switch result.Status {
case "succeeded":
return &result, nil
case "failed":
return nil, fmt.Errorf("document processing failed")
case "running":
// Continue polling
default:
return nil, fmt.Errorf("unexpected status: %s", result.Status)
}
}
}
}

222
ocr/azure_provider_test.go Normal file
View file

@ -0,0 +1,222 @@
package ocr
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/hashicorp/go-retryablehttp"
"github.com/stretchr/testify/assert"
)
func TestNewAzureProvider(t *testing.T) {
tests := []struct {
name string
config Config
wantErr bool
errContains string
}{
{
name: "valid config",
config: Config{
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
AzureAPIKey: "test-key",
},
wantErr: false,
},
{
name: "valid config with custom model and timeout",
config: Config{
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
AzureAPIKey: "test-key",
AzureModelID: "custom-model",
AzureTimeout: 60,
},
wantErr: false,
},
{
name: "missing endpoint",
config: Config{
AzureAPIKey: "test-key",
},
wantErr: true,
errContains: "missing required Azure Document Intelligence configuration",
},
{
name: "missing api key",
config: Config{
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
},
wantErr: true,
errContains: "missing required Azure Document Intelligence configuration",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
provider, err := newAzureProvider(tt.config)
if tt.wantErr {
assert.Error(t, err)
if tt.errContains != "" {
assert.Contains(t, err.Error(), tt.errContains)
}
return
}
assert.NoError(t, err)
assert.NotNil(t, provider)
// Verify default values
if tt.config.AzureModelID == "" {
assert.Equal(t, defaultModelID, provider.modelID)
} else {
assert.Equal(t, tt.config.AzureModelID, provider.modelID)
}
if tt.config.AzureTimeout == 0 {
assert.Equal(t, time.Duration(defaultTimeout)*time.Second, provider.timeout)
} else {
assert.Equal(t, time.Duration(tt.config.AzureTimeout)*time.Second, provider.timeout)
}
})
}
}
func TestAzureProvider_ProcessImage(t *testing.T) {
// Sample success response
now := time.Now()
successResult := AzureDocumentResult{
Status: "succeeded",
CreatedDateTime: now,
LastUpdatedDateTime: now,
AnalyzeResult: AzureAnalyzeResult{
APIVersion: apiVersion,
ModelID: defaultModelID,
StringIndexType: "utf-16",
Content: "Test document content",
Pages: []AzurePage{
{
PageNumber: 1,
Angle: 0.0,
Width: 800,
Height: 600,
Unit: "pixel",
Lines: []AzureLine{
{
Content: "Test line",
Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
Spans: []AzureSpan{{Offset: 0, Length: 9}},
},
},
Spans: []AzureSpan{{Offset: 0, Length: 9}},
},
},
Paragraphs: []AzureParagraph{
{
Content: "Test document content",
Spans: []AzureSpan{{Offset: 0, Length: 19}},
BoundingRegions: []AzureBoundingBox{
{
PageNumber: 1,
Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
},
},
},
},
ContentFormat: "text",
},
}
tests := []struct {
name string
setupServer func() *httptest.Server
imageContent []byte
wantErr bool
errContains string
expectedText string
}{
{
name: "successful processing",
setupServer: func() *httptest.Server {
mux := http.NewServeMux()
server := httptest.NewServer(mux)
mux.HandleFunc("/documentintelligence/documentModels/prebuilt-read:analyze", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Operation-Location", fmt.Sprintf("%s/operations/123", server.URL))
w.WriteHeader(http.StatusAccepted)
})
mux.HandleFunc("/operations/123", func(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(successResult)
})
return server
},
// Create minimal JPEG content with magic numbers
imageContent: append([]byte{0xFF, 0xD8, 0xFF, 0xE0}, []byte("JFIF test content")...),
expectedText: "Test document content",
},
{
name: "invalid mime type",
setupServer: func() *httptest.Server {
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Log("Server should not be called with invalid mime type")
w.WriteHeader(http.StatusBadRequest)
}))
},
imageContent: []byte("invalid content"),
wantErr: true,
errContains: "unsupported file type",
},
{
name: "submission error",
setupServer: func() *httptest.Server {
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintln(w, "Invalid request")
}))
},
imageContent: []byte{0xFF, 0xD8, 0xFF, 0xE0}, // JPEG magic numbers
wantErr: true,
errContains: "unexpected status code 400",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
server := tt.setupServer()
defer server.Close()
client := retryablehttp.NewClient()
client.HTTPClient = server.Client()
client.Logger = log
provider := &AzureProvider{
endpoint: server.URL,
apiKey: "test-key",
modelID: defaultModelID,
timeout: 5 * time.Second,
httpClient: client,
}
result, err := provider.ProcessImage(context.Background(), tt.imageContent)
if tt.wantErr {
assert.Error(t, err)
if tt.errContains != "" {
assert.Contains(t, err.Error(), tt.errContains)
}
return
}
assert.NoError(t, err)
assert.NotNil(t, result)
assert.Equal(t, tt.expectedText, result.Text)
assert.Equal(t, "azure_docai", result.Metadata["provider"])
assert.Equal(t, apiVersion, result.Metadata["api_version"])
assert.Equal(t, "1", result.Metadata["page_count"])
})
}
}

72
ocr/azure_types.go Normal file
View file

@ -0,0 +1,72 @@
package ocr
import "time"
// AzureDocumentResult represents the root response from Azure Document Intelligence
type AzureDocumentResult struct {
Status string `json:"status"`
CreatedDateTime time.Time `json:"createdDateTime"`
LastUpdatedDateTime time.Time `json:"lastUpdatedDateTime"`
AnalyzeResult AzureAnalyzeResult `json:"analyzeResult"`
}
// AzureAnalyzeResult represents the analyze result part of the Azure Document Intelligence response
type AzureAnalyzeResult struct {
APIVersion string `json:"apiVersion"`
ModelID string `json:"modelId"`
StringIndexType string `json:"stringIndexType"`
Content string `json:"content"`
Pages []AzurePage `json:"pages"`
Paragraphs []AzureParagraph `json:"paragraphs"`
Styles []interface{} `json:"styles"`
ContentFormat string `json:"contentFormat"`
}
// AzurePage represents a single page in the document
type AzurePage struct {
PageNumber int `json:"pageNumber"`
Angle float64 `json:"angle"`
Width int `json:"width"`
Height int `json:"height"`
Unit string `json:"unit"`
Words []AzureWord `json:"words"`
Lines []AzureLine `json:"lines"`
Spans []AzureSpan `json:"spans"`
}
// AzureWord represents a single word with its properties
type AzureWord struct {
Content string `json:"content"`
Polygon []int `json:"polygon"`
Confidence float64 `json:"confidence"`
Span AzureSpan `json:"span"`
}
// AzureLine represents a line of text
type AzureLine struct {
Content string `json:"content"`
Polygon []int `json:"polygon"`
Spans []AzureSpan `json:"spans"`
}
// AzureSpan represents a span of text with offset and length
type AzureSpan struct {
Offset int `json:"offset"`
Length int `json:"length"`
}
// AzureParagraph represents a paragraph of text
type AzureParagraph struct {
Content string `json:"content"`
Spans []AzureSpan `json:"spans"`
BoundingRegions []AzureBoundingBox `json:"boundingRegions"`
}
// AzureBoundingBox represents the location of content on a page
type AzureBoundingBox struct {
PageNumber int `json:"pageNumber"`
Polygon []int `json:"polygon"`
}
// AzureStyle represents style information for text segments - changed to interface{} as per input
type AzureStyle interface{}

View file

@ -3,6 +3,8 @@ package ocr
import ( import (
"context" "context"
"fmt" "fmt"
"html"
"strings"
documentai "cloud.google.com/go/documentai/apiv1" documentai "cloud.google.com/go/documentai/apiv1"
"cloud.google.com/go/documentai/apiv1/documentaipb" "cloud.google.com/go/documentai/apiv1/documentaipb"
@ -46,7 +48,7 @@ func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
return provider, nil return provider, nil
} }
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) { func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
logger := log.WithFields(logrus.Fields{ logger := log.WithFields(logrus.Fields{
"project_id": p.projectID, "project_id": p.projectID,
"location": p.location, "location": p.location,
@ -60,7 +62,7 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
if !isImageMIMEType(mtype.String()) { if !isImageMIMEType(mtype.String()) {
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type") logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
return "", fmt.Errorf("unsupported file type: %s", mtype.String()) return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
} }
name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID) name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
@ -79,21 +81,56 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
resp, err := p.client.ProcessDocument(ctx, req) resp, err := p.client.ProcessDocument(ctx, req)
if err != nil { if err != nil {
logger.WithError(err).Error("Failed to process document") logger.WithError(err).Error("Failed to process document")
return "", fmt.Errorf("error processing document: %w", err) return nil, fmt.Errorf("error processing document: %w", err)
} }
if resp == nil || resp.Document == nil { if resp == nil || resp.Document == nil {
logger.Error("Received nil response or document from Document AI") logger.Error("Received nil response or document from Document AI")
return "", fmt.Errorf("received nil response or document from Document AI") return nil, fmt.Errorf("received nil response or document from Document AI")
} }
if resp.Document.Error != nil { if resp.Document.Error != nil {
logger.WithField("error", resp.Document.Error.Message).Error("Document processing error") logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
return "", fmt.Errorf("document processing error: %s", resp.Document.Error.Message) return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
} }
logger.WithField("content_length", len(resp.Document.Text)).Info("Successfully processed document") metadata := map[string]string{
return resp.Document.Text, nil "provider": "google_docai",
"mime_type": mtype.String(),
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
"processor_id": p.processorID,
}
// Safely add language code if available
if pages := resp.Document.GetPages(); len(pages) > 0 {
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
metadata["lang_code"] = langs[0].GetLanguageCode()
}
}
result := &OCRResult{
Text: resp.Document.Text,
Metadata: metadata,
}
// Add hOCR output if available
if len(resp.Document.GetPages()) > 0 {
var hocr string
func() {
defer func() {
if r := recover(); r != nil {
logger.WithField("error", r).Error("Panic during hOCR generation")
}
}()
hocr = generateHOCR(resp.Document)
}()
if hocr != "" {
result.HOCR = hocr
}
}
logger.WithField("content_length", len(result.Text)).Info("Successfully processed document")
return result, nil
} }
// isImageMIMEType checks if the given MIME type is a supported image type // isImageMIMEType checks if the given MIME type is a supported image type
@ -109,6 +146,83 @@ func isImageMIMEType(mimeType string) bool {
return supportedTypes[mimeType] return supportedTypes[mimeType]
} }
// generateHOCR converts Document AI response to hOCR format
func generateHOCR(doc *documentaipb.Document) string {
if len(doc.GetPages()) == 0 {
return ""
}
var hocr strings.Builder
hocr.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>OCR Output</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name='ocr-system' content='google-docai' />
</head>
<body>`)
for pageNum, page := range doc.GetPages() {
pageWidth := page.GetDimension().GetWidth()
pageHeight := page.GetDimension().GetHeight()
// Validate dimensions
if pageWidth <= 0 || pageHeight <= 0 {
continue
}
hocr.WriteString(fmt.Sprintf(`
<div class='ocr_page' id='page_%d' title='image;bbox 0 0 %d %d'>`,
pageNum+1, int(pageWidth), int(pageHeight)))
// Process paragraphs
for _, para := range page.GetParagraphs() {
paraBox := para.GetLayout().GetBoundingPoly().GetNormalizedVertices()
if len(paraBox) < 4 {
continue
}
// Convert normalized coordinates to absolute
// Use float64 for intermediate calculations to prevent overflow
x1 := int(float64(paraBox[0].GetX()) * float64(pageWidth))
y1 := int(float64(paraBox[0].GetY()) * float64(pageHeight))
x2 := int(float64(paraBox[2].GetX()) * float64(pageWidth))
y2 := int(float64(paraBox[2].GetY()) * float64(pageHeight))
// Validate coordinates
if x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0 ||
x1 > int(pageWidth) || y1 > int(pageHeight) ||
x2 > int(pageWidth) || y2 > int(pageHeight) {
continue
}
hocr.WriteString(fmt.Sprintf(`
<p class='ocr_par' id='par_%d_%d' title='bbox %d %d %d %d'>`,
pageNum+1, len(page.GetParagraphs()), x1, y1, x2, y2))
// Process words within paragraph
for _, token := range para.GetLayout().GetTextAnchor().GetTextSegments() {
text := doc.Text[token.GetStartIndex():token.GetEndIndex()]
if text == "" {
continue
}
// Escape HTML special characters
text = html.EscapeString(text)
hocr.WriteString(fmt.Sprintf(`
<span class='ocrx_word'>%s</span>`, text))
}
hocr.WriteString("\n </p>")
}
hocr.WriteString("\n </div>")
}
hocr.WriteString("\n</body>\n</html>")
return hocr.String()
}
// Close releases resources used by the provider // Close releases resources used by the provider
func (p *GoogleDocAIProvider) Close() error { func (p *GoogleDocAIProvider) Close() error {
if p.client != nil { if p.client != nil {

View file

@ -0,0 +1,94 @@
package ocr
import (
"regexp"
"strings"
"testing"
"cloud.google.com/go/documentai/apiv1/documentaipb"
)
func TestGenerateHOCR(t *testing.T) {
tests := []struct {
name string
doc *documentaipb.Document
expected string
}{
{
name: "empty document",
doc: &documentaipb.Document{},
expected: "",
},
{
name: "single page with one paragraph",
doc: &documentaipb.Document{
Text: "Hello World",
Pages: []*documentaipb.Document_Page{
{
Dimension: &documentaipb.Document_Page_Dimension{
Width: 800,
Height: 600,
},
Paragraphs: []*documentaipb.Document_Page_Paragraph{
{
Layout: &documentaipb.Document_Page_Layout{
BoundingPoly: &documentaipb.BoundingPoly{
NormalizedVertices: []*documentaipb.NormalizedVertex{
{X: 0.1, Y: 0.1},
{X: 0.9, Y: 0.1},
{X: 0.9, Y: 0.2},
{X: 0.1, Y: 0.2},
},
},
TextAnchor: &documentaipb.Document_TextAnchor{
TextSegments: []*documentaipb.Document_TextAnchor_TextSegment{
{
StartIndex: 0,
EndIndex: 11,
},
},
},
},
},
},
},
},
},
expected: "(?s).*<div class='ocr_page' id='page_1' title='image;bbox 0 0 800 600'>.*" +
"<p class='ocr_par' id='par_1_1' title='bbox 80 60 719 120'>.*" +
"<span class='ocrx_word'>Hello World</span>.*</p>.*</div>.*",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := generateHOCR(tt.doc)
if tt.expected == "" {
if result != "" {
t.Errorf("expected empty string, got %v", result)
}
return
}
matched, err := regexp.MatchString(tt.expected, result)
if err != nil {
t.Fatalf("error matching regex: %v", err)
}
if !matched {
t.Errorf("expected to match regex %v\ngot: %v", tt.expected, result)
}
// Verify basic hOCR structure
if !strings.Contains(result, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") {
t.Error("missing XML declaration")
}
if !strings.Contains(result, "<html xmlns=\"http://www.w3.org/1999/xhtml\"") {
t.Error("missing HTML namespace")
}
if !strings.Contains(result, "<meta name='ocr-system' content='google-docai'") {
t.Error("missing OCR system metadata")
}
})
}
}

View file

@ -60,7 +60,7 @@ func newLLMProvider(config Config) (*LLMProvider, error) {
}, nil }, nil
} }
func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) { func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
logger := log.WithFields(logrus.Fields{ logger := log.WithFields(logrus.Fields{
"provider": p.provider, "provider": p.provider,
"model": p.model, "model": p.model,
@ -71,7 +71,7 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (st
img, _, err := image.Decode(bytes.NewReader(imageContent)) img, _, err := image.Decode(bytes.NewReader(imageContent))
if err != nil { if err != nil {
logger.WithError(err).Error("Failed to decode image") logger.WithError(err).Error("Failed to decode image")
return "", fmt.Errorf("error decoding image: %w", err) return nil, fmt.Errorf("error decoding image: %w", err)
} }
bounds := img.Bounds() bounds := img.Bounds()
logger.WithFields(logrus.Fields{ logger.WithFields(logrus.Fields{
@ -106,11 +106,18 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (st
}) })
if err != nil { if err != nil {
logger.WithError(err).Error("Failed to get response from vision model") logger.WithError(err).Error("Failed to get response from vision model")
return "", fmt.Errorf("error getting response from LLM: %w", err) return nil, fmt.Errorf("error getting response from LLM: %w", err)
} }
logger.WithField("content_length", len(completion.Choices[0].Content)).Info("Successfully processed image") result := &OCRResult{
return completion.Choices[0].Content, nil Text: completion.Choices[0].Content,
Metadata: map[string]string{
"provider": p.provider,
"model": p.model,
},
}
logger.WithField("content_length", len(result.Text)).Info("Successfully processed image")
return result, nil
} }
// createOpenAIClient creates a new OpenAI vision model client // createOpenAIClient creates a new OpenAI vision model client

View file

@ -9,14 +9,26 @@ import (
var log = logrus.New() var log = logrus.New()
// OCRResult holds the output from OCR processing
type OCRResult struct {
// Plain text output (required)
Text string
// hOCR output (optional, if provider supports it)
HOCR string
// Additional provider-specific metadata
Metadata map[string]string
}
// Provider defines the interface for OCR processing // Provider defines the interface for OCR processing
type Provider interface { type Provider interface {
ProcessImage(ctx context.Context, imageContent []byte) (string, error) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
} }
// Config holds the OCR provider configuration // Config holds the OCR provider configuration
type Config struct { type Config struct {
// Provider type (e.g., "llm", "google_docai") // Provider type (e.g., "llm", "google_docai", "azure")
Provider string Provider string
// Google Document AI settings // Google Document AI settings
@ -27,6 +39,15 @@ type Config struct {
// LLM settings (from existing config) // LLM settings (from existing config)
VisionLLMProvider string VisionLLMProvider string
VisionLLMModel string VisionLLMModel string
// Azure Document Intelligence settings
AzureEndpoint string
AzureAPIKey string
AzureModelID string // Optional, defaults to "prebuilt-read"
AzureTimeout int // Optional, defaults to 120 seconds
// OCR output options
EnableHOCR bool // Whether to request hOCR output if supported by the provider
} }
// NewProvider creates a new OCR provider based on configuration // NewProvider creates a new OCR provider based on configuration
@ -54,6 +75,12 @@ func NewProvider(config Config) (Provider, error) {
}).Info("Using LLM OCR provider") }).Info("Using LLM OCR provider")
return newLLMProvider(config) return newLLMProvider(config)
case "azure":
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
}
return newAzureProvider(config)
default: default:
return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider) return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
} }

View file

@ -94,7 +94,6 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body
log.WithFields(logrus.Fields{ log.WithFields(logrus.Fields{
"method": method, "method": method,
"url": url, "url": url,
"headers": req.Header,
}).Debug("Making HTTP request") }).Debug("Making HTTP request")
resp, err := client.HTTPClient.Do(req) resp, err := client.HTTPClient.Do(req)

File diff suppressed because it is too large Load diff

View file

@ -43,7 +43,7 @@
"eslint": "^9.9.0", "eslint": "^9.9.0",
"eslint-plugin-react-hooks": "^5.1.0-rc.0", "eslint-plugin-react-hooks": "^5.1.0-rc.0",
"eslint-plugin-react-refresh": "^0.4.9", "eslint-plugin-react-refresh": "^0.4.9",
"globals": "^15.9.0", "globals": "^16.0.0",
"node-fetch": "^3.3.0", "node-fetch": "^3.3.0",
"postcss": "^8.4.47", "postcss": "^8.4.47",
"tailwindcss": "^3.4.12", "tailwindcss": "^3.4.12",