Compare commits

..

32 commits

Author SHA1 Message Date
renovate[bot]
53bfc7d252
fix(deps): update module google.golang.org/api to v0.225.0 (#284)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-12 09:47:02 +01:00
renovate[bot]
14567480a8
fix(deps): update dependency axios to v1.8.3 (#285)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-12 09:03:30 +01:00
renovate[bot]
72a23ac51e
fix(deps): update dependency react-router-dom to v7.3.0 (#273)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:13:21 +01:00
renovate[bot]
60c141e815
chore(deps): update eslint monorepo to v9.22.0 (#277)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:13:10 +01:00
renovate[bot]
3788abc9cb
fix(deps): update dependency axios to v1.8.2 (#275)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:13:01 +01:00
renovate[bot]
d477e26048
fix(deps): update module google.golang.org/api to v0.224.0 (#276)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:12:47 +01:00
renovate[bot]
184c1a8600
chore(deps): update dependency autoprefixer to v10.4.21 (#278)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-10 12:12:37 +01:00
Icereed
cbd9c5438c
feat(ocr): add support for Azure Document Intelligence provider (#279) 2025-03-10 10:51:56 +01:00
Icereed
360663b05b
feat(ocr): enhance OCR processing with structured results and hOCR support (#212)
* feat(ocr): enhance OCR processing with structured results and hOCR support

* Update ocr/google_docai_provider.go

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* Update ocr/google_docai_provider_test.go

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* refactor(tests): remove unused context import from google_docai_provider_test.go

* refactor: Add defensive checks for language code in Google DocAI provider (#226)

* Update ocr/google_docai_provider.go

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* Update ocr/google_docai_provider.go

Co-authored-by: gardar <gardar@users.noreply.github.com>

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: mkrinke <mad.krinke@googlemail.com>
Co-authored-by: gardar <gardar@users.noreply.github.com>
2025-03-10 08:43:50 +00:00
renovate[bot]
7c7449e197
chore(deps): update dependency go to v1.24.1 (#269)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:43:26 +01:00
renovate[bot]
a5a5afe276
chore(deps): update golang docker tag to v1.24.1 (#270)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:43:05 +01:00
renovate[bot]
3bb1415b25
fix(deps): update module golang.org/x/sync to v0.12.0 (#274)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:42:40 +01:00
renovate[bot]
df396dac78
chore(deps): update dependency vite to v6.2.1 (#271)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:42:12 +01:00
renovate[bot]
c514914b4e
chore(deps): update dependency @playwright/test to v1.51.0 (#272)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-07 21:41:45 +01:00
renovate[bot]
5ac6bb7532
fix(deps): update module google.golang.org/api to v0.223.0 (#264)
Some checks failed
Build and Push Docker Images / test (push) Has been cancelled
Build and Push Docker Images / build-amd64 (push) Has been cancelled
Build and Push Docker Images / build-arm64 (push) Has been cancelled
Build and Push Docker Images / merge-manifests (push) Has been cancelled
Build and Push Docker Images / E2E Tests (push) Has been cancelled
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-03 09:26:25 +01:00
renovate[bot]
6c03fca89c
fix(deps): update dependency axios to v1.8.1 (#265)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-27 06:27:50 +00:00
renovate[bot]
0b5b367b0a
chore(deps): update eslint monorepo to v9.21.0 (#259)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-25 20:58:59 +01:00
renovate[bot]
9b0204180f
chore(deps): update dependency vite to v6.2.0 (#263)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-25 20:58:34 +01:00
renovate[bot]
5c6f50a1a3
chore(deps): update dependency postcss to v8.5.3 (#255)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-21 16:25:50 +01:00
renovate[bot]
74e4419b7c
fix(deps): update module google.golang.org/api to v0.222.0 (#256)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-21 16:25:41 +01:00
renovate[bot]
fc1d69a93b
chore(deps): update dependency globals to v16 (#257)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-20 21:37:29 +01:00
renovate[bot]
1d6cea481f
fix(deps): update dependency react-icons to v5.5.0 (#251)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-19 13:09:28 +01:00
renovate[bot]
c95487e834
chore(deps): update dependency vite to v6.1.1 (#253)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-19 13:09:15 +01:00
renovate[bot]
3bcee9c1c5
fix(deps): update dependency react-router-dom to v7.2.0 (#250)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-19 07:07:54 +01:00
renovate[bot]
ad74e28473
chore(deps): update dependency typescript-eslint to v8.24.1 (#249)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 19:42:01 +01:00
Icereed
b5fb1cb040
Potential fix for code scanning alert no. 3: Clear-text logging of sensitive information (#247)
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-02-17 12:05:00 +01:00
renovate[bot]
992b3b824b
chore(deps): update dependency postcss to v8.5.2 (#220)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:54:04 +01:00
renovate[bot]
474fde659c
chore(deps): update dependency @types/node to v22.13.4 (#238)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:54 +01:00
renovate[bot]
80365f95a0
chore(deps): update dependency go to v1.24.0 (#225)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:42 +01:00
renovate[bot]
bfe1e00392
chore(deps): update alpine docker tag to v3.21.3 (#240)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:33 +01:00
renovate[bot]
c1b4f8344f
chore(deps): update react monorepo to v19.0.10 (#242)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:24 +01:00
renovate[bot]
b6b8948fe3
fix(deps): update module github.com/tmc/langchaingo to v0.1.13 (#243)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-17 11:53:10 +01:00
17 changed files with 1343 additions and 413 deletions

View file

@ -25,7 +25,7 @@ COPY web-app /app/
RUN npm run build
# Stage 2: Build the Go binary
FROM golang:1.24.0-alpine3.21 AS builder
FROM golang:1.24.1-alpine3.21 AS builder
# Set the working directory inside the container
WORKDIR /app
@ -82,7 +82,7 @@ RUN sed -i \
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
# Stage 3: Create a lightweight image with just the binary
FROM alpine:3.21.2
FROM alpine:3.21.3
ENV GIN_MODE=release

112
README.md
View file

@ -22,7 +22,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
- **LLM OCR**: Use OpenAI or Ollama to extract text from images.
- **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
- **More to come**: Stay tuned for more OCR providers!
- **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
3. **Automatic Title & Tag Generation**
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
@ -39,11 +39,11 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
7. **Simple Docker Deployment**
A few environment variables, and youre off! Compose it alongside paperless-ngx with minimal fuss.
A few environment variables, and you're off! Compose it alongside paperless-ngx with minimal fuss.
8. **Unified Web UI**
- **Manual Review**: Approve or tweak AIs suggestions.
- **Manual Review**: Approve or tweak AI's suggestions.
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
---
@ -56,6 +56,12 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
- [Installation](#installation)
- [Docker Compose](#docker-compose)
- [Manual Setup](#manual-setup)
- [OCR Providers](#ocr-providers)
- [LLM-based OCR](#1-llm-based-ocr-default)
- [Azure Document Intelligence](#2-azure-document-intelligence)
- [Google Document AI](#3-google-document-ai)
- [Comparing OCR Providers](#comparing-ocr-providers)
- [Choosing the Right Provider](#choosing-the-right-provider)
- [Configuration](#configuration)
- [Environment Variables](#environment-variables)
- [Custom Prompt Templates](#custom-prompt-templates)
@ -86,7 +92,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
#### Docker Compose
Heres an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
Here's an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
```yaml
services:
@ -124,6 +130,13 @@ services:
# GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
# GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key
# Option 3: Azure Document Intelligence
# OCR_PROVIDER: 'azure' # Use Azure Document Intelligence
# AZURE_DOCAI_ENDPOINT: 'your-endpoint' # Your Azure endpoint URL
# AZURE_DOCAI_KEY: 'your-key' # Your Azure API key
# AZURE_DOCAI_MODEL_ID: 'prebuilt-read' # Optional, defaults to prebuilt-read
# AZURE_DOCAI_TIMEOUT_SECONDS: '120' # Optional, defaults to 120 seconds
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
LOG_LEVEL: "info" # Optional: debug, warn, error
@ -172,6 +185,63 @@ services:
```
---
## OCR Providers
paperless-gpt supports three different OCR providers, each with unique strengths and capabilities:
### 1. LLM-based OCR (Default)
- **Key Features**:
- Uses vision-capable LLMs like GPT-4V or MiniCPM-V
- High accuracy with complex layouts and difficult scans
- Context-aware text recognition
- Self-correcting capabilities for OCR errors
- **Best For**:
- Complex or unusual document layouts
- Poor quality scans
- Documents with mixed languages
- **Configuration**:
```yaml
OCR_PROVIDER: "llm"
VISION_LLM_PROVIDER: "openai" # or "ollama"
VISION_LLM_MODEL: "gpt-4v" # or "minicpm-v"
```
### 2. Azure Document Intelligence
- **Key Features**:
- Enterprise-grade OCR solution
- Prebuilt models for common document types
- Layout preservation and table detection
- Fast processing speeds
- **Best For**:
- Business documents and forms
- High-volume processing
- Documents requiring layout analysis
- **Configuration**:
```yaml
OCR_PROVIDER: "azure"
AZURE_DOCAI_ENDPOINT: "https://your-endpoint.cognitiveservices.azure.com/"
AZURE_DOCAI_KEY: "your-key"
AZURE_DOCAI_MODEL_ID: "prebuilt-read" # optional
AZURE_DOCAI_TIMEOUT_SECONDS: "120" # optional
```
### 3. Google Document AI
- **Key Features**:
- Specialized document processors
- Strong form field detection
- Multi-language support
- High accuracy on structured documents
- **Best For**:
- Forms and structured documents
- Documents with tables
- Multi-language documents
- **Configuration**:
```yaml
OCR_PROVIDER: "google_docai"
GOOGLE_PROJECT_ID: "your-project"
GOOGLE_LOCATION: "us"
GOOGLE_PROCESSOR_ID: "processor-id"
```
## Configuration
@ -192,9 +262,13 @@ services:
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
| `OCR_PROVIDER` | OCR provider to use (`llm` or `google_docai`). | No | llm |
| `OCR_PROVIDER` | OCR provider to use (`llm`, `azure`, or `google_docai`). | No | llm |
| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
| `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | |
| `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | |
| `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional when OCR_PROVIDER is `azure`. | No | prebuilt-read |
| `AZURE_DOCAI_TIMEOUT_SECONDS` | Azure Document Intelligence timeout in seconds. | No | 120 |
| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
@ -211,7 +285,7 @@ services:
### Custom Prompt Templates
paperless-gpts flexible **prompt templates** let you shape how AI responds:
paperless-gpt's flexible **prompt templates** let you shape how AI responds:
1. **`title_prompt.tmpl`**: For document titles.
2. **`tag_prompt.tmpl`**: For tagging logic.
@ -232,13 +306,11 @@ Then tweak at will—**paperless-gpt** reloads them automatically on startup!
Each template has access to specific variables:
**title_prompt.tmpl**:
- `{{.Language}}` - Target language (e.g., "English")
- `{{.Content}}` - Document content text
- `{{.Title}}` - Original document title
**tag_prompt.tmpl**:
- `{{.Language}}` - Target language
- `{{.AvailableTags}}` - List of existing tags in paperless-ngx
- `{{.OriginalTags}}` - Document's current tags
@ -246,11 +318,9 @@ Each template has access to specific variables:
- `{{.Content}}` - Document content text
**ocr_prompt.tmpl**:
- `{{.Language}}` - Target language
**correspondent_prompt.tmpl**:
- `{{.Language}}` - Target language
- `{{.AvailableCorrespondents}}` - List of existing correspondents
- `{{.BlackList}}` - List of blacklisted correspondent names
@ -265,23 +335,25 @@ The templates use Go's text/template syntax. paperless-gpt automatically reloads
1. **Tag Documents**
- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.
- Add the `paperless-gpt` tag to documents for manual processing
- Add `paperless-gpt-auto` for automatic processing
- Add `paperless-gpt-ocr-auto` for automatic OCR processing
2. **Visit Web UI**
- Go to `http://localhost:8080` (or your host) in your browser.
- Go to `http://localhost:8080` (or your host) in your browser
- Review documents tagged for processing
3. **Generate & Apply Suggestions**
- Click “Generate Suggestions” to see AI-proposed titles/tags/correspondents.
- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
4. **Try LLM-Based OCR (Experimental)**
- If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
- Click "Generate Suggestions" to see AI-proposed titles/tags/correspondents
- Review and approve or edit suggestions
- Click "Apply" to save changes to paperless-ngx
4. **OCR Processing**
- Tag documents with the appropriate OCR tag to process them
- Monitor progress in the Web UI
- Review results and apply changes
---
## LLM-Based OCR: Compare for Yourself

View file

@ -92,3 +92,88 @@
- E2E tests for web interface
- Test fixtures and mocks
- Playwright for frontend testing
## OCR System Patterns
### OCR Provider Architecture
#### 1. Provider Interface
- Common interface for all OCR implementations
- Methods for image processing
- Configuration through standardized Config struct
- Resource management patterns
#### 2. LLM Provider Implementation
- Supports OpenAI and Ollama vision models
- Base64 encoding for OpenAI requests
- Binary format for Ollama requests
- Template-based OCR prompts
#### 3. Google Document AI Provider
- Enterprise-grade OCR processing
- MIME type validation
- Processor configuration via environment
- Regional endpoint support
### Logging Patterns
#### 1. Provider Initialization
```
[INFO] Initializing OCR provider: llm
[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)
```
#### 2. Processing Logs
```
[DEBUG] Starting OCR processing
[DEBUG] Image dimensions (width=800, height=1200)
[DEBUG] Using binary image format for non-OpenAI provider
[DEBUG] Sending request to vision model
[INFO] Successfully processed image (content_length=1536)
```
#### 3. Error Logging
```
[ERROR] Failed to decode image: invalid format
[ERROR] Unsupported file type: image/webp
[ERROR] Failed to get response from vision model
```
### Error Handling Patterns
#### 1. Configuration Validation
- Required parameter checks
- Environment variable validation
- Provider-specific configuration
- Connection testing
#### 2. Processing Errors
- Image format validation
- MIME type checking
- Content processing errors
- Provider-specific error handling
#### 3. Error Propagation
- Detailed error contexts
- Original error wrapping
- Logging with error context
- Recovery mechanisms
### Processing Flow
#### 1. Document Processing
```
Document Tagged → OCR Provider Selected → Image Processing → Text Extraction → Content Update
```
#### 2. Provider Selection
```
Config Check → Provider Initialization → Resource Setup → Provider Ready
```
#### 3. Error Recovery
```
Error Detection → Logging → Cleanup → Error Propagation
```
These patterns ensure consistent behavior across OCR providers while maintaining proper logging and error handling throughout the system.

36
go.mod
View file

@ -1,8 +1,8 @@
module paperless-gpt
go 1.22.7
go 1.23.0
toolchain go1.23.6
toolchain go1.24.1
require (
cloud.google.com/go/documentai v1.35.2
@ -12,18 +12,19 @@ require (
github.com/gen2brain/go-fitz v1.24.14
github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.10.0
github.com/tmc/langchaingo v0.1.13-pre.1
golang.org/x/sync v0.11.0
google.golang.org/api v0.221.0
github.com/tmc/langchaingo v0.1.13
golang.org/x/sync v0.12.0
google.golang.org/api v0.225.0
gorm.io/driver/sqlite v1.5.7
gorm.io/gorm v1.25.12
)
require (
cloud.google.com/go v0.118.1 // indirect
cloud.google.com/go/auth v0.14.1 // indirect
cloud.google.com/go/auth v0.15.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
cloud.google.com/go/compute/metadata v0.6.0 // indirect
cloud.google.com/go/longrunning v0.6.4 // indirect
@ -46,8 +47,9 @@ require (
github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/google/s2a-go v0.1.9 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.5 // indirect
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
@ -75,22 +77,22 @@ require (
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
go.opentelemetry.io/otel v1.34.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect
go.opentelemetry.io/otel/trace v1.34.0 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.33.0 // indirect
golang.org/x/net v0.35.0 // indirect
golang.org/x/oauth2 v0.26.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.22.0 // indirect
golang.org/x/time v0.10.0 // indirect
golang.org/x/crypto v0.36.0 // indirect
golang.org/x/net v0.37.0 // indirect
golang.org/x/oauth2 v0.28.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.11.0 // indirect
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 // indirect
google.golang.org/grpc v1.70.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/grpc v1.71.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

147
go.sum
View file

@ -1,23 +1,13 @@
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
cloud.google.com/go/auth v0.13.0 h1:8Fu8TZy167JkW8Tj3q7dIkr2v4cndv41ouecJx0PAHs=
cloud.google.com/go/auth v0.13.0/go.mod h1:COOjD9gwfKNKz+IIduatIhYJQIc0mG3H102r/EMxX6Q=
cloud.google.com/go/auth v0.14.1 h1:AwoJbzUdxA/whv1qj3TLKwh3XX5sikny2fc40wUl+h0=
cloud.google.com/go/auth v0.14.1/go.mod h1:4JHUxlGXisL0AW8kXPtUF6ztuOksyfUQNFjfsOCXkPM=
cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU=
cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8=
cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
cloud.google.com/go/documentai v1.35.1 h1:52RfiUsoblXcE57CfKJGnITWLxRM30BcqNk/BKZl2LI=
cloud.google.com/go/documentai v1.35.1/go.mod h1:WJjwUAQfwQPJORW8fjz7RODprMULDzEGLA2E6WxenFw=
cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0MK+hc=
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
@ -49,8 +39,6 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
@ -74,21 +62,27 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o=
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=
github.com/googleapis/enterprise-certificate-proxy v0.3.5 h1:VgzTY2jogw3xt39CusEnFJWm7rlsq5yL5q9XdLOuP5g=
github.com/googleapis/enterprise-certificate-proxy v0.3.5/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
@ -131,8 +125,8 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
@ -154,8 +148,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tmc/langchaingo v0.1.13-pre.1 h1:r+ma9kl0NuFJGtIrnMPFjEn4RhXktwSI31fIpgiiMm4=
github.com/tmc/langchaingo v0.1.13-pre.1/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
@ -174,105 +168,84 @@ gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJW
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q=
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70=
golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg=
golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/api v0.214.0 h1:h2Gkq07OYi6kusGOaT/9rnNljuXmqPnaig7WGPmKbwA=
google.golang.org/api v0.214.0/go.mod h1:bYPpLG8AyeMWwDU6NXoB00xC0DFkikVvd5MfwoxjLqE=
google.golang.org/api v0.220.0 h1:3oMI4gdBgB72WFVwE1nerDD8W3HUOS4kypK6rRLbGns=
google.golang.org/api v0.220.0/go.mod h1:26ZAlY6aN/8WgpCzjPNy18QpYaz7Zgg1h0qe1GkZEmY=
google.golang.org/api v0.221.0 h1:qzaJfLhDsbMeFee8zBRdt/Nc+xmOuafD/dbdgGfutOU=
google.golang.org/api v0.221.0/go.mod h1:7sOU2+TL4TxUTdbi0gWgAIg7tH5qBXxoyhtL+9x3biQ=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc=
google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU=
google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ=
google.golang.org/api v0.225.0 h1:+4/IVqBQm0MV5S+JW3kdEGC1WtOmM2mXN1LKH1LdNlw=
google.golang.org/api v0.225.0/go.mod h1:WP/0Xm4LVvMOCldfvOISnWquSRWbG2kArDZcg+W2DbY=
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 h1:pgr/4QbFyktUv9CtQ/Fq4gzEE6/Xs7iCXbktaGzLHbQ=
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697/go.mod h1:+D9ySVjN8nY8YCVjc5O7PZDIdZporIDY3KaGfJunh88=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 h1:J1H9f+LEdWAfHcez/4cvaVBox7cOYT+IU6rgqj5x++8=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 h1:2duwAxN2+k0xLNpjnHTXoMUgnv6VPSp5fiqTuwSxjmI=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk=
google.golang.org/grpc v1.67.3 h1:OgPcDAFKHnH8X3O4WcO4XUc8GRDeKsKReqbQtiCj7N8=
google.golang.org/grpc v1.67.3/go.mod h1:YGaHCc6Oap+FzBJTZLBzkGSYt/cvGPFTPxkn7QfSU8s=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e h1:YA5lmSs3zc/5w+xsRcHqpETkaYyK63ivEPzNTcUUlSA=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM=
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

27
main.go
View file

@ -36,6 +36,10 @@ var (
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
azureDocAIEndpoint = os.Getenv("AZURE_DOCAI_ENDPOINT")
azureDocAIKey = os.Getenv("AZURE_DOCAI_KEY")
azureDocAIModelID = os.Getenv("AZURE_DOCAI_MODEL_ID")
azureDocAITimeout = os.Getenv("AZURE_DOCAI_TIMEOUT_SECONDS")
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
manualTag = os.Getenv("MANUAL_TAG")
autoTag = os.Getenv("AUTO_TAG")
@ -167,6 +171,18 @@ func main() {
GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"),
VisionLLMProvider: visionLlmProvider,
VisionLLMModel: visionLlmModel,
AzureEndpoint: azureDocAIEndpoint,
AzureAPIKey: azureDocAIKey,
AzureModelID: azureDocAIModelID,
}
// Parse Azure timeout if set
if azureDocAITimeout != "" {
if timeout, err := strconv.Atoi(azureDocAITimeout); err == nil {
ocrConfig.AzureTimeout = timeout
} else {
log.Warnf("Invalid AZURE_DOCAI_TIMEOUT_SECONDS value: %v, using default", err)
}
}
// If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider
@ -422,6 +438,17 @@ func validateOrDefaultEnvVars() {
log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
}
// Validate OCR provider if set
ocrProvider := os.Getenv("OCR_PROVIDER")
if ocrProvider == "azure" {
if azureDocAIEndpoint == "" {
log.Fatal("Please set the AZURE_DOCAI_ENDPOINT environment variable for Azure provider")
}
if azureDocAIKey == "" {
log.Fatal("Please set the AZURE_DOCAI_KEY environment variable for Azure provider")
}
}
if llmModel == "" {
log.Fatal("Please set the LLM_MODEL environment variable.")
}

13
ocr.go
View file

@ -36,13 +36,20 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string,
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
}
ocrText, err := app.ocrProvider.ProcessImage(ctx, imageContent)
result, err := app.ocrProvider.ProcessImage(ctx, imageContent)
if err != nil {
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
}
pageLogger.Debug("OCR completed for page")
if result == nil {
pageLogger.Error("Got nil result from OCR provider")
return "", fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1)
}
ocrTexts = append(ocrTexts, ocrText)
pageLogger.WithField("has_hocr", result.HOCR != "").
WithField("metadata", result.Metadata).
Debug("OCR completed for page")
ocrTexts = append(ocrTexts, result.Text)
}
docLogger.Info("OCR processing completed successfully")

224
ocr/azure_provider.go Normal file
View file

@ -0,0 +1,224 @@
package ocr
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/gabriel-vasile/mimetype"
"github.com/hashicorp/go-retryablehttp"
"github.com/sirupsen/logrus"
)
// Fixed settings and fallbacks for the Azure Document Intelligence provider.
const (
// apiVersion is sent as the api-version query parameter of the analyze request.
apiVersion = "2024-11-30"
// defaultModelID is the model used when Config.AzureModelID is empty.
defaultModelID = "prebuilt-read"
// defaultTimeout is the per-image timeout in seconds, used when
// Config.AzureTimeout is not a positive number.
defaultTimeout = 120
// pollingInterval is the delay between two polls of the operation status.
pollingInterval = 2 * time.Second
)
// AzureProvider implements OCR using Azure Document Intelligence.
// Instances are created by newAzureProvider, which fills in the defaults.
type AzureProvider struct {
// endpoint is the base URL the analyze request path is appended to.
endpoint string
// apiKey is sent in the Ocp-Apim-Subscription-Key header of every request.
apiKey string
// modelID selects the document model named in the analyze URL.
modelID string
// timeout bounds one ProcessImage call (submission plus polling).
timeout time.Duration
// httpClient performs all HTTP calls; it is a retrying client.
httpClient *retryablehttp.Client
}
// analyzeRequest is the JSON request body posted to the Azure Document
// Intelligence analyze endpoint.
type analyzeRequest struct {
// Base64Source holds the standard-base64 encoding of the raw document bytes.
Base64Source string `json:"base64Source"`
}
// newAzureProvider builds an AzureProvider from config.
//
// AzureEndpoint and AzureAPIKey are mandatory; an error is returned when
// either is empty. AzureModelID falls back to defaultModelID when empty and
// AzureTimeout (seconds) falls back to defaultTimeout when not positive. The
// returned provider uses a retrying HTTP client (up to 3 retries, waiting
// between 1 and 5 seconds) that logs through the provider's logger.
func newAzureProvider(config Config) (*AzureProvider, error) {
	entry := log.WithFields(logrus.Fields{
		"endpoint": config.AzureEndpoint,
		"model_id": config.AzureModelID,
	})
	entry.Info("Creating new Azure Document Intelligence provider")

	// Both the endpoint and the key are needed to talk to the service.
	hasCredentials := config.AzureEndpoint != "" && config.AzureAPIKey != ""
	if !hasCredentials {
		entry.Error("Missing required configuration")
		return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
	}

	// Retrying HTTP client shared by all requests of this provider.
	httpClient := retryablehttp.NewClient()
	httpClient.RetryMax = 3
	httpClient.RetryWaitMin = 1 * time.Second
	httpClient.RetryWaitMax = 5 * time.Second
	httpClient.Logger = entry

	// Start from the defaults, then apply any overrides from config.
	provider := &AzureProvider{
		endpoint:   config.AzureEndpoint,
		apiKey:     config.AzureAPIKey,
		modelID:    defaultModelID,
		timeout:    defaultTimeout * time.Second,
		httpClient: httpClient,
	}
	if config.AzureModelID != "" {
		provider.modelID = config.AzureModelID
	}
	if config.AzureTimeout > 0 {
		provider.timeout = time.Duration(config.AzureTimeout) * time.Second
	}

	entry.Info("Successfully initialized Azure Document Intelligence provider")
	return provider, nil
}
// ProcessImage runs OCR on a single image through Azure Document Intelligence.
//
// The content's MIME type is sniffed first and anything isImageMIMEType
// rejects yields an error without contacting the service. Submission and
// polling together are bounded by p.timeout, layered on top of ctx. On success
// the result carries the extracted text plus "provider", "page_count" and
// "api_version" metadata.
func (p *AzureProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{"model_id": p.modelID})
	logger.Debug("Starting Azure Document Intelligence processing")

	// Sniff the content type and refuse anything that is not a supported image.
	mimeType := mimetype.Detect(imageContent).String()
	logger.WithField("mime_type", mimeType).Debug("Detected file type")
	if !isImageMIMEType(mimeType) {
		logger.WithField("mime_type", mimeType).Error("Unsupported file type")
		return nil, fmt.Errorf("unsupported file type: %s", mimeType)
	}

	// One deadline covers both the submission and the polling phase.
	timeoutCtx, cancel := context.WithTimeout(ctx, p.timeout)
	defer cancel()

	// The service answers the submission with a URL to poll for the outcome.
	operationLocation, err := p.submitDocument(timeoutCtx, imageContent)
	if err != nil {
		return nil, fmt.Errorf("error submitting document: %w", err)
	}

	analysis, err := p.pollForResults(timeoutCtx, operationLocation)
	if err != nil {
		return nil, fmt.Errorf("error polling for results: %w", err)
	}

	// Map the service response onto the provider-independent result type.
	pageCount := len(analysis.AnalyzeResult.Pages)
	ocrResult := &OCRResult{
		Text: analysis.AnalyzeResult.Content,
		Metadata: map[string]string{
			"provider":    "azure_docai",
			"page_count":  fmt.Sprintf("%d", pageCount),
			"api_version": analysis.AnalyzeResult.APIVersion,
		},
	}

	logger.WithFields(logrus.Fields{
		"content_length": len(ocrResult.Text),
		"page_count":     pageCount,
	}).Info("Successfully processed document")

	return ocrResult, nil
}
// submitDocument posts imageContent to the analyze endpoint of the configured
// model and returns the URL from the Operation-Location response header, which
// is where the outcome of the asynchronous analysis can be polled.
//
// The image is sent base64-encoded inside a JSON body. Any response other than
// 202 Accepted, or a 202 without an Operation-Location header, is reported as
// an error.
func (p *AzureProvider) submitDocument(ctx context.Context, imageContent []byte) (string, error) {
	// Endpoints are commonly configured with a trailing slash; strip it so the
	// request path does not start with "//".
	endpoint := p.endpoint
	for len(endpoint) > 0 && endpoint[len(endpoint)-1] == '/' {
		endpoint = endpoint[:len(endpoint)-1]
	}
	requestURL := fmt.Sprintf("%s/documentintelligence/documentModels/%s:analyze?api-version=%s",
		endpoint, p.modelID, apiVersion)

	// Prepare request body
	requestBodyBytes, err := json.Marshal(analyzeRequest{
		Base64Source: base64.StdEncoding.EncodeToString(imageContent),
	})
	if err != nil {
		return "", fmt.Errorf("error marshaling request body: %w", err)
	}

	req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodPost, requestURL, bytes.NewBuffer(requestBodyBytes))
	if err != nil {
		return "", fmt.Errorf("error creating HTTP request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("error sending HTTP request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusAccepted {
		// Best effort: the body only enriches the error message, so a failed
		// read must not hide the status code.
		body, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
	}

	operationLocation := resp.Header.Get("Operation-Location")
	if operationLocation == "" {
		return "", fmt.Errorf("no Operation-Location header in response")
	}

	return operationLocation, nil
}
// pollForResults polls operationLocation every pollingInterval until the
// analysis reaches a terminal state or ctx is done.
//
// It returns the decoded result once the status is "succeeded". A "failed"
// status, an unknown status, a non-200 poll response, or an expired/cancelled
// ctx all produce an error. "running" and "notStarted" keep the loop going.
func (p *AzureProvider) pollForResults(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
	logger := log.WithField("operation_location", operationLocation)
	logger.Debug("Starting to poll for results")

	ticker := time.NewTicker(pollingInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil, fmt.Errorf("operation timed out after %v: %w", p.timeout, ctx.Err())
		case <-ticker.C:
			// The request lives in a helper so that its response body is
			// closed at the end of every iteration, not at function return.
			result, err := p.fetchOperationStatus(ctx, operationLocation)
			if err != nil {
				return nil, err
			}

			logger.WithFields(logrus.Fields{
				"status_code":    http.StatusOK,
				"content_length": len(result.AnalyzeResult.Content),
				"page_count":     len(result.AnalyzeResult.Pages),
				"status":         result.Status,
			}).Debug("Poll response received")

			switch result.Status {
			case "succeeded":
				return result, nil
			case "failed":
				return nil, fmt.Errorf("document processing failed")
			case "running", "notStarted":
				// Not finished yet (or not picked up yet): continue polling.
			default:
				return nil, fmt.Errorf("unexpected status: %s", result.Status)
			}
		}
	}
}

// fetchOperationStatus performs one GET on operationLocation and decodes the
// JSON body. The HTTP status is checked before decoding so that a non-200
// reply is reported as such rather than as a JSON decoding error.
func (p *AzureProvider) fetchOperationStatus(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
	req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, operationLocation, nil)
	if err != nil {
		return nil, fmt.Errorf("error creating poll request: %w", err)
	}
	req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error polling for results: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body only enriches the error message.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("unexpected status code %d while polling: %s", resp.StatusCode, string(body))
	}

	var result AzureDocumentResult
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("error decoding response: %w", err)
	}
	return &result, nil
}

222
ocr/azure_provider_test.go Normal file
View file

@ -0,0 +1,222 @@
package ocr
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/hashicorp/go-retryablehttp"
"github.com/stretchr/testify/assert"
)
// TestNewAzureProvider checks that newAzureProvider rejects configurations
// lacking an endpoint or API key, and that it applies the default model ID and
// timeout unless the configuration overrides them.
func TestNewAzureProvider(t *testing.T) {
	const (
		testEndpoint = "https://test.cognitiveservices.azure.com/"
		missingMsg   = "missing required Azure Document Intelligence configuration"
	)

	cases := []struct {
		name        string
		config      Config
		wantErr     bool
		errContains string
	}{
		{
			name:    "valid config",
			config:  Config{AzureEndpoint: testEndpoint, AzureAPIKey: "test-key"},
			wantErr: false,
		},
		{
			name: "valid config with custom model and timeout",
			config: Config{
				AzureEndpoint: testEndpoint,
				AzureAPIKey:   "test-key",
				AzureModelID:  "custom-model",
				AzureTimeout:  60,
			},
			wantErr: false,
		},
		{
			name:        "missing endpoint",
			config:      Config{AzureAPIKey: "test-key"},
			wantErr:     true,
			errContains: missingMsg,
		},
		{
			name:        "missing api key",
			config:      Config{AzureEndpoint: testEndpoint},
			wantErr:     true,
			errContains: missingMsg,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			provider, err := newAzureProvider(tc.config)

			if tc.wantErr {
				assert.Error(t, err)
				if tc.errContains != "" {
					assert.Contains(t, err.Error(), tc.errContains)
				}
				return
			}

			assert.NoError(t, err)
			assert.NotNil(t, provider)

			// Expected values: the defaults, unless the config overrides them.
			wantModelID := defaultModelID
			if tc.config.AzureModelID != "" {
				wantModelID = tc.config.AzureModelID
			}
			wantTimeout := time.Duration(defaultTimeout) * time.Second
			if tc.config.AzureTimeout != 0 {
				wantTimeout = time.Duration(tc.config.AzureTimeout) * time.Second
			}

			assert.Equal(t, wantModelID, provider.modelID)
			assert.Equal(t, wantTimeout, provider.timeout)
		})
	}
}
// TestAzureProvider_ProcessImage drives ProcessImage against an in-process
// HTTP server standing in for Azure Document Intelligence. It covers a
// successful submit-and-poll round trip, rejection of non-image content before
// any request is made, and a failing submission.
func TestAzureProvider_ProcessImage(t *testing.T) {
	// Sample success response
	now := time.Now()
	successResult := AzureDocumentResult{
		Status:              "succeeded",
		CreatedDateTime:     now,
		LastUpdatedDateTime: now,
		AnalyzeResult: AzureAnalyzeResult{
			APIVersion:      apiVersion,
			ModelID:         defaultModelID,
			StringIndexType: "utf-16",
			Content:         "Test document content",
			Pages: []AzurePage{
				{
					PageNumber: 1,
					Angle:      0.0,
					Width:      800,
					Height:     600,
					Unit:       "pixel",
					Lines: []AzureLine{
						{
							Content: "Test line",
							Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
							Spans:   []AzureSpan{{Offset: 0, Length: 9}},
						},
					},
					Spans: []AzureSpan{{Offset: 0, Length: 9}},
				},
			},
			Paragraphs: []AzureParagraph{
				{
					Content: "Test document content",
					// The span covers the whole 21-character content string.
					Spans: []AzureSpan{{Offset: 0, Length: 21}},
					BoundingRegions: []AzureBoundingBox{
						{
							PageNumber: 1,
							Polygon:    []int{0, 0, 100, 0, 100, 20, 0, 20},
						},
					},
				},
			},
			ContentFormat: "text",
		},
	}

	tests := []struct {
		name         string
		setupServer  func() *httptest.Server
		imageContent []byte
		wantErr      bool
		errContains  string
		expectedText string
	}{
		{
			name: "successful processing",
			setupServer: func() *httptest.Server {
				mux := http.NewServeMux()
				server := httptest.NewServer(mux)

				// Submission: answer 202 and point the client at the poll URL.
				mux.HandleFunc("/documentintelligence/documentModels/prebuilt-read:analyze", func(w http.ResponseWriter, r *http.Request) {
					w.Header().Set("Operation-Location", fmt.Sprintf("%s/operations/123", server.URL))
					w.WriteHeader(http.StatusAccepted)
				})

				// Polling: the analysis is reported as finished right away.
				mux.HandleFunc("/operations/123", func(w http.ResponseWriter, r *http.Request) {
					if err := json.NewEncoder(w).Encode(successResult); err != nil {
						t.Errorf("encoding mock poll response: %v", err)
					}
				})

				return server
			},
			// Create minimal JPEG content with magic numbers
			imageContent: append([]byte{0xFF, 0xD8, 0xFF, 0xE0}, []byte("JFIF test content")...),
			expectedText: "Test document content",
		},
		{
			name: "invalid mime type",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					// Reaching the server means the MIME check was skipped, so
					// fail the test instead of merely logging.
					t.Error("Server should not be called with invalid mime type")
					w.WriteHeader(http.StatusBadRequest)
				}))
			},
			imageContent: []byte("invalid content"),
			wantErr:      true,
			errContains:  "unsupported file type",
		},
		{
			name: "submission error",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusBadRequest)
					fmt.Fprintln(w, "Invalid request")
				}))
			},
			imageContent: []byte{0xFF, 0xD8, 0xFF, 0xE0}, // JPEG magic numbers
			wantErr:      true,
			errContains:  "unexpected status code 400",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			server := tt.setupServer()
			defer server.Close()

			client := retryablehttp.NewClient()
			client.HTTPClient = server.Client()
			client.Logger = log

			provider := &AzureProvider{
				endpoint:   server.URL,
				apiKey:     "test-key",
				modelID:    defaultModelID,
				timeout:    5 * time.Second,
				httpClient: client,
			}

			result, err := provider.ProcessImage(context.Background(), tt.imageContent)

			if tt.wantErr {
				assert.Error(t, err)
				if tt.errContains != "" {
					assert.Contains(t, err.Error(), tt.errContains)
				}
				return
			}

			assert.NoError(t, err)
			assert.NotNil(t, result)
			assert.Equal(t, tt.expectedText, result.Text)
			assert.Equal(t, "azure_docai", result.Metadata["provider"])
			assert.Equal(t, apiVersion, result.Metadata["api_version"])
			assert.Equal(t, "1", result.Metadata["page_count"])
		})
	}
}

72
ocr/azure_types.go Normal file
View file

@ -0,0 +1,72 @@
package ocr
import "time"
// AzureDocumentResult represents the root response from Azure Document Intelligence.
// It is the type the polling response body is decoded into.
type AzureDocumentResult struct {
// Status is the operation state; the poller acts on "succeeded", "failed" and "running".
Status string `json:"status"`
CreatedDateTime time.Time `json:"createdDateTime"`
LastUpdatedDateTime time.Time `json:"lastUpdatedDateTime"`
// AnalyzeResult carries the extracted content; it stays at its zero value
// when the response has no "analyzeResult" member.
AnalyzeResult AzureAnalyzeResult `json:"analyzeResult"`
}
// AzureAnalyzeResult represents the analyze result part of the Azure Document Intelligence response.
type AzureAnalyzeResult struct {
// APIVersion is copied into the OCR result metadata as "api_version".
APIVersion string `json:"apiVersion"`
ModelID string `json:"modelId"`
StringIndexType string `json:"stringIndexType"`
// Content is the full extracted text; it becomes the OCR result's Text.
Content string `json:"content"`
// Pages is used for the "page_count" metadata entry.
Pages []AzurePage `json:"pages"`
Paragraphs []AzureParagraph `json:"paragraphs"`
// Styles is decoded without a schema; each element is left as a generic value.
Styles []interface{} `json:"styles"`
ContentFormat string `json:"contentFormat"`
}
// AzurePage represents a single page in the document.
type AzurePage struct {
PageNumber int `json:"pageNumber"`
Angle float64 `json:"angle"`
// NOTE(review): Width and Height are ints, and encoding/json rejects
// fractional numbers for int fields. This presumably relies on the service
// reporting whole pixel values for images; confirm against the API schema.
Width int `json:"width"`
Height int `json:"height"`
// Unit names the unit of Width and Height (the tests use "pixel").
Unit string `json:"unit"`
Words []AzureWord `json:"words"`
Lines []AzureLine `json:"lines"`
Spans []AzureSpan `json:"spans"`
}
// AzureWord represents a single word with its properties.
type AzureWord struct {
Content string `json:"content"`
// Polygon is a flat list of coordinates (the tests use x,y pairs for four corners).
// NOTE(review): decoding fails if the service sends fractional coordinates — confirm.
Polygon []int `json:"polygon"`
Confidence float64 `json:"confidence"`
Span AzureSpan `json:"span"`
}
// AzureLine represents a line of text.
type AzureLine struct {
Content string `json:"content"`
// Polygon is a flat list of coordinates (the tests use x,y pairs for four corners).
// NOTE(review): decoding fails if the service sends fractional coordinates — confirm.
Polygon []int `json:"polygon"`
Spans []AzureSpan `json:"spans"`
}
// AzureSpan represents a span of text with offset and length.
// NOTE(review): presumably both values index into AzureAnalyzeResult.Content,
// counted in the unit named by StringIndexType — confirm against the API docs.
type AzureSpan struct {
Offset int `json:"offset"`
Length int `json:"length"`
}
// AzureParagraph represents a paragraph of text.
type AzureParagraph struct {
Content string `json:"content"`
Spans []AzureSpan `json:"spans"`
// BoundingRegions lists where on which page(s) the paragraph is located.
BoundingRegions []AzureBoundingBox `json:"boundingRegions"`
}
// AzureBoundingBox represents the location of content on a page.
type AzureBoundingBox struct {
PageNumber int `json:"pageNumber"`
// Polygon is a flat list of coordinates (the tests use x,y pairs for four corners).
// NOTE(review): decoding fails if the service sends fractional coordinates — confirm.
Polygon []int `json:"polygon"`
}
// AzureStyle represents style information for text segments. It is declared as
// an empty interface, so no style schema is modeled. AzureAnalyzeResult.Styles
// is declared as []interface{} directly rather than in terms of this type.
type AzureStyle interface{}

View file

@ -3,6 +3,8 @@ package ocr
import (
"context"
"fmt"
"html"
"strings"
documentai "cloud.google.com/go/documentai/apiv1"
"cloud.google.com/go/documentai/apiv1/documentaipb"
@ -46,7 +48,7 @@ func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
return provider, nil
}
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
logger := log.WithFields(logrus.Fields{
"project_id": p.projectID,
"location": p.location,
@ -60,7 +62,7 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
if !isImageMIMEType(mtype.String()) {
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
return "", fmt.Errorf("unsupported file type: %s", mtype.String())
return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
}
name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
@ -79,21 +81,56 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
resp, err := p.client.ProcessDocument(ctx, req)
if err != nil {
logger.WithError(err).Error("Failed to process document")
return "", fmt.Errorf("error processing document: %w", err)
return nil, fmt.Errorf("error processing document: %w", err)
}
if resp == nil || resp.Document == nil {
logger.Error("Received nil response or document from Document AI")
return "", fmt.Errorf("received nil response or document from Document AI")
return nil, fmt.Errorf("received nil response or document from Document AI")
}
if resp.Document.Error != nil {
logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
return "", fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
}
logger.WithField("content_length", len(resp.Document.Text)).Info("Successfully processed document")
return resp.Document.Text, nil
metadata := map[string]string{
"provider": "google_docai",
"mime_type": mtype.String(),
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
"processor_id": p.processorID,
}
// Safely add language code if available
if pages := resp.Document.GetPages(); len(pages) > 0 {
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
metadata["lang_code"] = langs[0].GetLanguageCode()
}
}
result := &OCRResult{
Text: resp.Document.Text,
Metadata: metadata,
}
// Add hOCR output if available
if len(resp.Document.GetPages()) > 0 {
var hocr string
func() {
defer func() {
if r := recover(); r != nil {
logger.WithField("error", r).Error("Panic during hOCR generation")
}
}()
hocr = generateHOCR(resp.Document)
}()
if hocr != "" {
result.HOCR = hocr
}
}
logger.WithField("content_length", len(result.Text)).Info("Successfully processed document")
return result, nil
}
// isImageMIMEType checks if the given MIME type is a supported image type
@ -109,6 +146,83 @@ func isImageMIMEType(mimeType string) bool {
return supportedTypes[mimeType]
}
// generateHOCR converts a Document AI response to hOCR markup.
//
// It returns the empty string when doc has no pages. Otherwise it emits an
// XHTML skeleton containing one ocr_page element per page with positive
// dimensions and, inside each page, one ocr_par element per paragraph whose
// bounding polygon has at least four normalized vertices that map inside the
// page. The text of every paragraph text segment is HTML-escaped and written
// as an ocrx_word span.
//
// Pages, paragraphs and text segments that fail validation are skipped rather
// than reported, so the result may contain fewer elements than doc does.
func generateHOCR(doc *documentaipb.Document) string {
	pages := doc.GetPages()
	if len(pages) == 0 {
		return ""
	}

	text := doc.GetText()

	var hocr strings.Builder
	hocr.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>OCR Output</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name='ocr-system' content='google-docai' />
</head>
<body>`)

	for pageNum, page := range pages {
		pageWidth := page.GetDimension().GetWidth()
		pageHeight := page.GetDimension().GetHeight()

		// A page without positive dimensions cannot anchor any bounding box.
		if pageWidth <= 0 || pageHeight <= 0 {
			continue
		}
		maxX, maxY := int(pageWidth), int(pageHeight)

		fmt.Fprintf(&hocr, `
<div class='ocr_page' id='page_%d' title='image;bbox 0 0 %d %d'>`,
			pageNum+1, maxX, maxY)

		for paraIdx, para := range page.GetParagraphs() {
			layout := para.GetLayout()
			vertices := layout.GetBoundingPoly().GetNormalizedVertices()
			if len(vertices) < 4 {
				continue
			}

			// Scale the normalized vertices 0 and 2 to absolute page
			// coordinates; the multiplication is done in float64 and the
			// result is truncated towards zero.
			x1 := int(float64(vertices[0].GetX()) * float64(pageWidth))
			y1 := int(float64(vertices[0].GetY()) * float64(pageHeight))
			x2 := int(float64(vertices[2].GetX()) * float64(pageWidth))
			y2 := int(float64(vertices[2].GetY()) * float64(pageHeight))

			// Drop paragraphs whose box falls outside the page.
			if x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0 ||
				x1 > maxX || y1 > maxY ||
				x2 > maxX || y2 > maxY {
				continue
			}

			// The id uses the paragraph's own 1-based position on the page so
			// that every paragraph gets a distinct id (skipped paragraphs
			// leave gaps in the numbering).
			fmt.Fprintf(&hocr, `
<p class='ocr_par' id='par_%d_%d' title='bbox %d %d %d %d'>`,
				pageNum+1, paraIdx+1, x1, y1, x2, y2)

			for _, seg := range layout.GetTextAnchor().GetTextSegments() {
				start, end := seg.GetStartIndex(), seg.GetEndIndex()
				// Ignore empty segments and segments that do not fit inside
				// the document text instead of panicking on the slice below.
				if start < 0 || end > int64(len(text)) || start >= end {
					continue
				}
				fmt.Fprintf(&hocr, `
<span class='ocrx_word'>%s</span>`, html.EscapeString(text[start:end]))
			}

			hocr.WriteString("\n </p>")
		}

		hocr.WriteString("\n </div>")
	}

	hocr.WriteString("\n</body>\n</html>")
	return hocr.String()
}
// Close releases resources used by the provider
func (p *GoogleDocAIProvider) Close() error {
if p.client != nil {

View file

@ -0,0 +1,94 @@
package ocr
import (
"regexp"
"strings"
"testing"
"cloud.google.com/go/documentai/apiv1/documentaipb"
)
// TestGenerateHOCR runs generateHOCR against an empty document and against a
// single-page document holding one paragraph, matching the latter's output
// with a regular expression and checking the fixed hOCR preamble.
func TestGenerateHOCR(t *testing.T) {
	// One 800x600 page with a single paragraph covering the whole text.
	helloParagraph := &documentaipb.Document_Page_Paragraph{
		Layout: &documentaipb.Document_Page_Layout{
			BoundingPoly: &documentaipb.BoundingPoly{
				NormalizedVertices: []*documentaipb.NormalizedVertex{
					{X: 0.1, Y: 0.1},
					{X: 0.9, Y: 0.1},
					{X: 0.9, Y: 0.2},
					{X: 0.1, Y: 0.2},
				},
			},
			TextAnchor: &documentaipb.Document_TextAnchor{
				TextSegments: []*documentaipb.Document_TextAnchor_TextSegment{
					{StartIndex: 0, EndIndex: 11},
				},
			},
		},
	}
	helloDoc := &documentaipb.Document{
		Text: "Hello World",
		Pages: []*documentaipb.Document_Page{
			{
				Dimension: &documentaipb.Document_Page_Dimension{
					Width:  800,
					Height: 600,
				},
				Paragraphs: []*documentaipb.Document_Page_Paragraph{helloParagraph},
			},
		},
	}

	// Fragments every non-empty result must contain, each paired with the
	// message reported when it is absent.
	structureChecks := []struct {
		fragment string
		message  string
	}{
		{"<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "missing XML declaration"},
		{"<html xmlns=\"http://www.w3.org/1999/xhtml\"", "missing HTML namespace"},
		{"<meta name='ocr-system' content='google-docai'", "missing OCR system metadata"},
	}

	cases := []struct {
		name    string
		doc     *documentaipb.Document
		pattern string // regular expression; empty means "expect empty output"
	}{
		{
			name:    "empty document",
			doc:     &documentaipb.Document{},
			pattern: "",
		},
		{
			name: "single page with one paragraph",
			doc:  helloDoc,
			pattern: "(?s).*<div class='ocr_page' id='page_1' title='image;bbox 0 0 800 600'>.*" +
				"<p class='ocr_par' id='par_1_1' title='bbox 80 60 719 120'>.*" +
				"<span class='ocrx_word'>Hello World</span>.*</p>.*</div>.*",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := generateHOCR(tc.doc)

			if tc.pattern == "" {
				if got != "" {
					t.Errorf("expected empty string, got %v", got)
				}
				return
			}

			ok, err := regexp.MatchString(tc.pattern, got)
			if err != nil {
				t.Fatalf("error matching regex: %v", err)
			}
			if !ok {
				t.Errorf("expected to match regex %v\ngot: %v", tc.pattern, got)
			}

			// Verify basic hOCR structure
			for _, check := range structureChecks {
				if !strings.Contains(got, check.fragment) {
					t.Error(check.message)
				}
			}
		})
	}
}

View file

@ -60,7 +60,7 @@ func newLLMProvider(config Config) (*LLMProvider, error) {
}, nil
}
func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
logger := log.WithFields(logrus.Fields{
"provider": p.provider,
"model": p.model,
@ -71,7 +71,7 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (st
img, _, err := image.Decode(bytes.NewReader(imageContent))
if err != nil {
logger.WithError(err).Error("Failed to decode image")
return "", fmt.Errorf("error decoding image: %w", err)
return nil, fmt.Errorf("error decoding image: %w", err)
}
bounds := img.Bounds()
logger.WithFields(logrus.Fields{
@ -106,11 +106,18 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (st
})
if err != nil {
logger.WithError(err).Error("Failed to get response from vision model")
return "", fmt.Errorf("error getting response from LLM: %w", err)
return nil, fmt.Errorf("error getting response from LLM: %w", err)
}
logger.WithField("content_length", len(completion.Choices[0].Content)).Info("Successfully processed image")
return completion.Choices[0].Content, nil
result := &OCRResult{
Text: completion.Choices[0].Content,
Metadata: map[string]string{
"provider": p.provider,
"model": p.model,
},
}
logger.WithField("content_length", len(result.Text)).Info("Successfully processed image")
return result, nil
}
// createOpenAIClient creates a new OpenAI vision model client

View file

@ -9,14 +9,26 @@ import (
var log = logrus.New()
// OCRResult holds the output from OCR processing.
//
// Text is the required plain-text result; HOCR is filled in only by providers
// that can produce it.
type OCRResult struct {
// Text is the plain text output (required).
Text string
// HOCR is the hOCR output (optional, set only if the provider supports it).
HOCR string
// Metadata holds additional provider-specific key/value pairs, such as
// the "provider" entry.
Metadata map[string]string
}
// Provider defines the interface for OCR processing.
type Provider interface {
// ProcessImage runs OCR on the image bytes in imageContent and returns the
// recognized output, or an error if processing fails.
ProcessImage(ctx context.Context, imageContent []byte) (string, error)
ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
}
// Config holds the OCR provider configuration
type Config struct {
// Provider type (e.g., "llm", "google_docai")
// Provider type (e.g., "llm", "google_docai", "azure")
Provider string
// Google Document AI settings
@ -27,6 +39,15 @@ type Config struct {
// LLM settings (from existing config)
VisionLLMProvider string
VisionLLMModel string
// Azure Document Intelligence settings
AzureEndpoint string
AzureAPIKey string
AzureModelID string // Optional, defaults to "prebuilt-read"
AzureTimeout int // Optional, defaults to 120 seconds
// OCR output options
EnableHOCR bool // Whether to request hOCR output if supported by the provider
}
// NewProvider creates a new OCR provider based on configuration
@ -54,6 +75,12 @@ func NewProvider(config Config) (Provider, error) {
}).Info("Using LLM OCR provider")
return newLLMProvider(config)
case "azure":
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
}
return newAzureProvider(config)
default:
return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
}

View file

@ -94,7 +94,6 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body
log.WithFields(logrus.Fields{
"method": method,
"url": url,
"headers": req.Header,
}).Debug("Making HTTP request")
resp, err := client.HTTPClient.Do(req)

File diff suppressed because it is too large Load diff

View file

@ -43,7 +43,7 @@
"eslint": "^9.9.0",
"eslint-plugin-react-hooks": "^5.1.0-rc.0",
"eslint-plugin-react-refresh": "^0.4.9",
"globals": "^15.9.0",
"globals": "^16.0.0",
"node-fetch": "^3.3.0",
"postcss": "^8.4.47",
"tailwindcss": "^3.4.12",