mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-13 05:08:01 -05:00
Compare commits
32 commits
Author | SHA1 | Date | |
---|---|---|---|
|
53bfc7d252 | ||
|
14567480a8 | ||
|
72a23ac51e | ||
|
60c141e815 | ||
|
3788abc9cb | ||
|
d477e26048 | ||
|
184c1a8600 | ||
|
cbd9c5438c | ||
|
360663b05b | ||
|
7c7449e197 | ||
|
a5a5afe276 | ||
|
3bb1415b25 | ||
|
df396dac78 | ||
|
c514914b4e | ||
|
5ac6bb7532 | ||
|
6c03fca89c | ||
|
0b5b367b0a | ||
|
9b0204180f | ||
|
5c6f50a1a3 | ||
|
74e4419b7c | ||
|
fc1d69a93b | ||
|
1d6cea481f | ||
|
c95487e834 | ||
|
3bcee9c1c5 | ||
|
ad74e28473 | ||
|
b5fb1cb040 | ||
|
992b3b824b | ||
|
474fde659c | ||
|
80365f95a0 | ||
|
bfe1e00392 | ||
|
c1b4f8344f | ||
|
b6b8948fe3 |
17 changed files with 1343 additions and 413 deletions
|
@ -25,7 +25,7 @@ COPY web-app /app/
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# Stage 2: Build the Go binary
|
# Stage 2: Build the Go binary
|
||||||
FROM golang:1.24.0-alpine3.21 AS builder
|
FROM golang:1.24.1-alpine3.21 AS builder
|
||||||
|
|
||||||
# Set the working directory inside the container
|
# Set the working directory inside the container
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
@ -82,7 +82,7 @@ RUN sed -i \
|
||||||
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
|
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
|
||||||
|
|
||||||
# Stage 3: Create a lightweight image with just the binary
|
# Stage 3: Create a lightweight image with just the binary
|
||||||
FROM alpine:3.21.2
|
FROM alpine:3.21.3
|
||||||
|
|
||||||
ENV GIN_MODE=release
|
ENV GIN_MODE=release
|
||||||
|
|
||||||
|
|
112
README.md
112
README.md
|
@ -22,7 +22,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
|
||||||
|
|
||||||
- **LLM OCR**: Use OpenAI or Ollama to extract text from images.
|
- **LLM OCR**: Use OpenAI or Ollama to extract text from images.
|
||||||
- **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
|
- **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
|
||||||
- **More to come**: Stay tuned for more OCR providers!
|
- **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
|
||||||
|
|
||||||
3. **Automatic Title & Tag Generation**
|
3. **Automatic Title & Tag Generation**
|
||||||
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
|
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
|
||||||
|
@ -39,11 +39,11 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
|
||||||
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
|
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
|
||||||
|
|
||||||
7. **Simple Docker Deployment**
|
7. **Simple Docker Deployment**
|
||||||
A few environment variables, and you’re off! Compose it alongside paperless-ngx with minimal fuss.
|
A few environment variables, and you're off! Compose it alongside paperless-ngx with minimal fuss.
|
||||||
|
|
||||||
8. **Unified Web UI**
|
8. **Unified Web UI**
|
||||||
|
|
||||||
- **Manual Review**: Approve or tweak AI’s suggestions.
|
- **Manual Review**: Approve or tweak AI's suggestions.
|
||||||
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
|
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
@ -56,6 +56,12 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
|
||||||
- [Installation](#installation)
|
- [Installation](#installation)
|
||||||
- [Docker Compose](#docker-compose)
|
- [Docker Compose](#docker-compose)
|
||||||
- [Manual Setup](#manual-setup)
|
- [Manual Setup](#manual-setup)
|
||||||
|
- [OCR Providers](#ocr-providers)
|
||||||
|
- [LLM-based OCR](#1-llm-based-ocr-default)
|
||||||
|
- [Azure Document Intelligence](#2-azure-document-intelligence)
|
||||||
|
- [Google Document AI](#3-google-document-ai)
|
||||||
|
- [Comparing OCR Providers](#comparing-ocr-providers)
|
||||||
|
- [Choosing the Right Provider](#choosing-the-right-provider)
|
||||||
- [Configuration](#configuration)
|
- [Configuration](#configuration)
|
||||||
- [Environment Variables](#environment-variables)
|
- [Environment Variables](#environment-variables)
|
||||||
- [Custom Prompt Templates](#custom-prompt-templates)
|
- [Custom Prompt Templates](#custom-prompt-templates)
|
||||||
|
@ -86,7 +92,7 @@ https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
|
||||||
|
|
||||||
#### Docker Compose
|
#### Docker Compose
|
||||||
|
|
||||||
Here’s an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
|
Here's an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
services:
|
services:
|
||||||
|
@ -124,6 +130,13 @@ services:
|
||||||
# GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
|
# GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
|
||||||
# GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key
|
# GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key
|
||||||
|
|
||||||
|
# Option 3: Azure Document Intelligence
|
||||||
|
# OCR_PROVIDER: 'azure' # Use Azure Document Intelligence
|
||||||
|
# AZURE_DOCAI_ENDPOINT: 'your-endpoint' # Your Azure endpoint URL
|
||||||
|
# AZURE_DOCAI_KEY: 'your-key' # Your Azure API key
|
||||||
|
# AZURE_DOCAI_MODEL_ID: 'prebuilt-read' # Optional, defaults to prebuilt-read
|
||||||
|
# AZURE_DOCAI_TIMEOUT_SECONDS: '120' # Optional, defaults to 120 seconds
|
||||||
|
|
||||||
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
|
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
|
||||||
OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
|
OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
|
||||||
LOG_LEVEL: "info" # Optional: debug, warn, error
|
LOG_LEVEL: "info" # Optional: debug, warn, error
|
||||||
|
@ -172,6 +185,63 @@ services:
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
## OCR Providers
|
||||||
|
|
||||||
|
paperless-gpt supports three different OCR providers, each with unique strengths and capabilities:
|
||||||
|
|
||||||
|
### 1. LLM-based OCR (Default)
|
||||||
|
- **Key Features**:
|
||||||
|
- Uses vision-capable LLMs like GPT-4V or MiniCPM-V
|
||||||
|
- High accuracy with complex layouts and difficult scans
|
||||||
|
- Context-aware text recognition
|
||||||
|
- Self-correcting capabilities for OCR errors
|
||||||
|
- **Best For**:
|
||||||
|
- Complex or unusual document layouts
|
||||||
|
- Poor quality scans
|
||||||
|
- Documents with mixed languages
|
||||||
|
- **Configuration**:
|
||||||
|
```yaml
|
||||||
|
OCR_PROVIDER: "llm"
|
||||||
|
VISION_LLM_PROVIDER: "openai" # or "ollama"
|
||||||
|
VISION_LLM_MODEL: "gpt-4v" # or "minicpm-v"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Azure Document Intelligence
|
||||||
|
- **Key Features**:
|
||||||
|
- Enterprise-grade OCR solution
|
||||||
|
- Prebuilt models for common document types
|
||||||
|
- Layout preservation and table detection
|
||||||
|
- Fast processing speeds
|
||||||
|
- **Best For**:
|
||||||
|
- Business documents and forms
|
||||||
|
- High-volume processing
|
||||||
|
- Documents requiring layout analysis
|
||||||
|
- **Configuration**:
|
||||||
|
```yaml
|
||||||
|
OCR_PROVIDER: "azure"
|
||||||
|
AZURE_DOCAI_ENDPOINT: "https://your-endpoint.cognitiveservices.azure.com/"
|
||||||
|
AZURE_DOCAI_KEY: "your-key"
|
||||||
|
AZURE_DOCAI_MODEL_ID: "prebuilt-read" # optional
|
||||||
|
AZURE_DOCAI_TIMEOUT_SECONDS: "120" # optional
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Google Document AI
|
||||||
|
- **Key Features**:
|
||||||
|
- Specialized document processors
|
||||||
|
- Strong form field detection
|
||||||
|
- Multi-language support
|
||||||
|
- High accuracy on structured documents
|
||||||
|
- **Best For**:
|
||||||
|
- Forms and structured documents
|
||||||
|
- Documents with tables
|
||||||
|
- Multi-language documents
|
||||||
|
- **Configuration**:
|
||||||
|
```yaml
|
||||||
|
OCR_PROVIDER: "google_docai"
|
||||||
|
GOOGLE_PROJECT_ID: "your-project"
|
||||||
|
GOOGLE_LOCATION: "us"
|
||||||
|
GOOGLE_PROCESSOR_ID: "processor-id"
|
||||||
|
```
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
|
@ -192,9 +262,13 @@ services:
|
||||||
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
|
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
|
||||||
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
|
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
|
||||||
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
|
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
|
||||||
| `OCR_PROVIDER` | OCR provider to use (`llm` or `google_docai`). | No | llm |
|
| `OCR_PROVIDER` | OCR provider to use (`llm`, `azure`, or `google_docai`). | No | llm |
|
||||||
| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
|
| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
|
||||||
| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
|
| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
|
||||||
|
| `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | |
|
||||||
|
| `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | |
|
||||||
|
| `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional; only used when OCR_PROVIDER is `azure`. | No | prebuilt-read |
|
||||||
|
| `AZURE_DOCAI_TIMEOUT_SECONDS` | Azure Document Intelligence timeout in seconds. | No | 120 |
|
||||||
| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
|
| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
|
||||||
| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
|
| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
|
||||||
| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
|
| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
|
||||||
|
@ -211,7 +285,7 @@ services:
|
||||||
|
|
||||||
### Custom Prompt Templates
|
### Custom Prompt Templates
|
||||||
|
|
||||||
paperless-gpt’s flexible **prompt templates** let you shape how AI responds:
|
paperless-gpt's flexible **prompt templates** let you shape how AI responds:
|
||||||
|
|
||||||
1. **`title_prompt.tmpl`**: For document titles.
|
1. **`title_prompt.tmpl`**: For document titles.
|
||||||
2. **`tag_prompt.tmpl`**: For tagging logic.
|
2. **`tag_prompt.tmpl`**: For tagging logic.
|
||||||
|
@ -232,13 +306,11 @@ Then tweak at will—**paperless-gpt** reloads them automatically on startup!
|
||||||
Each template has access to specific variables:
|
Each template has access to specific variables:
|
||||||
|
|
||||||
**title_prompt.tmpl**:
|
**title_prompt.tmpl**:
|
||||||
|
|
||||||
- `{{.Language}}` - Target language (e.g., "English")
|
- `{{.Language}}` - Target language (e.g., "English")
|
||||||
- `{{.Content}}` - Document content text
|
- `{{.Content}}` - Document content text
|
||||||
- `{{.Title}}` - Original document title
|
- `{{.Title}}` - Original document title
|
||||||
|
|
||||||
**tag_prompt.tmpl**:
|
**tag_prompt.tmpl**:
|
||||||
|
|
||||||
- `{{.Language}}` - Target language
|
- `{{.Language}}` - Target language
|
||||||
- `{{.AvailableTags}}` - List of existing tags in paperless-ngx
|
- `{{.AvailableTags}}` - List of existing tags in paperless-ngx
|
||||||
- `{{.OriginalTags}}` - Document's current tags
|
- `{{.OriginalTags}}` - Document's current tags
|
||||||
|
@ -246,11 +318,9 @@ Each template has access to specific variables:
|
||||||
- `{{.Content}}` - Document content text
|
- `{{.Content}}` - Document content text
|
||||||
|
|
||||||
**ocr_prompt.tmpl**:
|
**ocr_prompt.tmpl**:
|
||||||
|
|
||||||
- `{{.Language}}` - Target language
|
- `{{.Language}}` - Target language
|
||||||
|
|
||||||
**correspondent_prompt.tmpl**:
|
**correspondent_prompt.tmpl**:
|
||||||
|
|
||||||
- `{{.Language}}` - Target language
|
- `{{.Language}}` - Target language
|
||||||
- `{{.AvailableCorrespondents}}` - List of existing correspondents
|
- `{{.AvailableCorrespondents}}` - List of existing correspondents
|
||||||
- `{{.BlackList}}` - List of blacklisted correspondent names
|
- `{{.BlackList}}` - List of blacklisted correspondent names
|
||||||
|
@ -265,23 +335,25 @@ The templates use Go's text/template syntax. paperless-gpt automatically reloads
|
||||||
|
|
||||||
1. **Tag Documents**
|
1. **Tag Documents**
|
||||||
|
|
||||||
- Add `paperless-gpt` or your custom tag to the docs you want to AI-ify.
|
- Add the `paperless-gpt` tag to documents for manual processing
|
||||||
|
- Add `paperless-gpt-auto` for automatic processing
|
||||||
|
- Add `paperless-gpt-ocr-auto` for automatic OCR processing
|
||||||
|
|
||||||
2. **Visit Web UI**
|
2. **Visit Web UI**
|
||||||
|
|
||||||
- Go to `http://localhost:8080` (or your host) in your browser.
|
- Go to `http://localhost:8080` (or your host) in your browser
|
||||||
|
- Review documents tagged for processing
|
||||||
|
|
||||||
3. **Generate & Apply Suggestions**
|
3. **Generate & Apply Suggestions**
|
||||||
|
|
||||||
- Click “Generate Suggestions” to see AI-proposed titles/tags/correspondents.
|
- Click "Generate Suggestions" to see AI-proposed titles/tags/correspondents
|
||||||
- Approve, edit, or discard. Hit “Apply” to finalize in paperless-ngx.
|
- Review and approve or edit suggestions
|
||||||
|
- Click "Apply" to save changes to paperless-ngx
|
||||||
4. **Try LLM-Based OCR (Experimental)**
|
|
||||||
- If you enabled `VISION_LLM_PROVIDER` and `VISION_LLM_MODEL`, let AI-based OCR read your scanned PDFs.
|
|
||||||
- Tag those documents with `paperless-gpt-ocr-auto` (or your custom `AUTO_OCR_TAG`).
|
|
||||||
|
|
||||||
**Tip**: The entire pipeline can be **fully automated** if you prefer minimal manual intervention.
|
|
||||||
|
|
||||||
|
4. **OCR Processing**
|
||||||
|
- Tag documents with the appropriate OCR tag (`paperless-gpt-ocr-auto` by default, or your custom `AUTO_OCR_TAG`) to process them
|
||||||
|
- Monitor progress in the Web UI
|
||||||
|
- Review results and apply changes
|
||||||
---
|
---
|
||||||
|
|
||||||
## LLM-Based OCR: Compare for Yourself
|
## LLM-Based OCR: Compare for Yourself
|
||||||
|
|
|
@ -92,3 +92,88 @@
|
||||||
- E2E tests for web interface
|
- E2E tests for web interface
|
||||||
- Test fixtures and mocks
|
- Test fixtures and mocks
|
||||||
- Playwright for frontend testing
|
- Playwright for frontend testing
|
||||||
|
|
||||||
|
## OCR System Patterns
|
||||||
|
|
||||||
|
### OCR Provider Architecture
|
||||||
|
|
||||||
|
#### 1. Provider Interface
|
||||||
|
- Common interface for all OCR implementations
|
||||||
|
- Methods for image processing
|
||||||
|
- Configuration through standardized Config struct
|
||||||
|
- Resource management patterns
|
||||||
|
|
||||||
|
#### 2. LLM Provider Implementation
|
||||||
|
- Supports OpenAI and Ollama vision models
|
||||||
|
- Base64 encoding for OpenAI requests
|
||||||
|
- Binary format for Ollama requests
|
||||||
|
- Template-based OCR prompts
|
||||||
|
|
||||||
|
#### 3. Google Document AI Provider
|
||||||
|
- Enterprise-grade OCR processing
|
||||||
|
- MIME type validation
|
||||||
|
- Processor configuration via environment
|
||||||
|
- Regional endpoint support
|
||||||
|
|
||||||
|
### Logging Patterns
|
||||||
|
|
||||||
|
#### 1. Provider Initialization
|
||||||
|
```
|
||||||
|
[INFO] Initializing OCR provider: llm
|
||||||
|
[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Processing Logs
|
||||||
|
```
|
||||||
|
[DEBUG] Starting OCR processing
|
||||||
|
[DEBUG] Image dimensions (width=800, height=1200)
|
||||||
|
[DEBUG] Using binary image format for non-OpenAI provider
|
||||||
|
[DEBUG] Sending request to vision model
|
||||||
|
[INFO] Successfully processed image (content_length=1536)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Error Logging
|
||||||
|
```
|
||||||
|
[ERROR] Failed to decode image: invalid format
|
||||||
|
[ERROR] Unsupported file type: image/webp
|
||||||
|
[ERROR] Failed to get response from vision model
|
||||||
|
```
|
||||||
|
|
||||||
|
### Error Handling Patterns
|
||||||
|
|
||||||
|
#### 1. Configuration Validation
|
||||||
|
- Required parameter checks
|
||||||
|
- Environment variable validation
|
||||||
|
- Provider-specific configuration
|
||||||
|
- Connection testing
|
||||||
|
|
||||||
|
#### 2. Processing Errors
|
||||||
|
- Image format validation
|
||||||
|
- MIME type checking
|
||||||
|
- Content processing errors
|
||||||
|
- Provider-specific error handling
|
||||||
|
|
||||||
|
#### 3. Error Propagation
|
||||||
|
- Detailed error contexts
|
||||||
|
- Original error wrapping
|
||||||
|
- Logging with error context
|
||||||
|
- Recovery mechanisms
|
||||||
|
|
||||||
|
### Processing Flow
|
||||||
|
|
||||||
|
#### 1. Document Processing
|
||||||
|
```
|
||||||
|
Document Tagged → OCR Provider Selected → Image Processing → Text Extraction → Content Update
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Provider Selection
|
||||||
|
```
|
||||||
|
Config Check → Provider Initialization → Resource Setup → Provider Ready
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Error Recovery
|
||||||
|
```
|
||||||
|
Error Detection → Logging → Cleanup → Error Propagation
|
||||||
|
```
|
||||||
|
|
||||||
|
These patterns ensure consistent behavior across OCR providers while maintaining proper logging and error handling throughout the system.
|
||||||
|
|
36
go.mod
36
go.mod
|
@ -1,8 +1,8 @@
|
||||||
module paperless-gpt
|
module paperless-gpt
|
||||||
|
|
||||||
go 1.22.7
|
go 1.23.0
|
||||||
|
|
||||||
toolchain go1.23.6
|
toolchain go1.24.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cloud.google.com/go/documentai v1.35.2
|
cloud.google.com/go/documentai v1.35.2
|
||||||
|
@ -12,18 +12,19 @@ require (
|
||||||
github.com/gen2brain/go-fitz v1.24.14
|
github.com/gen2brain/go-fitz v1.24.14
|
||||||
github.com/gin-gonic/gin v1.10.0
|
github.com/gin-gonic/gin v1.10.0
|
||||||
github.com/google/uuid v1.6.0
|
github.com/google/uuid v1.6.0
|
||||||
|
github.com/hashicorp/go-retryablehttp v0.7.7
|
||||||
github.com/sirupsen/logrus v1.9.3
|
github.com/sirupsen/logrus v1.9.3
|
||||||
github.com/stretchr/testify v1.10.0
|
github.com/stretchr/testify v1.10.0
|
||||||
github.com/tmc/langchaingo v0.1.13-pre.1
|
github.com/tmc/langchaingo v0.1.13
|
||||||
golang.org/x/sync v0.11.0
|
golang.org/x/sync v0.12.0
|
||||||
google.golang.org/api v0.221.0
|
google.golang.org/api v0.225.0
|
||||||
gorm.io/driver/sqlite v1.5.7
|
gorm.io/driver/sqlite v1.5.7
|
||||||
gorm.io/gorm v1.25.12
|
gorm.io/gorm v1.25.12
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cloud.google.com/go v0.118.1 // indirect
|
cloud.google.com/go v0.118.1 // indirect
|
||||||
cloud.google.com/go/auth v0.14.1 // indirect
|
cloud.google.com/go/auth v0.15.0 // indirect
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
|
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
|
||||||
cloud.google.com/go/compute/metadata v0.6.0 // indirect
|
cloud.google.com/go/compute/metadata v0.6.0 // indirect
|
||||||
cloud.google.com/go/longrunning v0.6.4 // indirect
|
cloud.google.com/go/longrunning v0.6.4 // indirect
|
||||||
|
@ -46,8 +47,9 @@ require (
|
||||||
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
||||||
github.com/goccy/go-json v0.10.2 // indirect
|
github.com/goccy/go-json v0.10.2 // indirect
|
||||||
github.com/google/s2a-go v0.1.9 // indirect
|
github.com/google/s2a-go v0.1.9 // indirect
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
|
github.com/googleapis/enterprise-certificate-proxy v0.3.5 // indirect
|
||||||
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
|
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
|
||||||
|
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
|
||||||
github.com/huandu/xstrings v1.5.0 // indirect
|
github.com/huandu/xstrings v1.5.0 // indirect
|
||||||
github.com/jinzhu/inflection v1.0.0 // indirect
|
github.com/jinzhu/inflection v1.0.0 // indirect
|
||||||
github.com/jinzhu/now v1.1.5 // indirect
|
github.com/jinzhu/now v1.1.5 // indirect
|
||||||
|
@ -75,22 +77,22 @@ require (
|
||||||
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
|
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
|
||||||
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
|
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
|
||||||
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
|
||||||
go.opentelemetry.io/otel v1.34.0 // indirect
|
go.opentelemetry.io/otel v1.34.0 // indirect
|
||||||
go.opentelemetry.io/otel/metric v1.34.0 // indirect
|
go.opentelemetry.io/otel/metric v1.34.0 // indirect
|
||||||
go.opentelemetry.io/otel/trace v1.34.0 // indirect
|
go.opentelemetry.io/otel/trace v1.34.0 // indirect
|
||||||
golang.org/x/arch v0.8.0 // indirect
|
golang.org/x/arch v0.8.0 // indirect
|
||||||
golang.org/x/crypto v0.33.0 // indirect
|
golang.org/x/crypto v0.36.0 // indirect
|
||||||
golang.org/x/net v0.35.0 // indirect
|
golang.org/x/net v0.37.0 // indirect
|
||||||
golang.org/x/oauth2 v0.26.0 // indirect
|
golang.org/x/oauth2 v0.28.0 // indirect
|
||||||
golang.org/x/sys v0.30.0 // indirect
|
golang.org/x/sys v0.31.0 // indirect
|
||||||
golang.org/x/text v0.22.0 // indirect
|
golang.org/x/text v0.23.0 // indirect
|
||||||
golang.org/x/time v0.10.0 // indirect
|
golang.org/x/time v0.11.0 // indirect
|
||||||
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
|
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
|
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 // indirect
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
|
||||||
google.golang.org/grpc v1.70.0 // indirect
|
google.golang.org/grpc v1.71.0 // indirect
|
||||||
google.golang.org/protobuf v1.36.5 // indirect
|
google.golang.org/protobuf v1.36.5 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
)
|
)
|
||||||
|
|
147
go.sum
147
go.sum
|
@ -1,23 +1,13 @@
|
||||||
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
|
|
||||||
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
|
|
||||||
cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
|
cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
|
||||||
cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
|
cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
|
||||||
cloud.google.com/go/auth v0.13.0 h1:8Fu8TZy167JkW8Tj3q7dIkr2v4cndv41ouecJx0PAHs=
|
cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
|
||||||
cloud.google.com/go/auth v0.13.0/go.mod h1:COOjD9gwfKNKz+IIduatIhYJQIc0mG3H102r/EMxX6Q=
|
cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
|
||||||
cloud.google.com/go/auth v0.14.1 h1:AwoJbzUdxA/whv1qj3TLKwh3XX5sikny2fc40wUl+h0=
|
|
||||||
cloud.google.com/go/auth v0.14.1/go.mod h1:4JHUxlGXisL0AW8kXPtUF6ztuOksyfUQNFjfsOCXkPM=
|
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU=
|
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8=
|
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
|
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
|
||||||
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
|
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
|
||||||
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
|
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
|
||||||
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
|
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
|
||||||
cloud.google.com/go/documentai v1.35.1 h1:52RfiUsoblXcE57CfKJGnITWLxRM30BcqNk/BKZl2LI=
|
|
||||||
cloud.google.com/go/documentai v1.35.1/go.mod h1:WJjwUAQfwQPJORW8fjz7RODprMULDzEGLA2E6WxenFw=
|
|
||||||
cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
|
cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
|
||||||
cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
|
cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
|
||||||
cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0MK+hc=
|
|
||||||
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
|
|
||||||
cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
|
cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
|
||||||
cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
|
cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
|
||||||
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
||||||
|
@ -49,8 +39,6 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2
|
||||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
|
||||||
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
|
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
|
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
|
||||||
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
||||||
|
@ -74,21 +62,27 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
|
||||||
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||||
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
|
|
||||||
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
|
|
||||||
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
|
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
|
||||||
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
|
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
|
||||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
|
||||||
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.5 h1:VgzTY2jogw3xt39CusEnFJWm7rlsq5yL5q9XdLOuP5g=
|
||||||
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.5/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
|
||||||
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
|
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
|
||||||
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
|
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
|
||||||
|
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
|
||||||
|
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
|
||||||
|
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
|
||||||
|
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
|
||||||
|
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
|
||||||
|
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
|
||||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||||
|
@ -131,8 +125,8 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
|
||||||
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
|
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
||||||
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
|
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||||
|
@ -154,8 +148,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/tmc/langchaingo v0.1.13-pre.1 h1:r+ma9kl0NuFJGtIrnMPFjEn4RhXktwSI31fIpgiiMm4=
|
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
|
||||||
github.com/tmc/langchaingo v0.1.13-pre.1/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
|
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||||
|
@ -174,105 +168,84 @@ gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJW
|
||||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
||||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc=
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE=
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
|
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
|
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU=
|
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q=
|
|
||||||
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
|
|
||||||
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
|
|
||||||
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
|
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
|
||||||
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
|
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
|
||||||
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
|
|
||||||
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
|
|
||||||
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
|
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
|
||||||
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
|
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
|
||||||
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
|
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
|
||||||
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
|
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
|
||||||
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
|
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
|
||||||
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
|
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
|
||||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
|
||||||
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
|
|
||||||
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
|
|
||||||
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
|
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
|
||||||
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
||||||
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
|
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
|
||||||
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
|
||||||
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
|
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
|
||||||
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
|
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
|
||||||
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
|
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
|
||||||
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
|
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
|
||||||
golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
|
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
|
||||||
golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
|
||||||
golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70=
|
|
||||||
golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
|
||||||
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
|
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
|
||||||
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||||
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
|
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
|
||||||
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
||||||
|
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
|
||||||
|
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
|
||||||
|
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
|
||||||
|
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
|
||||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
|
||||||
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
|
|
||||||
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
|
||||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
|
||||||
|
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
|
|
||||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
|
||||||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||||
golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg=
|
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
|
||||||
golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
|
||||||
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
|
|
||||||
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
|
||||||
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
|
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
|
||||||
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||||
|
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
|
||||||
|
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
google.golang.org/api v0.214.0 h1:h2Gkq07OYi6kusGOaT/9rnNljuXmqPnaig7WGPmKbwA=
|
google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
|
||||||
google.golang.org/api v0.214.0/go.mod h1:bYPpLG8AyeMWwDU6NXoB00xC0DFkikVvd5MfwoxjLqE=
|
google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
|
||||||
google.golang.org/api v0.220.0 h1:3oMI4gdBgB72WFVwE1nerDD8W3HUOS4kypK6rRLbGns=
|
google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU=
|
||||||
google.golang.org/api v0.220.0/go.mod h1:26ZAlY6aN/8WgpCzjPNy18QpYaz7Zgg1h0qe1GkZEmY=
|
google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ=
|
||||||
google.golang.org/api v0.221.0 h1:qzaJfLhDsbMeFee8zBRdt/Nc+xmOuafD/dbdgGfutOU=
|
google.golang.org/api v0.225.0 h1:+4/IVqBQm0MV5S+JW3kdEGC1WtOmM2mXN1LKH1LdNlw=
|
||||||
google.golang.org/api v0.221.0/go.mod h1:7sOU2+TL4TxUTdbi0gWgAIg7tH5qBXxoyhtL+9x3biQ=
|
google.golang.org/api v0.225.0/go.mod h1:WP/0Xm4LVvMOCldfvOISnWquSRWbG2kArDZcg+W2DbY=
|
||||||
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
|
|
||||||
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc=
|
|
||||||
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
|
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
|
||||||
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
|
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 h1:pgr/4QbFyktUv9CtQ/Fq4gzEE6/Xs7iCXbktaGzLHbQ=
|
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697/go.mod h1:+D9ySVjN8nY8YCVjc5O7PZDIdZporIDY3KaGfJunh88=
|
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q=
|
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
|
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
|
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
|
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 h1:J1H9f+LEdWAfHcez/4cvaVBox7cOYT+IU6rgqj5x++8=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e h1:YA5lmSs3zc/5w+xsRcHqpETkaYyK63ivEPzNTcUUlSA=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 h1:2duwAxN2+k0xLNpjnHTXoMUgnv6VPSp5fiqTuwSxjmI=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
|
||||||
google.golang.org/grpc v1.67.3 h1:OgPcDAFKHnH8X3O4WcO4XUc8GRDeKsKReqbQtiCj7N8=
|
|
||||||
google.golang.org/grpc v1.67.3/go.mod h1:YGaHCc6Oap+FzBJTZLBzkGSYt/cvGPFTPxkn7QfSU8s=
|
|
||||||
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
|
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
|
||||||
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
|
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
|
||||||
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
|
google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
|
||||||
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
|
||||||
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM=
|
|
||||||
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
|
||||||
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
||||||
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|
27
main.go
27
main.go
|
@ -36,6 +36,10 @@ var (
|
||||||
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
|
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
|
||||||
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
|
||||||
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
|
||||||
|
azureDocAIEndpoint = os.Getenv("AZURE_DOCAI_ENDPOINT")
|
||||||
|
azureDocAIKey = os.Getenv("AZURE_DOCAI_KEY")
|
||||||
|
azureDocAIModelID = os.Getenv("AZURE_DOCAI_MODEL_ID")
|
||||||
|
azureDocAITimeout = os.Getenv("AZURE_DOCAI_TIMEOUT_SECONDS")
|
||||||
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
|
||||||
manualTag = os.Getenv("MANUAL_TAG")
|
manualTag = os.Getenv("MANUAL_TAG")
|
||||||
autoTag = os.Getenv("AUTO_TAG")
|
autoTag = os.Getenv("AUTO_TAG")
|
||||||
|
@ -167,6 +171,18 @@ func main() {
|
||||||
GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"),
|
GoogleProcessorID: os.Getenv("GOOGLE_PROCESSOR_ID"),
|
||||||
VisionLLMProvider: visionLlmProvider,
|
VisionLLMProvider: visionLlmProvider,
|
||||||
VisionLLMModel: visionLlmModel,
|
VisionLLMModel: visionLlmModel,
|
||||||
|
AzureEndpoint: azureDocAIEndpoint,
|
||||||
|
AzureAPIKey: azureDocAIKey,
|
||||||
|
AzureModelID: azureDocAIModelID,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse Azure timeout if set
|
||||||
|
if azureDocAITimeout != "" {
|
||||||
|
if timeout, err := strconv.Atoi(azureDocAITimeout); err == nil {
|
||||||
|
ocrConfig.AzureTimeout = timeout
|
||||||
|
} else {
|
||||||
|
log.Warnf("Invalid AZURE_DOCAI_TIMEOUT_SECONDS value: %v, using default", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider
|
// If provider is LLM, but no VISION_LLM_PROVIDER is set, don't initialize OCR provider
|
||||||
|
@ -422,6 +438,17 @@ func validateOrDefaultEnvVars() {
|
||||||
log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
|
log.Fatal("Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate OCR provider if set
|
||||||
|
ocrProvider := os.Getenv("OCR_PROVIDER")
|
||||||
|
if ocrProvider == "azure" {
|
||||||
|
if azureDocAIEndpoint == "" {
|
||||||
|
log.Fatal("Please set the AZURE_DOCAI_ENDPOINT environment variable for Azure provider")
|
||||||
|
}
|
||||||
|
if azureDocAIKey == "" {
|
||||||
|
log.Fatal("Please set the AZURE_DOCAI_KEY environment variable for Azure provider")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if llmModel == "" {
|
if llmModel == "" {
|
||||||
log.Fatal("Please set the LLM_MODEL environment variable.")
|
log.Fatal("Please set the LLM_MODEL environment variable.")
|
||||||
}
|
}
|
||||||
|
|
13
ocr.go
13
ocr.go
|
@ -36,13 +36,20 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string,
|
||||||
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
|
return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ocrText, err := app.ocrProvider.ProcessImage(ctx, imageContent)
|
result, err := app.ocrProvider.ProcessImage(ctx, imageContent)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
|
return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
|
||||||
}
|
}
|
||||||
pageLogger.Debug("OCR completed for page")
|
if result == nil {
|
||||||
|
pageLogger.Error("Got nil result from OCR provider")
|
||||||
|
return "", fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1)
|
||||||
|
}
|
||||||
|
|
||||||
ocrTexts = append(ocrTexts, ocrText)
|
pageLogger.WithField("has_hocr", result.HOCR != "").
|
||||||
|
WithField("metadata", result.Metadata).
|
||||||
|
Debug("OCR completed for page")
|
||||||
|
|
||||||
|
ocrTexts = append(ocrTexts, result.Text)
|
||||||
}
|
}
|
||||||
|
|
||||||
docLogger.Info("OCR processing completed successfully")
|
docLogger.Info("OCR processing completed successfully")
|
||||||
|
|
224
ocr/azure_provider.go
Normal file
224
ocr/azure_provider.go
Normal file
|
@ -0,0 +1,224 @@
|
||||||
|
package ocr
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/gabriel-vasile/mimetype"
|
||||||
|
"github.com/hashicorp/go-retryablehttp"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Defaults and fixed parameters for the Azure Document Intelligence provider.
const (
	// apiVersion is sent as the api-version query parameter of the analyze request.
	apiVersion = "2024-11-30"

	// defaultModelID is the model used when Config.AzureModelID is empty.
	defaultModelID = "prebuilt-read"

	// defaultTimeout is the per-call processing deadline, in seconds, used when
	// Config.AzureTimeout is not a positive number.
	defaultTimeout = 120

	// pollingInterval is the period of the ticker that drives result polling.
	pollingInterval = 2 * time.Second
)
|
||||||
|
|
||||||
|
// AzureProvider implements OCR using Azure Document Intelligence
|
||||||
|
type AzureProvider struct {
|
||||||
|
endpoint string
|
||||||
|
apiKey string
|
||||||
|
modelID string
|
||||||
|
timeout time.Duration
|
||||||
|
httpClient *retryablehttp.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// analyzeRequest is the JSON request body posted to the analyze endpoint of
// Azure Document Intelligence.
type analyzeRequest struct {
	// Base64Source holds the document bytes in standard base64 encoding.
	Base64Source string `json:"base64Source"`
}
|
||||||
|
|
||||||
|
func newAzureProvider(config Config) (*AzureProvider, error) {
|
||||||
|
logger := log.WithFields(logrus.Fields{
|
||||||
|
"endpoint": config.AzureEndpoint,
|
||||||
|
"model_id": config.AzureModelID,
|
||||||
|
})
|
||||||
|
logger.Info("Creating new Azure Document Intelligence provider")
|
||||||
|
|
||||||
|
// Validate required configuration
|
||||||
|
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
|
||||||
|
logger.Error("Missing required configuration")
|
||||||
|
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set defaults and create provider
|
||||||
|
modelID := defaultModelID
|
||||||
|
if config.AzureModelID != "" {
|
||||||
|
modelID = config.AzureModelID
|
||||||
|
}
|
||||||
|
|
||||||
|
timeout := defaultTimeout
|
||||||
|
if config.AzureTimeout > 0 {
|
||||||
|
timeout = config.AzureTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure retryablehttp client
|
||||||
|
client := retryablehttp.NewClient()
|
||||||
|
client.RetryMax = 3
|
||||||
|
client.RetryWaitMin = 1 * time.Second
|
||||||
|
client.RetryWaitMax = 5 * time.Second
|
||||||
|
client.Logger = logger
|
||||||
|
|
||||||
|
provider := &AzureProvider{
|
||||||
|
endpoint: config.AzureEndpoint,
|
||||||
|
apiKey: config.AzureAPIKey,
|
||||||
|
modelID: modelID,
|
||||||
|
timeout: time.Duration(timeout) * time.Second,
|
||||||
|
httpClient: client,
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("Successfully initialized Azure Document Intelligence provider")
|
||||||
|
return provider, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *AzureProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
|
||||||
|
logger := log.WithFields(logrus.Fields{
|
||||||
|
"model_id": p.modelID,
|
||||||
|
})
|
||||||
|
logger.Debug("Starting Azure Document Intelligence processing")
|
||||||
|
|
||||||
|
// Detect MIME type
|
||||||
|
mtype := mimetype.Detect(imageContent)
|
||||||
|
logger.WithField("mime_type", mtype.String()).Debug("Detected file type")
|
||||||
|
|
||||||
|
if !isImageMIMEType(mtype.String()) {
|
||||||
|
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
|
||||||
|
return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create context with timeout
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, p.timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Submit document for analysis
|
||||||
|
operationLocation, err := p.submitDocument(ctx, imageContent)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error submitting document: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Poll for results
|
||||||
|
result, err := p.pollForResults(ctx, operationLocation)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error polling for results: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to OCR result
|
||||||
|
ocrResult := &OCRResult{
|
||||||
|
Text: result.AnalyzeResult.Content,
|
||||||
|
Metadata: map[string]string{
|
||||||
|
"provider": "azure_docai",
|
||||||
|
"page_count": fmt.Sprintf("%d", len(result.AnalyzeResult.Pages)),
|
||||||
|
"api_version": result.AnalyzeResult.APIVersion,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.WithFields(logrus.Fields{
|
||||||
|
"content_length": len(ocrResult.Text),
|
||||||
|
"page_count": len(result.AnalyzeResult.Pages),
|
||||||
|
}).Info("Successfully processed document")
|
||||||
|
return ocrResult, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *AzureProvider) submitDocument(ctx context.Context, imageContent []byte) (string, error) {
|
||||||
|
requestURL := fmt.Sprintf("%s/documentintelligence/documentModels/%s:analyze?api-version=%s",
|
||||||
|
p.endpoint, p.modelID, apiVersion)
|
||||||
|
|
||||||
|
// Prepare request body
|
||||||
|
requestBody := analyzeRequest{
|
||||||
|
Base64Source: base64.StdEncoding.EncodeToString(imageContent),
|
||||||
|
}
|
||||||
|
requestBodyBytes, err := json.Marshal(requestBody)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error marshaling request body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := retryablehttp.NewRequestWithContext(ctx, "POST", requestURL, bytes.NewBuffer(requestBodyBytes))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error creating HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)
|
||||||
|
|
||||||
|
resp, err := p.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error sending HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusAccepted {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return "", fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
operationLocation := resp.Header.Get("Operation-Location")
|
||||||
|
if operationLocation == "" {
|
||||||
|
return "", fmt.Errorf("no Operation-Location header in response")
|
||||||
|
}
|
||||||
|
|
||||||
|
return operationLocation, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *AzureProvider) pollForResults(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
|
||||||
|
logger := log.WithField("operation_location", operationLocation)
|
||||||
|
logger.Debug("Starting to poll for results")
|
||||||
|
|
||||||
|
ticker := time.NewTicker(pollingInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil, fmt.Errorf("operation timed out after %v: %w", p.timeout, ctx.Err())
|
||||||
|
case <-ticker.C:
|
||||||
|
req, err := retryablehttp.NewRequestWithContext(ctx, "GET", operationLocation, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error creating poll request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)
|
||||||
|
|
||||||
|
resp, err := p.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error polling for results: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var result AzureDocumentResult
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||||
|
resp.Body.Close()
|
||||||
|
logger.WithError(err).Error("Failed to decode response")
|
||||||
|
return nil, fmt.Errorf("error decoding response: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
logger.WithFields(logrus.Fields{
|
||||||
|
"status_code": resp.StatusCode,
|
||||||
|
"content_length": len(result.AnalyzeResult.Content),
|
||||||
|
"page_count": len(result.AnalyzeResult.Pages),
|
||||||
|
"status": result.Status,
|
||||||
|
}).Debug("Poll response received")
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("unexpected status code %d while polling", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch result.Status {
|
||||||
|
case "succeeded":
|
||||||
|
return &result, nil
|
||||||
|
case "failed":
|
||||||
|
return nil, fmt.Errorf("document processing failed")
|
||||||
|
case "running":
|
||||||
|
// Continue polling
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unexpected status: %s", result.Status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
222
ocr/azure_provider_test.go
Normal file
222
ocr/azure_provider_test.go
Normal file
|
@ -0,0 +1,222 @@
|
||||||
|
package ocr
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/go-retryablehttp"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewAzureProvider(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
config Config
|
||||||
|
wantErr bool
|
||||||
|
errContains string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid config",
|
||||||
|
config: Config{
|
||||||
|
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
|
||||||
|
AzureAPIKey: "test-key",
|
||||||
|
},
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid config with custom model and timeout",
|
||||||
|
config: Config{
|
||||||
|
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
|
||||||
|
AzureAPIKey: "test-key",
|
||||||
|
AzureModelID: "custom-model",
|
||||||
|
AzureTimeout: 60,
|
||||||
|
},
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing endpoint",
|
||||||
|
config: Config{
|
||||||
|
AzureAPIKey: "test-key",
|
||||||
|
},
|
||||||
|
wantErr: true,
|
||||||
|
errContains: "missing required Azure Document Intelligence configuration",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing api key",
|
||||||
|
config: Config{
|
||||||
|
AzureEndpoint: "https://test.cognitiveservices.azure.com/",
|
||||||
|
},
|
||||||
|
wantErr: true,
|
||||||
|
errContains: "missing required Azure Document Intelligence configuration",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
provider, err := newAzureProvider(tt.config)
|
||||||
|
if tt.wantErr {
|
||||||
|
assert.Error(t, err)
|
||||||
|
if tt.errContains != "" {
|
||||||
|
assert.Contains(t, err.Error(), tt.errContains)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, provider)
|
||||||
|
|
||||||
|
// Verify default values
|
||||||
|
if tt.config.AzureModelID == "" {
|
||||||
|
assert.Equal(t, defaultModelID, provider.modelID)
|
||||||
|
} else {
|
||||||
|
assert.Equal(t, tt.config.AzureModelID, provider.modelID)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.config.AzureTimeout == 0 {
|
||||||
|
assert.Equal(t, time.Duration(defaultTimeout)*time.Second, provider.timeout)
|
||||||
|
} else {
|
||||||
|
assert.Equal(t, time.Duration(tt.config.AzureTimeout)*time.Second, provider.timeout)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAzureProvider_ProcessImage(t *testing.T) {
|
||||||
|
// Sample success response
|
||||||
|
now := time.Now()
|
||||||
|
successResult := AzureDocumentResult{
|
||||||
|
Status: "succeeded",
|
||||||
|
CreatedDateTime: now,
|
||||||
|
LastUpdatedDateTime: now,
|
||||||
|
AnalyzeResult: AzureAnalyzeResult{
|
||||||
|
APIVersion: apiVersion,
|
||||||
|
ModelID: defaultModelID,
|
||||||
|
StringIndexType: "utf-16",
|
||||||
|
Content: "Test document content",
|
||||||
|
Pages: []AzurePage{
|
||||||
|
{
|
||||||
|
PageNumber: 1,
|
||||||
|
Angle: 0.0,
|
||||||
|
Width: 800,
|
||||||
|
Height: 600,
|
||||||
|
Unit: "pixel",
|
||||||
|
Lines: []AzureLine{
|
||||||
|
{
|
||||||
|
Content: "Test line",
|
||||||
|
Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
|
||||||
|
Spans: []AzureSpan{{Offset: 0, Length: 9}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spans: []AzureSpan{{Offset: 0, Length: 9}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Paragraphs: []AzureParagraph{
|
||||||
|
{
|
||||||
|
Content: "Test document content",
|
||||||
|
Spans: []AzureSpan{{Offset: 0, Length: 19}},
|
||||||
|
BoundingRegions: []AzureBoundingBox{
|
||||||
|
{
|
||||||
|
PageNumber: 1,
|
||||||
|
Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ContentFormat: "text",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setupServer func() *httptest.Server
|
||||||
|
imageContent []byte
|
||||||
|
wantErr bool
|
||||||
|
errContains string
|
||||||
|
expectedText string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "successful processing",
|
||||||
|
setupServer: func() *httptest.Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
server := httptest.NewServer(mux)
|
||||||
|
|
||||||
|
mux.HandleFunc("/documentintelligence/documentModels/prebuilt-read:analyze", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Operation-Location", fmt.Sprintf("%s/operations/123", server.URL))
|
||||||
|
w.WriteHeader(http.StatusAccepted)
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("/operations/123", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
json.NewEncoder(w).Encode(successResult)
|
||||||
|
})
|
||||||
|
|
||||||
|
return server
|
||||||
|
},
|
||||||
|
// Create minimal JPEG content with magic numbers
|
||||||
|
imageContent: append([]byte{0xFF, 0xD8, 0xFF, 0xE0}, []byte("JFIF test content")...),
|
||||||
|
expectedText: "Test document content",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid mime type",
|
||||||
|
setupServer: func() *httptest.Server {
|
||||||
|
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
t.Log("Server should not be called with invalid mime type")
|
||||||
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
|
}))
|
||||||
|
},
|
||||||
|
imageContent: []byte("invalid content"),
|
||||||
|
wantErr: true,
|
||||||
|
errContains: "unsupported file type",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "submission error",
|
||||||
|
setupServer: func() *httptest.Server {
|
||||||
|
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
|
fmt.Fprintln(w, "Invalid request")
|
||||||
|
}))
|
||||||
|
},
|
||||||
|
imageContent: []byte{0xFF, 0xD8, 0xFF, 0xE0}, // JPEG magic numbers
|
||||||
|
wantErr: true,
|
||||||
|
errContains: "unexpected status code 400",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
server := tt.setupServer()
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
client := retryablehttp.NewClient()
|
||||||
|
client.HTTPClient = server.Client()
|
||||||
|
client.Logger = log
|
||||||
|
|
||||||
|
provider := &AzureProvider{
|
||||||
|
endpoint: server.URL,
|
||||||
|
apiKey: "test-key",
|
||||||
|
modelID: defaultModelID,
|
||||||
|
timeout: 5 * time.Second,
|
||||||
|
httpClient: client,
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := provider.ProcessImage(context.Background(), tt.imageContent)
|
||||||
|
if tt.wantErr {
|
||||||
|
assert.Error(t, err)
|
||||||
|
if tt.errContains != "" {
|
||||||
|
assert.Contains(t, err.Error(), tt.errContains)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, result)
|
||||||
|
assert.Equal(t, tt.expectedText, result.Text)
|
||||||
|
assert.Equal(t, "azure_docai", result.Metadata["provider"])
|
||||||
|
assert.Equal(t, apiVersion, result.Metadata["api_version"])
|
||||||
|
assert.Equal(t, "1", result.Metadata["page_count"])
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
72
ocr/azure_types.go
Normal file
72
ocr/azure_types.go
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
package ocr
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// AzureDocumentResult is the top-level JSON document returned when polling an
// Azure Document Intelligence analyze operation.
type AzureDocumentResult struct {
	// Status is the operation state reported by the service.
	Status string `json:"status"`
	// CreatedDateTime is decoded from the "createdDateTime" field.
	CreatedDateTime time.Time `json:"createdDateTime"`
	// LastUpdatedDateTime is decoded from the "lastUpdatedDateTime" field.
	LastUpdatedDateTime time.Time `json:"lastUpdatedDateTime"`
	// AnalyzeResult holds the extracted content; it stays at its zero value
	// when the response carries no "analyzeResult" object.
	AnalyzeResult AzureAnalyzeResult `json:"analyzeResult"`
}

// AzureAnalyzeResult is the "analyzeResult" part of the response: the full
// extracted text plus its per-page and per-paragraph breakdown.
type AzureAnalyzeResult struct {
	APIVersion      string `json:"apiVersion"`
	ModelID         string `json:"modelId"`
	StringIndexType string `json:"stringIndexType"`
	// Content is the complete extracted text of the document.
	Content    string           `json:"content"`
	Pages      []AzurePage      `json:"pages"`
	Paragraphs []AzureParagraph `json:"paragraphs"`
	// Styles is kept untyped; its elements are decoded as generic JSON values.
	Styles        []any  `json:"styles"`
	ContentFormat string `json:"contentFormat"`
}

// AzurePage describes a single page of the analyzed document.
//
// NOTE(review): Width, Height and every Polygon in this file are ints. If the
// service ever reports fractional values (presumably the case for inch-based
// units), JSON decoding will fail — confirm only pixel-unit input is submitted.
type AzurePage struct {
	PageNumber int     `json:"pageNumber"`
	Angle      float64 `json:"angle"`
	Width      int     `json:"width"`
	Height     int     `json:"height"`
	// Unit names the unit of Width, Height and the polygons (e.g. "pixel").
	Unit  string      `json:"unit"`
	Words []AzureWord `json:"words"`
	Lines []AzureLine `json:"lines"`
	Spans []AzureSpan `json:"spans"`
}

// AzureWord is a single recognized word with its outline, confidence and
// position in the content string.
type AzureWord struct {
	Content    string    `json:"content"`
	Polygon    []int     `json:"polygon"`
	Confidence float64   `json:"confidence"`
	Span       AzureSpan `json:"span"`
}

// AzureLine is a line of text with its outline and content spans.
type AzureLine struct {
	Content string      `json:"content"`
	Polygon []int       `json:"polygon"`
	Spans   []AzureSpan `json:"spans"`
}

// AzureSpan identifies a range of text by offset and length.
type AzureSpan struct {
	Offset int `json:"offset"`
	Length int `json:"length"`
}

// AzureParagraph is a paragraph of text together with the page regions it
// occupies.
type AzureParagraph struct {
	Content         string             `json:"content"`
	Spans           []AzureSpan        `json:"spans"`
	BoundingRegions []AzureBoundingBox `json:"boundingRegions"`
}

// AzureBoundingBox locates content on a page: the page number plus the
// polygon outlining the content.
type AzureBoundingBox struct {
	PageNumber int   `json:"pageNumber"`
	Polygon    []int `json:"polygon"`
}

// AzureStyle is a placeholder for style information about text segments. It
// is deliberately untyped so any JSON value can be held.
type AzureStyle any
|
|
@ -3,6 +3,8 @@ package ocr
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"html"
|
||||||
|
"strings"
|
||||||
|
|
||||||
documentai "cloud.google.com/go/documentai/apiv1"
|
documentai "cloud.google.com/go/documentai/apiv1"
|
||||||
"cloud.google.com/go/documentai/apiv1/documentaipb"
|
"cloud.google.com/go/documentai/apiv1/documentaipb"
|
||||||
|
@ -46,7 +48,7 @@ func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
|
||||||
return provider, nil
|
return provider, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
|
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
|
||||||
logger := log.WithFields(logrus.Fields{
|
logger := log.WithFields(logrus.Fields{
|
||||||
"project_id": p.projectID,
|
"project_id": p.projectID,
|
||||||
"location": p.location,
|
"location": p.location,
|
||||||
|
@ -60,7 +62,7 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
|
||||||
|
|
||||||
if !isImageMIMEType(mtype.String()) {
|
if !isImageMIMEType(mtype.String()) {
|
||||||
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
|
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
|
||||||
return "", fmt.Errorf("unsupported file type: %s", mtype.String())
|
return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
|
name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
|
||||||
|
@ -79,21 +81,56 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
|
||||||
resp, err := p.client.ProcessDocument(ctx, req)
|
resp, err := p.client.ProcessDocument(ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.WithError(err).Error("Failed to process document")
|
logger.WithError(err).Error("Failed to process document")
|
||||||
return "", fmt.Errorf("error processing document: %w", err)
|
return nil, fmt.Errorf("error processing document: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp == nil || resp.Document == nil {
|
if resp == nil || resp.Document == nil {
|
||||||
logger.Error("Received nil response or document from Document AI")
|
logger.Error("Received nil response or document from Document AI")
|
||||||
return "", fmt.Errorf("received nil response or document from Document AI")
|
return nil, fmt.Errorf("received nil response or document from Document AI")
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.Document.Error != nil {
|
if resp.Document.Error != nil {
|
||||||
logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
|
logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
|
||||||
return "", fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
|
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.WithField("content_length", len(resp.Document.Text)).Info("Successfully processed document")
|
metadata := map[string]string{
|
||||||
return resp.Document.Text, nil
|
"provider": "google_docai",
|
||||||
|
"mime_type": mtype.String(),
|
||||||
|
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
|
||||||
|
"processor_id": p.processorID,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safely add language code if available
|
||||||
|
if pages := resp.Document.GetPages(); len(pages) > 0 {
|
||||||
|
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
|
||||||
|
metadata["lang_code"] = langs[0].GetLanguageCode()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &OCRResult{
|
||||||
|
Text: resp.Document.Text,
|
||||||
|
Metadata: metadata,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add hOCR output if available
|
||||||
|
if len(resp.Document.GetPages()) > 0 {
|
||||||
|
var hocr string
|
||||||
|
func() {
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
logger.WithField("error", r).Error("Panic during hOCR generation")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
hocr = generateHOCR(resp.Document)
|
||||||
|
}()
|
||||||
|
if hocr != "" {
|
||||||
|
result.HOCR = hocr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.WithField("content_length", len(result.Text)).Info("Successfully processed document")
|
||||||
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// isImageMIMEType checks if the given MIME type is a supported image type
|
// isImageMIMEType checks if the given MIME type is a supported image type
|
||||||
|
@ -109,6 +146,83 @@ func isImageMIMEType(mimeType string) bool {
|
||||||
return supportedTypes[mimeType]
|
return supportedTypes[mimeType]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// generateHOCR converts Document AI response to hOCR format
|
||||||
|
func generateHOCR(doc *documentaipb.Document) string {
|
||||||
|
if len(doc.GetPages()) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var hocr strings.Builder
|
||||||
|
hocr.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||||
|
<head>
|
||||||
|
<title>OCR Output</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||||
|
<meta name='ocr-system' content='google-docai' />
|
||||||
|
</head>
|
||||||
|
<body>`)
|
||||||
|
|
||||||
|
for pageNum, page := range doc.GetPages() {
|
||||||
|
pageWidth := page.GetDimension().GetWidth()
|
||||||
|
pageHeight := page.GetDimension().GetHeight()
|
||||||
|
// Validate dimensions
|
||||||
|
if pageWidth <= 0 || pageHeight <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
hocr.WriteString(fmt.Sprintf(`
|
||||||
|
<div class='ocr_page' id='page_%d' title='image;bbox 0 0 %d %d'>`,
|
||||||
|
pageNum+1, int(pageWidth), int(pageHeight)))
|
||||||
|
|
||||||
|
// Process paragraphs
|
||||||
|
for _, para := range page.GetParagraphs() {
|
||||||
|
paraBox := para.GetLayout().GetBoundingPoly().GetNormalizedVertices()
|
||||||
|
if len(paraBox) < 4 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert normalized coordinates to absolute
|
||||||
|
// Use float64 for intermediate calculations to prevent overflow
|
||||||
|
x1 := int(float64(paraBox[0].GetX()) * float64(pageWidth))
|
||||||
|
y1 := int(float64(paraBox[0].GetY()) * float64(pageHeight))
|
||||||
|
x2 := int(float64(paraBox[2].GetX()) * float64(pageWidth))
|
||||||
|
y2 := int(float64(paraBox[2].GetY()) * float64(pageHeight))
|
||||||
|
|
||||||
|
// Validate coordinates
|
||||||
|
if x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0 ||
|
||||||
|
x1 > int(pageWidth) || y1 > int(pageHeight) ||
|
||||||
|
x2 > int(pageWidth) || y2 > int(pageHeight) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
hocr.WriteString(fmt.Sprintf(`
|
||||||
|
<p class='ocr_par' id='par_%d_%d' title='bbox %d %d %d %d'>`,
|
||||||
|
pageNum+1, len(page.GetParagraphs()), x1, y1, x2, y2))
|
||||||
|
|
||||||
|
// Process words within paragraph
|
||||||
|
for _, token := range para.GetLayout().GetTextAnchor().GetTextSegments() {
|
||||||
|
text := doc.Text[token.GetStartIndex():token.GetEndIndex()]
|
||||||
|
if text == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Escape HTML special characters
|
||||||
|
text = html.EscapeString(text)
|
||||||
|
|
||||||
|
hocr.WriteString(fmt.Sprintf(`
|
||||||
|
<span class='ocrx_word'>%s</span>`, text))
|
||||||
|
}
|
||||||
|
|
||||||
|
hocr.WriteString("\n </p>")
|
||||||
|
}
|
||||||
|
hocr.WriteString("\n </div>")
|
||||||
|
}
|
||||||
|
|
||||||
|
hocr.WriteString("\n</body>\n</html>")
|
||||||
|
return hocr.String()
|
||||||
|
}
|
||||||
|
|
||||||
// Close releases resources used by the provider
|
// Close releases resources used by the provider
|
||||||
func (p *GoogleDocAIProvider) Close() error {
|
func (p *GoogleDocAIProvider) Close() error {
|
||||||
if p.client != nil {
|
if p.client != nil {
|
||||||
|
|
94
ocr/google_docai_provider_test.go
Normal file
94
ocr/google_docai_provider_test.go
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
package ocr
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"cloud.google.com/go/documentai/apiv1/documentaipb"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGenerateHOCR(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
doc *documentaipb.Document
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty document",
|
||||||
|
doc: &documentaipb.Document{},
|
||||||
|
expected: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single page with one paragraph",
|
||||||
|
doc: &documentaipb.Document{
|
||||||
|
Text: "Hello World",
|
||||||
|
Pages: []*documentaipb.Document_Page{
|
||||||
|
{
|
||||||
|
Dimension: &documentaipb.Document_Page_Dimension{
|
||||||
|
Width: 800,
|
||||||
|
Height: 600,
|
||||||
|
},
|
||||||
|
Paragraphs: []*documentaipb.Document_Page_Paragraph{
|
||||||
|
{
|
||||||
|
Layout: &documentaipb.Document_Page_Layout{
|
||||||
|
BoundingPoly: &documentaipb.BoundingPoly{
|
||||||
|
NormalizedVertices: []*documentaipb.NormalizedVertex{
|
||||||
|
{X: 0.1, Y: 0.1},
|
||||||
|
{X: 0.9, Y: 0.1},
|
||||||
|
{X: 0.9, Y: 0.2},
|
||||||
|
{X: 0.1, Y: 0.2},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
TextAnchor: &documentaipb.Document_TextAnchor{
|
||||||
|
TextSegments: []*documentaipb.Document_TextAnchor_TextSegment{
|
||||||
|
{
|
||||||
|
StartIndex: 0,
|
||||||
|
EndIndex: 11,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "(?s).*<div class='ocr_page' id='page_1' title='image;bbox 0 0 800 600'>.*" +
|
||||||
|
"<p class='ocr_par' id='par_1_1' title='bbox 80 60 719 120'>.*" +
|
||||||
|
"<span class='ocrx_word'>Hello World</span>.*</p>.*</div>.*",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := generateHOCR(tt.doc)
|
||||||
|
|
||||||
|
if tt.expected == "" {
|
||||||
|
if result != "" {
|
||||||
|
t.Errorf("expected empty string, got %v", result)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
matched, err := regexp.MatchString(tt.expected, result)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error matching regex: %v", err)
|
||||||
|
}
|
||||||
|
if !matched {
|
||||||
|
t.Errorf("expected to match regex %v\ngot: %v", tt.expected, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify basic hOCR structure
|
||||||
|
if !strings.Contains(result, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") {
|
||||||
|
t.Error("missing XML declaration")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "<html xmlns=\"http://www.w3.org/1999/xhtml\"") {
|
||||||
|
t.Error("missing HTML namespace")
|
||||||
|
}
|
||||||
|
if !strings.Contains(result, "<meta name='ocr-system' content='google-docai'") {
|
||||||
|
t.Error("missing OCR system metadata")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -60,7 +60,7 @@ func newLLMProvider(config Config) (*LLMProvider, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
|
func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
|
||||||
logger := log.WithFields(logrus.Fields{
|
logger := log.WithFields(logrus.Fields{
|
||||||
"provider": p.provider,
|
"provider": p.provider,
|
||||||
"model": p.model,
|
"model": p.model,
|
||||||
|
@ -71,7 +71,7 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (st
|
||||||
img, _, err := image.Decode(bytes.NewReader(imageContent))
|
img, _, err := image.Decode(bytes.NewReader(imageContent))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.WithError(err).Error("Failed to decode image")
|
logger.WithError(err).Error("Failed to decode image")
|
||||||
return "", fmt.Errorf("error decoding image: %w", err)
|
return nil, fmt.Errorf("error decoding image: %w", err)
|
||||||
}
|
}
|
||||||
bounds := img.Bounds()
|
bounds := img.Bounds()
|
||||||
logger.WithFields(logrus.Fields{
|
logger.WithFields(logrus.Fields{
|
||||||
|
@ -106,11 +106,18 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (st
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.WithError(err).Error("Failed to get response from vision model")
|
logger.WithError(err).Error("Failed to get response from vision model")
|
||||||
return "", fmt.Errorf("error getting response from LLM: %w", err)
|
return nil, fmt.Errorf("error getting response from LLM: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.WithField("content_length", len(completion.Choices[0].Content)).Info("Successfully processed image")
|
result := &OCRResult{
|
||||||
return completion.Choices[0].Content, nil
|
Text: completion.Choices[0].Content,
|
||||||
|
Metadata: map[string]string{
|
||||||
|
"provider": p.provider,
|
||||||
|
"model": p.model,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
logger.WithField("content_length", len(result.Text)).Info("Successfully processed image")
|
||||||
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// createOpenAIClient creates a new OpenAI vision model client
|
// createOpenAIClient creates a new OpenAI vision model client
|
||||||
|
|
|
@ -9,14 +9,26 @@ import (
|
||||||
|
|
||||||
var log = logrus.New()
|
var log = logrus.New()
|
||||||
|
|
||||||
|
// OCRResult bundles everything an OCR provider produces for one image.
type OCRResult struct {
	// Text is the plain-text output. It is the one field every provider is
	// required to fill in.
	Text string

	// HOCR is the hOCR rendering of the same content. It is optional and
	// only populated by providers that support it.
	HOCR string

	// Metadata carries additional provider-specific key/value details.
	Metadata map[string]string
}
|
||||||
|
|
||||||
// Provider defines the interface for OCR processing
|
// Provider defines the interface for OCR processing
|
||||||
type Provider interface {
|
type Provider interface {
|
||||||
ProcessImage(ctx context.Context, imageContent []byte) (string, error)
|
ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Config holds the OCR provider configuration
|
// Config holds the OCR provider configuration
|
||||||
type Config struct {
|
type Config struct {
|
||||||
// Provider type (e.g., "llm", "google_docai")
|
// Provider type (e.g., "llm", "google_docai", "azure")
|
||||||
Provider string
|
Provider string
|
||||||
|
|
||||||
// Google Document AI settings
|
// Google Document AI settings
|
||||||
|
@ -27,6 +39,15 @@ type Config struct {
|
||||||
// LLM settings (from existing config)
|
// LLM settings (from existing config)
|
||||||
VisionLLMProvider string
|
VisionLLMProvider string
|
||||||
VisionLLMModel string
|
VisionLLMModel string
|
||||||
|
|
||||||
|
// Azure Document Intelligence settings
|
||||||
|
AzureEndpoint string
|
||||||
|
AzureAPIKey string
|
||||||
|
AzureModelID string // Optional, defaults to "prebuilt-read"
|
||||||
|
AzureTimeout int // Optional, defaults to 120 seconds
|
||||||
|
|
||||||
|
// OCR output options
|
||||||
|
EnableHOCR bool // Whether to request hOCR output if supported by the provider
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewProvider creates a new OCR provider based on configuration
|
// NewProvider creates a new OCR provider based on configuration
|
||||||
|
@ -54,6 +75,12 @@ func NewProvider(config Config) (Provider, error) {
|
||||||
}).Info("Using LLM OCR provider")
|
}).Info("Using LLM OCR provider")
|
||||||
return newLLMProvider(config)
|
return newLLMProvider(config)
|
||||||
|
|
||||||
|
case "azure":
|
||||||
|
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
|
||||||
|
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
|
||||||
|
}
|
||||||
|
return newAzureProvider(config)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
|
return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,7 +94,6 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body
|
||||||
log.WithFields(logrus.Fields{
|
log.WithFields(logrus.Fields{
|
||||||
"method": method,
|
"method": method,
|
||||||
"url": url,
|
"url": url,
|
||||||
"headers": req.Header,
|
|
||||||
}).Debug("Making HTTP request")
|
}).Debug("Making HTTP request")
|
||||||
|
|
||||||
resp, err := client.HTTPClient.Do(req)
|
resp, err := client.HTTPClient.Do(req)
|
||||||
|
|
541
web-app/package-lock.json
generated
541
web-app/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -43,7 +43,7 @@
|
||||||
"eslint": "^9.9.0",
|
"eslint": "^9.9.0",
|
||||||
"eslint-plugin-react-hooks": "^5.1.0-rc.0",
|
"eslint-plugin-react-hooks": "^5.1.0-rc.0",
|
||||||
"eslint-plugin-react-refresh": "^0.4.9",
|
"eslint-plugin-react-refresh": "^0.4.9",
|
||||||
"globals": "^15.9.0",
|
"globals": "^16.0.0",
|
||||||
"node-fetch": "^3.3.0",
|
"node-fetch": "^3.3.0",
|
||||||
"postcss": "^8.4.47",
|
"postcss": "^8.4.47",
|
||||||
"tailwindcss": "^3.4.12",
|
"tailwindcss": "^3.4.12",
|
||||||
|
|
Loading…
Reference in a new issue