mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 12:58:02 -05:00
92 lines
2.5 KiB
Go
92 lines
2.5 KiB
Go
package ocr
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
var log = logrus.New()
|
|
|
|
// OCRResult holds the output from OCR processing
|
|
type OCRResult struct {
|
|
// Plain text output (required)
|
|
Text string
|
|
|
|
// hOCR output (optional, if provider supports it)
|
|
HOCR string
|
|
|
|
// Additional provider-specific metadata
|
|
Metadata map[string]string
|
|
}
|
|
|
|
// Provider defines the interface for OCR processing
|
|
type Provider interface {
|
|
ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
|
|
}
|
|
|
|
// Config holds the OCR provider configuration
|
|
type Config struct {
|
|
// Provider type (e.g., "llm", "google_docai", "azure")
|
|
Provider string
|
|
|
|
// Google Document AI settings
|
|
GoogleProjectID string
|
|
GoogleLocation string
|
|
GoogleProcessorID string
|
|
|
|
// LLM settings (from existing config)
|
|
VisionLLMProvider string
|
|
VisionLLMModel string
|
|
|
|
// Azure Document Intelligence settings
|
|
AzureEndpoint string
|
|
AzureAPIKey string
|
|
AzureModelID string // Optional, defaults to "prebuilt-read"
|
|
AzureTimeout int // Optional, defaults to 120 seconds
|
|
|
|
// OCR output options
|
|
EnableHOCR bool // Whether to request hOCR output if supported by the provider
|
|
}
|
|
|
|
// NewProvider creates a new OCR provider based on configuration
|
|
func NewProvider(config Config) (Provider, error) {
|
|
log.Info("Initializing OCR provider: ", config.Provider)
|
|
|
|
switch config.Provider {
|
|
case "google_docai":
|
|
if config.GoogleProjectID == "" || config.GoogleLocation == "" || config.GoogleProcessorID == "" {
|
|
return nil, fmt.Errorf("missing required Google Document AI configuration")
|
|
}
|
|
log.WithFields(logrus.Fields{
|
|
"location": config.GoogleLocation,
|
|
"processor_id": config.GoogleProcessorID,
|
|
}).Info("Using Google Document AI provider")
|
|
return newGoogleDocAIProvider(config)
|
|
|
|
case "llm":
|
|
if config.VisionLLMProvider == "" || config.VisionLLMModel == "" {
|
|
return nil, fmt.Errorf("missing required LLM configuration")
|
|
}
|
|
log.WithFields(logrus.Fields{
|
|
"provider": config.VisionLLMProvider,
|
|
"model": config.VisionLLMModel,
|
|
}).Info("Using LLM OCR provider")
|
|
return newLLMProvider(config)
|
|
|
|
case "azure":
|
|
if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
|
|
return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
|
|
}
|
|
return newAzureProvider(config)
|
|
|
|
default:
|
|
return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
|
|
}
|
|
}
|
|
|
|
// SetLogLevel sets the logging level for the OCR package
|
|
func SetLogLevel(level logrus.Level) {
|
|
log.SetLevel(level)
|
|
}
|