paperless-gpt/ocr/provider.go
Icereed c8c0dd75ff
Google Document AI (#208)
* feat(ocr): implement OCR provider interface and add Google Document AI and LLM providers

* chore(deps): reorder dependencies in go.mod for better readability

* chore: update version numbers and adjust Docker configuration for Google Document AI integration

* feat(logging): add structured logging to Google Document AI and LLM providers

* chore: add placeholder file to maintain directory structure in web-app/dist

* chore(docker): remove Google Application Credentials configuration from docker-compose
2025-02-10 14:34:12 +00:00

65 lines
1.7 KiB
Go

package ocr
import (
"context"
"fmt"
"github.com/sirupsen/logrus"
)
// log is the package-level logger used by the ocr package. Its level can be
// changed through SetLogLevel.
var log = logrus.New()
// Provider defines the interface for OCR processing.
//
// NewProvider returns a Provider for the backend selected in Config.
type Provider interface {
// ProcessImage runs OCR on imageContent and returns a string result or an
// error.
// NOTE(review): the returned string is presumably the recognized text, and
// the accepted image encodings are not visible here - confirm against the
// implementations.
ProcessImage(ctx context.Context, imageContent []byte) (string, error)
}
// Config carries the settings NewProvider uses to build an OCR provider.
// NewProvider only validates the fields that belong to the selected backend.
type Config struct {
	// Provider selects the OCR backend, e.g. "llm" or "google_docai".
	Provider string

	// Settings for the Google Document AI backend.
	GoogleProjectID, GoogleLocation, GoogleProcessorID string

	// Settings for the LLM backend (from existing config).
	VisionLLMProvider, VisionLLMModel string
}
// NewProvider creates the OCR provider selected by config.Provider.
//
// Supported values are "google_docai" and "llm". Before a backend is
// constructed, the settings it requires are checked; if any of them is empty
// the returned error lists every missing Config field, so the
// misconfiguration can be fixed without guesswork. Any other provider name,
// including the empty string, yields an "unsupported OCR provider" error with
// the name quoted so that an empty value stays visible in the message.
//
// On success the result of the backend constructor is returned unchanged,
// including any error it reports.
func NewProvider(config Config) (Provider, error) {
	log.Info("Initializing OCR provider: ", config.Provider)

	switch config.Provider {
	case "google_docai":
		missing := missingSettings(
			requiredSetting{"GoogleProjectID", config.GoogleProjectID},
			requiredSetting{"GoogleLocation", config.GoogleLocation},
			requiredSetting{"GoogleProcessorID", config.GoogleProcessorID},
		)
		if missing != "" {
			return nil, fmt.Errorf("missing required Google Document AI configuration: %s", missing)
		}
		log.WithFields(logrus.Fields{
			"location":     config.GoogleLocation,
			"processor_id": config.GoogleProcessorID,
		}).Info("Using Google Document AI provider")
		return newGoogleDocAIProvider(config)

	case "llm":
		missing := missingSettings(
			requiredSetting{"VisionLLMProvider", config.VisionLLMProvider},
			requiredSetting{"VisionLLMModel", config.VisionLLMModel},
		)
		if missing != "" {
			return nil, fmt.Errorf("missing required LLM configuration: %s", missing)
		}
		log.WithFields(logrus.Fields{
			"provider": config.VisionLLMProvider,
			"model":    config.VisionLLMModel,
		}).Info("Using LLM OCR provider")
		return newLLMProvider(config)

	default:
		// %q keeps an empty or whitespace-only provider name visible.
		return nil, fmt.Errorf("unsupported OCR provider: %q", config.Provider)
	}
}

// requiredSetting pairs the name of a Config field with its configured value
// so that validation errors can name the settings that are missing.
type requiredSetting struct {
	name  string
	value string
}

// missingSettings returns the names of all settings whose value is empty,
// joined with ", " in the order given. It returns "" when nothing is missing.
func missingSettings(settings ...requiredSetting) string {
	// Plain concatenation is fine here: callers pass a handful of settings at
	// most, and it avoids pulling another import into the file.
	var names string
	for _, s := range settings {
		if s.value != "" {
			continue
		}
		if names != "" {
			names += ", "
		}
		names += s.name
	}
	return names
}
// SetLogLevel sets the logging level for the OCR package.
//
// It applies level to the package-level logger log.
func SetLogLevel(level logrus.Level) {
log.SetLevel(level)
}