mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 21:08:00 -05:00
* feat(ocr): implement OCR provider interface and add Google Document AI and LLM providers
* chore(deps): reorder dependencies in go.mod for better readability
* chore: update version numbers and adjust Docker configuration for Google Document AI integration
* feat(logging): add structured logging to Google Document AI and LLM providers
* chore: add placeholder file to maintain directory structure in web-app/dist
* chore(docker): remove Google Application Credentials configuration from docker-compose
65 lines
1.7 KiB
Go
65 lines
1.7 KiB
Go
package ocr
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// log is the package-level logger: a dedicated logrus instance created with
// logrus.New(). Its level is adjusted through SetLogLevel.
var log = logrus.New()
|
|
|
|
// Provider defines the interface for OCR processing.
//
// Implementations are constructed through NewProvider, which selects one
// based on Config.Provider.
type Provider interface {
	// ProcessImage runs OCR on imageContent (the raw bytes of one image) and
	// returns the result as a string, or an error if processing fails.
	// NOTE(review): the result is presumably the extracted text; confirm
	// against the concrete providers.
	ProcessImage(ctx context.Context, imageContent []byte) (string, error)
}
|
|
|
|
// Config holds the OCR provider configuration consumed by NewProvider.
type Config struct {
	// Provider selects the OCR backend. NewProvider accepts "llm" and
	// "google_docai"; any other value is rejected as unsupported.
	Provider string

	// Google Document AI settings. All three must be non-empty when
	// Provider is "google_docai".
	GoogleProjectID   string
	GoogleLocation    string
	GoogleProcessorID string

	// LLM settings (from existing config). Both must be non-empty when
	// Provider is "llm".
	VisionLLMProvider string
	VisionLLMModel    string
}
|
|
|
|
// NewProvider creates a new OCR provider based on configuration
|
|
func NewProvider(config Config) (Provider, error) {
|
|
log.Info("Initializing OCR provider: ", config.Provider)
|
|
|
|
switch config.Provider {
|
|
case "google_docai":
|
|
if config.GoogleProjectID == "" || config.GoogleLocation == "" || config.GoogleProcessorID == "" {
|
|
return nil, fmt.Errorf("missing required Google Document AI configuration")
|
|
}
|
|
log.WithFields(logrus.Fields{
|
|
"location": config.GoogleLocation,
|
|
"processor_id": config.GoogleProcessorID,
|
|
}).Info("Using Google Document AI provider")
|
|
return newGoogleDocAIProvider(config)
|
|
|
|
case "llm":
|
|
if config.VisionLLMProvider == "" || config.VisionLLMModel == "" {
|
|
return nil, fmt.Errorf("missing required LLM configuration")
|
|
}
|
|
log.WithFields(logrus.Fields{
|
|
"provider": config.VisionLLMProvider,
|
|
"model": config.VisionLLMModel,
|
|
}).Info("Using LLM OCR provider")
|
|
return newLLMProvider(config)
|
|
|
|
default:
|
|
return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
|
|
}
|
|
}
|
|
|
|
// SetLogLevel sets the logging level for the OCR package
|
|
func SetLogLevel(level logrus.Level) {
|
|
log.SetLevel(level)
|
|
}
|