mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-13 05:08:01 -05:00
* feat(ocr): implement OCR provider interface and add Google Document AI and LLM providers * chore(deps): reorder dependencies in go.mod for better readability * chore: update version numbers and adjust Docker configuration for Google Document AI integration * feat(logging): add structured logging to Google Document AI and LLM providers * chore: add placeholder file to maintain directory structure in web-app/dist * chore(docker): remove Google Application Credentials configuration from docker-compose
118 lines
3.5 KiB
Go
118 lines
3.5 KiB
Go
package ocr
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
documentai "cloud.google.com/go/documentai/apiv1"
|
|
"cloud.google.com/go/documentai/apiv1/documentaipb"
|
|
"github.com/gabriel-vasile/mimetype"
|
|
"github.com/sirupsen/logrus"
|
|
"google.golang.org/api/option"
|
|
)
|
|
|
|
// GoogleDocAIProvider implements OCR using Google Document AI
|
|
type GoogleDocAIProvider struct {
|
|
projectID string
|
|
location string
|
|
processorID string
|
|
client *documentai.DocumentProcessorClient
|
|
}
|
|
|
|
func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
|
|
logger := log.WithFields(logrus.Fields{
|
|
"location": config.GoogleLocation,
|
|
"processor_id": config.GoogleProcessorID,
|
|
})
|
|
logger.Info("Creating new Google Document AI provider")
|
|
|
|
ctx := context.Background()
|
|
endpoint := fmt.Sprintf("%s-documentai.googleapis.com:443", config.GoogleLocation)
|
|
|
|
client, err := documentai.NewDocumentProcessorClient(ctx, option.WithEndpoint(endpoint))
|
|
if err != nil {
|
|
logger.WithError(err).Error("Failed to create Document AI client")
|
|
return nil, fmt.Errorf("error creating Document AI client: %w", err)
|
|
}
|
|
|
|
provider := &GoogleDocAIProvider{
|
|
projectID: config.GoogleProjectID,
|
|
location: config.GoogleLocation,
|
|
processorID: config.GoogleProcessorID,
|
|
client: client,
|
|
}
|
|
|
|
logger.Info("Successfully initialized Google Document AI provider")
|
|
return provider, nil
|
|
}
|
|
|
|
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (string, error) {
|
|
logger := log.WithFields(logrus.Fields{
|
|
"project_id": p.projectID,
|
|
"location": p.location,
|
|
"processor_id": p.processorID,
|
|
})
|
|
logger.Debug("Starting Document AI processing")
|
|
|
|
// Detect MIME type
|
|
mtype := mimetype.Detect(imageContent)
|
|
logger.WithField("mime_type", mtype.String()).Debug("Detected file type")
|
|
|
|
if !isImageMIMEType(mtype.String()) {
|
|
logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
|
|
return "", fmt.Errorf("unsupported file type: %s", mtype.String())
|
|
}
|
|
|
|
name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
|
|
|
|
req := &documentaipb.ProcessRequest{
|
|
Name: name,
|
|
Source: &documentaipb.ProcessRequest_RawDocument{
|
|
RawDocument: &documentaipb.RawDocument{
|
|
Content: imageContent,
|
|
MimeType: mtype.String(),
|
|
},
|
|
},
|
|
}
|
|
|
|
logger.Debug("Sending request to Document AI")
|
|
resp, err := p.client.ProcessDocument(ctx, req)
|
|
if err != nil {
|
|
logger.WithError(err).Error("Failed to process document")
|
|
return "", fmt.Errorf("error processing document: %w", err)
|
|
}
|
|
|
|
if resp == nil || resp.Document == nil {
|
|
logger.Error("Received nil response or document from Document AI")
|
|
return "", fmt.Errorf("received nil response or document from Document AI")
|
|
}
|
|
|
|
if resp.Document.Error != nil {
|
|
logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
|
|
return "", fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
|
|
}
|
|
|
|
logger.WithField("content_length", len(resp.Document.Text)).Info("Successfully processed document")
|
|
return resp.Document.Text, nil
|
|
}
|
|
|
|
// isImageMIMEType reports whether mimeType is one of the file types this
// provider accepts: JPEG ("image/jpeg" or "image/jpg"), PNG, TIFF and BMP
// images, plus PDF documents (despite the function's name).
//
// The comparison is exact and case-sensitive, so a value carrying parameters
// (for example "; charset=...") or surrounding whitespace is not accepted.
func isImageMIMEType(mimeType string) bool {
	// A switch over constant strings avoids allocating and filling a map on
	// every call, which the lookup-table form did.
	switch mimeType {
	case "image/jpeg",
		"image/jpg",
		"image/png",
		"image/tiff",
		"image/bmp",
		"application/pdf":
		return true
	default:
		return false
	}
}
|
|
|
|
// Close releases resources used by the provider
|
|
func (p *GoogleDocAIProvider) Close() error {
|
|
if p.client != nil {
|
|
return p.client.Close()
|
|
}
|
|
return nil
|
|
}
|