refactor: Add defensive checks for language code in Google DocAI provider

This commit is contained in:
mkrinke (aider) 2025-02-11 20:29:19 +01:00 committed by mkrinke
parent e2df794ac0
commit 6d53b2af17
No known key found for this signature in database

View file

@@ -93,15 +93,23 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message) return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
} }
metadata := map[string]string{
"provider": "google_docai",
"mime_type": mtype.String(),
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
"processor_id": p.processorID,
}
// Safely add language code if available
if pages := resp.Document.GetPages(); len(pages) > 0 {
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
metadata["lang_code"] = langs[0].GetLanguageCode()
}
}
result := &OCRResult{ result := &OCRResult{
Text: resp.Document.Text, Text: resp.Document.Text,
Metadata: map[string]string{ Metadata: metadata,
"provider": "google_docai",
"mime_type": mtype.String(),
"lang_code": resp.Document.GetPages()[0].GetDetectedLanguages()[0].GetLanguageCode(),
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
"processor_id": p.processorID,
},
} }
// Add hOCR output if available // Add hOCR output if available