refactor: Add defensive checks for language code in Google DocAI provider (#226)

This commit is contained in:
mkrinke 2025-02-11 21:26:19 +01:00 committed by GitHub
parent e2df794ac0
commit 38b93eaa67
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -93,15 +93,23 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
}
result := &OCRResult{
Text: resp.Document.Text,
Metadata: map[string]string{
metadata := map[string]string{
"provider": "google_docai",
"mime_type": mtype.String(),
"lang_code": resp.Document.GetPages()[0].GetDetectedLanguages()[0].GetLanguageCode(),
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
"processor_id": p.processorID,
},
}
// Safely add language code if available
if pages := resp.Document.GetPages(); len(pages) > 0 {
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
metadata["lang_code"] = langs[0].GetLanguageCode()
}
}
result := &OCRResult{
Text: resp.Document.Text,
Metadata: metadata,
}
// Add hOCR output if available