mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 21:08:00 -05:00
refactor: Add defensive checks for language code in Google DocAI provider
This commit is contained in:
parent
e2df794ac0
commit
6d53b2af17
1 changed file with 16 additions and 8 deletions
|
@ -93,15 +93,23 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
|
||||||
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
|
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metadata := map[string]string{
|
||||||
|
"provider": "google_docai",
|
||||||
|
"mime_type": mtype.String(),
|
||||||
|
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
|
||||||
|
"processor_id": p.processorID,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safely add language code if available
|
||||||
|
if pages := resp.Document.GetPages(); len(pages) > 0 {
|
||||||
|
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
|
||||||
|
metadata["lang_code"] = langs[0].GetLanguageCode()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
result := &OCRResult{
|
result := &OCRResult{
|
||||||
Text: resp.Document.Text,
|
Text: resp.Document.Text,
|
||||||
Metadata: map[string]string{
|
Metadata: metadata,
|
||||||
"provider": "google_docai",
|
|
||||||
"mime_type": mtype.String(),
|
|
||||||
"lang_code": resp.Document.GetPages()[0].GetDetectedLanguages()[0].GetLanguageCode(),
|
|
||||||
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
|
|
||||||
"processor_id": p.processorID,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add hOCR output if available
|
// Add hOCR output if available
|
||||||
|
|
Loading…
Reference in a new issue