mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-13 13:18:02 -05:00
refactor: Add defensive checks for language code in Google DocAI provider (#226)
This commit is contained in:
parent
e2df794ac0
commit
38b93eaa67
1 changed file with 16 additions and 8 deletions
|
@@ -93,15 +93,23 @@ func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []b
|
|||
return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
|
||||
}
|
||||
|
||||
metadata := map[string]string{
|
||||
"provider": "google_docai",
|
||||
"mime_type": mtype.String(),
|
||||
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
|
||||
"processor_id": p.processorID,
|
||||
}
|
||||
|
||||
// Safely add language code if available
|
||||
if pages := resp.Document.GetPages(); len(pages) > 0 {
|
||||
if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
|
||||
metadata["lang_code"] = langs[0].GetLanguageCode()
|
||||
}
|
||||
}
|
||||
|
||||
result := &OCRResult{
|
||||
Text: resp.Document.Text,
|
||||
Metadata: map[string]string{
|
||||
"provider": "google_docai",
|
||||
"mime_type": mtype.String(),
|
||||
"lang_code": resp.Document.GetPages()[0].GetDetectedLanguages()[0].GetLanguageCode(),
|
||||
"page_count": fmt.Sprintf("%d", len(resp.Document.GetPages())),
|
||||
"processor_id": p.processorID,
|
||||
},
|
||||
Text: resp.Document.Text,
|
||||
Metadata: metadata,
|
||||
}
|
||||
|
||||
// Add hOCR output if available
|
||||
|
|
Loading…
Reference in a new issue