mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-13 13:18:02 -05:00
Add RemoveTags field to DocumentSuggestion for explicit tag removal (#118)
* feat: add functionality to manage suggested and removable tags in document suggestions
* feat: process multiple documents for auto-tagging and OCR
This commit is contained in:
parent
9fb2f65909
commit
e144661dfb
4 changed files with 51 additions and 42 deletions
|
@ -253,8 +253,12 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
||||||
docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
|
docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
|
||||||
suggestion.SuggestedTags = suggestedTags
|
suggestion.SuggestedTags = suggestedTags
|
||||||
} else {
|
} else {
|
||||||
suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag)
|
suggestion.SuggestedTags = doc.Tags
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove manual tag from the list of suggested tags
|
||||||
|
suggestion.RemoveTags = []string{manualTag, autoTag}
|
||||||
|
|
||||||
documentSuggestions = append(documentSuggestions, suggestion)
|
documentSuggestions = append(documentSuggestions, suggestion)
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
docLogger.Printf("Document %d processed successfully.", documentID)
|
docLogger.Printf("Document %d processed successfully.", documentID)
|
||||||
|
|
81
main.go
81
main.go
|
@ -375,27 +375,28 @@ func (app *App) processAutoTagDocuments() (int, error) {
|
||||||
|
|
||||||
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
|
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
|
||||||
|
|
||||||
documents = documents[:1] // Process only one document at a time
|
for _, document := range documents {
|
||||||
docLogger := documentLogger(documents[0].ID)
|
docLogger := documentLogger(document.ID)
|
||||||
docLogger.Info("Processing document for auto-tagging")
|
docLogger.Info("Processing document for auto-tagging")
|
||||||
|
|
||||||
suggestionRequest := GenerateSuggestionsRequest{
|
suggestionRequest := GenerateSuggestionsRequest{
|
||||||
Documents: documents,
|
Documents: []Document{document},
|
||||||
GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
|
GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
|
||||||
GenerateTags: strings.ToLower(autoGenerateTags) != "false",
|
GenerateTags: strings.ToLower(autoGenerateTags) != "false",
|
||||||
|
}
|
||||||
|
|
||||||
|
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("error updating document %d: %w", document.ID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
docLogger.Info("Successfully processed document")
|
||||||
}
|
}
|
||||||
|
|
||||||
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger)
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("error generating suggestions for document %d: %w", documents[0].ID, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("error updating document %d: %w", documents[0].ID, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
docLogger.Info("Successfully processed document")
|
|
||||||
return len(documents), nil
|
return len(documents), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,28 +416,30 @@ func (app *App) processAutoOcrTagDocuments() (int, error) {
|
||||||
|
|
||||||
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)
|
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)
|
||||||
|
|
||||||
documents = documents[:1] // Process only one document at a time
|
for _, document := range documents {
|
||||||
docLogger := documentLogger(documents[0].ID)
|
docLogger := documentLogger(document.ID)
|
||||||
docLogger.Info("Processing document for OCR")
|
docLogger.Info("Processing document for OCR")
|
||||||
|
|
||||||
ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID)
|
ocrContent, err := app.ProcessDocumentOCR(ctx, document.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("error processing OCR for document %d: %w", documents[0].ID, err)
|
return 0, fmt.Errorf("error processing OCR for document %d: %w", document.ID, err)
|
||||||
|
}
|
||||||
|
docLogger.Debug("OCR processing completed")
|
||||||
|
|
||||||
|
err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
|
||||||
|
{
|
||||||
|
ID: document.ID,
|
||||||
|
OriginalDocument: document,
|
||||||
|
SuggestedContent: ocrContent,
|
||||||
|
RemoveTags: []string{autoOcrTag},
|
||||||
|
},
|
||||||
|
}, app.Database, false)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("error updating document %d after OCR: %w", document.ID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
docLogger.Info("Successfully processed document OCR")
|
||||||
}
|
}
|
||||||
docLogger.Debug("OCR processing completed")
|
|
||||||
|
|
||||||
err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
|
|
||||||
{
|
|
||||||
ID: documents[0].ID,
|
|
||||||
OriginalDocument: documents[0],
|
|
||||||
SuggestedContent: ocrContent,
|
|
||||||
},
|
|
||||||
}, app.Database, false)
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("error updating document %d after OCR: %w", documents[0].ID, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
docLogger.Info("Successfully processed document OCR")
|
|
||||||
return 1, nil
|
return 1, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,8 +272,9 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove autoTag to prevent infinite loop (even if it is in the original tags)
|
// remove autoTag to prevent infinite loop (even if it is in the original tags)
|
||||||
originalTags = removeTagFromList(originalTags, autoTag)
|
for _, tag := range document.RemoveTags {
|
||||||
originalTags = removeTagFromList(originalTags, autoOcrTag)
|
originalTags = removeTagFromList(originalTags, tag)
|
||||||
|
}
|
||||||
|
|
||||||
if len(tags) == 0 {
|
if len(tags) == 0 {
|
||||||
tags = originalTags
|
tags = originalTags
|
||||||
|
|
1
types.go
1
types.go
|
@ -79,4 +79,5 @@ type DocumentSuggestion struct {
|
||||||
SuggestedTitle string `json:"suggested_title,omitempty"`
|
SuggestedTitle string `json:"suggested_title,omitempty"`
|
||||||
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
||||||
SuggestedContent string `json:"suggested_content,omitempty"`
|
SuggestedContent string `json:"suggested_content,omitempty"`
|
||||||
|
RemoveTags []string `json:"remove_tags,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue