From e144661dfb8e83e652fbea268adb475b3983d910 Mon Sep 17 00:00:00 2001 From: Icereed Date: Mon, 13 Jan 2025 10:52:56 +0100 Subject: [PATCH] Add RemoveTags field to DocumentSuggestion for explicit tag removal (#118) * feat: add functionality to manage suggested and removable tags in document suggestions * feat: process multiple documents for auto-tagging and OCR --- app_llm.go | 6 +++- main.go | 81 +++++++++++++++++++++++++++------------------------- paperless.go | 5 ++-- types.go | 1 + 4 files changed, 51 insertions(+), 42 deletions(-) diff --git a/app_llm.go b/app_llm.go index 6e0dc2c..f070c13 100644 --- a/app_llm.go +++ b/app_llm.go @@ -253,8 +253,12 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags) suggestion.SuggestedTags = suggestedTags } else { - suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag) + suggestion.SuggestedTags = doc.Tags } + + // Mark the manual and auto tags for removal when the document is updated + suggestion.RemoveTags = []string{manualTag, autoTag} + documentSuggestions = append(documentSuggestions, suggestion) mu.Unlock() docLogger.Printf("Document %d processed successfully.", documentID) diff --git a/main.go b/main.go index 9290296..496ca0a 100644 --- a/main.go +++ b/main.go @@ -375,27 +375,28 @@ func (app *App) processAutoTagDocuments() (int, error) { log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag) - documents = documents[:1] // Process only one document at a time - docLogger := documentLogger(documents[0].ID) - docLogger.Info("Processing document for auto-tagging") + for _, document := range documents { + docLogger := documentLogger(document.ID) + docLogger.Info("Processing document for auto-tagging") - suggestionRequest := GenerateSuggestionsRequest{ - Documents: documents, - GenerateTitles: strings.ToLower(autoGenerateTitle) != "false", - GenerateTags: strings.ToLower(autoGenerateTags) 
!= "false", + suggestionRequest := GenerateSuggestionsRequest{ + Documents: []Document{document}, + GenerateTitles: strings.ToLower(autoGenerateTitle) != "false", + GenerateTags: strings.ToLower(autoGenerateTags) != "false", + } + + suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger) + if err != nil { + return 0, fmt.Errorf("error generating suggestions for document %d: %w", document.ID, err) + } + + err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false) + if err != nil { + return 0, fmt.Errorf("error updating document %d: %w", document.ID, err) + } + + docLogger.Info("Successfully processed document") } - - suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger) - if err != nil { - return 0, fmt.Errorf("error generating suggestions for document %d: %w", documents[0].ID, err) - } - - err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false) - if err != nil { - return 0, fmt.Errorf("error updating document %d: %w", documents[0].ID, err) - } - - docLogger.Info("Successfully processed document") return len(documents), nil } @@ -415,28 +416,30 @@ func (app *App) processAutoOcrTagDocuments() (int, error) { log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag) - documents = documents[:1] // Process only one document at a time - docLogger := documentLogger(documents[0].ID) - docLogger.Info("Processing document for OCR") + for _, document := range documents { + docLogger := documentLogger(document.ID) + docLogger.Info("Processing document for OCR") - ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID) - if err != nil { - return 0, fmt.Errorf("error processing OCR for document %d: %w", documents[0].ID, err) + ocrContent, err := app.ProcessDocumentOCR(ctx, document.ID) + if err != nil { + return 0, fmt.Errorf("error processing OCR for document %d: %w", document.ID, err) + } + docLogger.Debug("OCR processing completed") + + err = 
app.Client.UpdateDocuments(ctx, []DocumentSuggestion{ + { + ID: document.ID, + OriginalDocument: document, + SuggestedContent: ocrContent, + RemoveTags: []string{autoOcrTag}, + }, + }, app.Database, false) + if err != nil { + return 0, fmt.Errorf("error updating document %d after OCR: %w", document.ID, err) + } + + docLogger.Info("Successfully processed document OCR") } - docLogger.Debug("OCR processing completed") - - err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{ - { - ID: documents[0].ID, - OriginalDocument: documents[0], - SuggestedContent: ocrContent, - }, - }, app.Database, false) - if err != nil { - return 0, fmt.Errorf("error updating document %d after OCR: %w", documents[0].ID, err) - } - - docLogger.Info("Successfully processed document OCR") return 1, nil } diff --git a/paperless.go b/paperless.go index 16ab98b..063097b 100644 --- a/paperless.go +++ b/paperless.go @@ -272,8 +272,9 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum } // remove autoTag to prevent infinite loop (even if it is in the original tags) - originalTags = removeTagFromList(originalTags, autoTag) - originalTags = removeTagFromList(originalTags, autoOcrTag) + for _, tag := range document.RemoveTags { + originalTags = removeTagFromList(originalTags, tag) + } if len(tags) == 0 { tags = originalTags diff --git a/types.go b/types.go index 72238c0..11fbc53 100644 --- a/types.go +++ b/types.go @@ -79,4 +79,5 @@ type DocumentSuggestion struct { SuggestedTitle string `json:"suggested_title,omitempty"` SuggestedTags []string `json:"suggested_tags,omitempty"` SuggestedContent string `json:"suggested_content,omitempty"` + RemoveTags []string `json:"remove_tags,omitempty"` }