From 95626371d723ac90407faa17b964e313504fe806 Mon Sep 17 00:00:00 2001 From: Icereed Date: Mon, 13 Jan 2025 08:31:51 +0100 Subject: [PATCH] Enhance logging (#114) * enhance logging * refactor: enhance logging in document suggestion and title generation processes --- app_http_handlers.go | 2 +- app_llm.go | 43 ++++++++++++++++++++++++++----------------- main.go | 25 ++++++++++++++++++------- ocr.go | 25 ++++++++++++++++++------- 4 files changed, 63 insertions(+), 32 deletions(-) diff --git a/app_http_handlers.go b/app_http_handlers.go index 27ea742..909edbd 100644 --- a/app_http_handlers.go +++ b/app_http_handlers.go @@ -119,7 +119,7 @@ func (app *App) generateSuggestionsHandler(c *gin.Context) { return } - results, err := app.generateDocumentSuggestions(ctx, suggestionRequest) + results, err := app.generateDocumentSuggestions(ctx, suggestionRequest, log.WithContext(ctx)) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)}) log.Errorf("Error processing documents: %v", err) diff --git a/app_llm.go b/app_llm.go index 22883a5..6e0dc2c 100644 --- a/app_llm.go +++ b/app_llm.go @@ -11,11 +11,17 @@ import ( _ "image/jpeg" + "github.com/sirupsen/logrus" "github.com/tmc/langchaingo/llms" ) // getSuggestedTags generates suggested tags for a document using the LLM -func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedTitle string, availableTags []string) ([]string, error) { +func (app *App) getSuggestedTags( + ctx context.Context, + content string, + suggestedTitle string, + availableTags []string, + logger *logrus.Entry) ([]string, error) { likelyLanguage := getLikelyLanguage() templateMutex.RLock() @@ -29,11 +35,12 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT "Content": content, }) if err != nil { + logger.Errorf("Error executing tag template: %v", err) return nil, fmt.Errorf("error executing tag template: %v", err) } prompt := promptBuffer.String() - log.Debugf("Tag suggestion prompt: %s", prompt) + logger.Debugf("Tag suggestion prompt: %s", prompt) completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{ { @@ -46,6 +53,7 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT }, }) if err != nil { + logger.Errorf("Error getting response from LLM: %v", err) return nil, fmt.Errorf("error getting response from LLM: %v", err) } @@ -69,7 +77,7 @@ func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedT return filteredTags, nil } -func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, error) { +func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte, logger *logrus.Entry) (string, error) { templateMutex.RLock() defer templateMutex.RUnlock() @@ -91,13 +99,13 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro return "", fmt.Errorf("error decoding image: %v", err) } bounds := img.Bounds() - log.Debugf("Image dimensions: %dx%d", bounds.Dx(), bounds.Dy()) + logger.Debugf("Image dimensions: %dx%d", bounds.Dx(), bounds.Dy()) // If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision var parts []llms.ContentPart if strings.ToLower(visionLlmProvider) != "openai" { // Log image size in kilobytes - log.Debugf("Image size: %d KB", len(jpegBytes)/1024) + logger.Debugf("Image size: %d KB", len(jpegBytes)/1024) parts = []llms.ContentPart{ llms.BinaryPart("image/jpeg", jpegBytes), llms.TextPart(prompt), @@ -105,7 +113,7 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro } else { base64Image := base64.StdEncoding.EncodeToString(jpegBytes) // Log image size in kilobytes - log.Debugf("Image size: %d KB", len(base64Image)/1024) + logger.Debugf("Image size: %d KB", len(base64Image)/1024) parts = []llms.ContentPart{ llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)), llms.TextPart(prompt), @@ -129,7 +137,7 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro } // getSuggestedTitle generates a suggested title for a document using the LLM -func (app *App) getSuggestedTitle(ctx context.Context, content string) (string, error) { +func (app *App) getSuggestedTitle(ctx context.Context, content string, logger *logrus.Entry) (string, error) { likelyLanguage := getLikelyLanguage() templateMutex.RLock() @@ -146,7 +154,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string) (string, prompt := promptBuffer.String() - log.Debugf("Title suggestion prompt: %s", prompt) + logger.Debugf("Title suggestion prompt: %s", prompt) completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{ { @@ -166,7 +174,7 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string) (string, } // generateDocumentSuggestions generates suggestions for a set of documents -func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest) ([]DocumentSuggestion, error) { +func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest, logger *logrus.Entry) ([]DocumentSuggestion, error) { // Fetch all available tags from paperless-ngx availableTagsMap, err := app.Client.GetAllTags(ctx) if err != nil { @@ -194,7 +202,8 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque go func(doc Document) { defer wg.Done() documentID := doc.ID - log.Printf("Processing Document ID %d...", documentID) + docLogger := documentLogger(documentID) + docLogger.Printf("Processing Document ID %d...", documentID) content := doc.Content if len(content) > 5000 { @@ -205,23 +214,23 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque var suggestedTags []string if suggestionRequest.GenerateTitles { - suggestedTitle, err = app.getSuggestedTitle(ctx, content) + suggestedTitle, err = app.getSuggestedTitle(ctx, content, docLogger) if err != nil { mu.Lock() errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err)) mu.Unlock() - log.Errorf("Error processing document %d: %v", documentID, err) + docLogger.Errorf("Error processing document %d: %v", documentID, err) return } } if suggestionRequest.GenerateTags { - suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames) + suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames, docLogger) if err != nil { mu.Lock() errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err)) mu.Unlock() - log.Errorf("Error generating tags for document %d: %v", documentID, err) + logger.Errorf("Error generating tags for document %d: %v", documentID, err) return } } @@ -233,7 +242,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque } // Titles if suggestionRequest.GenerateTitles { - log.Printf("Suggested title for document %d: %s", documentID, suggestedTitle) + docLogger.Printf("Suggested title for document %d: %s", documentID, suggestedTitle) suggestion.SuggestedTitle = suggestedTitle } else { suggestion.SuggestedTitle = doc.Title @@ -241,14 +250,14 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque // Tags if suggestionRequest.GenerateTags { - log.Printf("Suggested tags for document %d: %v", documentID, suggestedTags) + docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags) suggestion.SuggestedTags = suggestedTags } else { suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag) } documentSuggestions = append(documentSuggestions, suggestion) mu.Unlock() - log.Printf("Document %d processed successfully.", documentID) + docLogger.Printf("Document %d processed successfully.", documentID) }(documents[i]) } diff --git a/main.go b/main.go index 178791b..9290296 100644 --- a/main.go +++ b/main.go @@ -354,6 +354,11 @@ func validateOrDefaultEnvVars() { } } +// documentLogger creates a logger with document context +func documentLogger(documentID int) *logrus.Entry { + return log.WithField("document_id", documentID) +} + // processAutoTagDocuments handles the background auto-tagging of documents func (app *App) processAutoTagDocuments() (int, error) { ctx := context.Background() @@ -371,6 +376,8 @@ func (app *App) processAutoTagDocuments() (int, error) { log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag) documents = documents[:1] // Process only one document at a time + docLogger := documentLogger(documents[0].ID) + docLogger.Info("Processing document for auto-tagging") suggestionRequest := GenerateSuggestionsRequest{ Documents: documents, @@ -378,16 +385,17 @@ func (app *App) processAutoTagDocuments() (int, error) { GenerateTags: strings.ToLower(autoGenerateTags) != "false", } - suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest) + suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest, docLogger) if err != nil { - return 0, fmt.Errorf("error generating suggestions: %w", err) + return 0, fmt.Errorf("error generating suggestions for document %d: %w", documents[0].ID, err) } err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false) if err != nil { - return 0, fmt.Errorf("error updating documents: %w", err) + return 0, fmt.Errorf("error updating document %d: %w", documents[0].ID, err) } + docLogger.Info("Successfully processed document") return len(documents), nil } @@ -408,12 +416,14 @@ func (app *App) processAutoOcrTagDocuments() (int, error) { log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag) documents = documents[:1] // Process only one document at a time + docLogger := documentLogger(documents[0].ID) + docLogger.Info("Processing document for OCR") ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID) if err != nil { - return 0, fmt.Errorf("error processing document OCR: %w", err) + return 0, fmt.Errorf("error processing OCR for document %d: %w", documents[0].ID, err) } - log.Debugf("OCR content for document %d: %s", documents[0].ID, ocrContent) + docLogger.Debug("OCR processing completed") err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{ { @@ -423,10 +433,11 @@ func (app *App) processAutoOcrTagDocuments() (int, error) { }, }, app.Database, false) if err != nil { - return 0, fmt.Errorf("error updating documents: %w", err) + return 0, fmt.Errorf("error updating document %d after OCR: %w", documents[0].ID, err) } - return 1, nil // Processed one document + docLogger.Info("Successfully processed document OCR") + return 1, nil } // removeTagFromList removes a specific tag from a list of tags diff --git a/ocr.go b/ocr.go index b96d74a..6819a98 100644 --- a/ocr.go +++ b/ocr.go @@ -9,31 +9,42 @@ import ( // ProcessDocumentOCR processes a document through OCR and returns the combined text func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) { + docLogger := documentLogger(documentID) + docLogger.Info("Starting OCR processing") + imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID, limitOcrPages) defer func() { for _, imagePath := range imagePaths { - os.Remove(imagePath) + if err := os.Remove(imagePath); err != nil { + docLogger.WithError(err).WithField("image_path", imagePath).Warn("Failed to remove temporary image file") + } } }() if err != nil { - return "", fmt.Errorf("error downloading document images: %w", err) + return "", fmt.Errorf("error downloading document images for document %d: %w", documentID, err) } + docLogger.WithField("page_count", len(imagePaths)).Debug("Downloaded document images") + var ocrTexts []string - for _, imagePath := range imagePaths { + for i, imagePath := range imagePaths { + pageLogger := docLogger.WithField("page", i+1) + pageLogger.Debug("Processing page") + imageContent, err := os.ReadFile(imagePath) if err != nil { - return "", fmt.Errorf("error reading image file: %w", err) + return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err) } - ocrText, err := app.doOCRViaLLM(ctx, imageContent) + ocrText, err := app.doOCRViaLLM(ctx, imageContent, pageLogger) if err != nil { - return "", fmt.Errorf("error performing OCR: %w", err) + return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err) } - log.Debugf("OCR text: %s", ocrText) + pageLogger.Debug("OCR completed for page") ocrTexts = append(ocrTexts, ocrText) } + docLogger.Info("OCR processing completed successfully") return strings.Join(ocrTexts, "\n\n"), nil }