feat: Add background routine to process documents with autoTag every 10 seconds

- Spawn a new goroutine in the `main` function to process documents with the `autoTag` every 10 seconds.
- Implement `processAutoTagDocuments` function to:
  - Fetch documents with the `autoTag`.
  - Generate suggestions using the LLM.
  - Apply the suggestions to the documents.
  - Remove the `autoTag` from the documents.
- Add logging for each document being processed.
This commit is contained in:
Jonas Hess 2024-10-20 23:49:42 +02:00 committed by Dominik Schröter
parent 6fad4bb153
commit 95c9d1bdb5

56
main.go
View file

@ -13,6 +13,7 @@ import (
"strings" "strings"
"sync" "sync"
"text/template" "text/template"
"time"
"github.com/Masterminds/sprig/v3" "github.com/Masterminds/sprig/v3"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@ -25,7 +26,8 @@ var (
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
openaiAPIKey = os.Getenv("OPENAI_API_KEY") openaiAPIKey = os.Getenv("OPENAI_API_KEY")
tagToFilter = "paperless-gpt" manualTag = "paperless-gpt"
autoTag = "paperless-gpt-auto"
llmProvider = os.Getenv("LLM_PROVIDER") llmProvider = os.Getenv("LLM_PROVIDER")
llmModel = os.Getenv("LLM_MODEL") llmModel = os.Getenv("LLM_MODEL")
@ -82,6 +84,13 @@ func main() {
loadTemplates() loadTemplates()
go func() {
for {
processAutoTagDocuments()
time.Sleep(10 * time.Second)
}
}()
// Create a Gin router with default middleware (logger and recovery) // Create a Gin router with default middleware (logger and recovery)
router := gin.Default() router := gin.Default()
@ -92,7 +101,7 @@ func main() {
api.POST("/generate-suggestions", generateSuggestionsHandler) api.POST("/generate-suggestions", generateSuggestionsHandler)
api.PATCH("/update-documents", updateDocumentsHandler) api.PATCH("/update-documents", updateDocumentsHandler)
api.GET("/filter-tag", func(c *gin.Context) { api.GET("/filter-tag", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"tag": tagToFilter}) c.JSON(http.StatusOK, gin.H{"tag": manualTag})
}) })
// get all tags // get all tags
api.GET("/tags", func(c *gin.Context) { api.GET("/tags", func(c *gin.Context) {
@ -126,6 +135,38 @@ func main() {
} }
} }
// processAutoTagDocuments performs a single pass of the automatic tagging
// workflow. It fetches the documents that carry autoTag, requests title and
// tag suggestions for them, strips autoTag from each suggestion's tag list,
// and hands the suggestions to updateDocuments.
//
// The function takes no arguments and returns nothing; every failure is
// logged and ends the current pass.
func processAutoTagDocuments() {
	ctx := context.Background()

	documents, err := getDocumentsByTags(ctx, paperlessBaseURL, paperlessAPIToken, []string{autoTag})
	if err != nil {
		log.Printf("Error fetching documents with autoTag: %v", err)
		return
	}

	// Nothing is waiting for automatic processing: skip the suggestion and
	// update calls instead of issuing them with an empty document list.
	if len(documents) == 0 {
		return
	}

	suggestionRequest := GenerateSuggestionsRequest{
		Documents:      documents,
		GenerateTitles: true,
		GenerateTags:   true,
	}

	suggestions, err := generateDocumentSuggestions(ctx, suggestionRequest)
	if err != nil {
		log.Printf("Error generating suggestions: %v", err)
		return
	}

	for i := range suggestions {
		log.Printf("Processing document ID %d with autoTag", suggestions[i].ID)
		// Drop autoTag from the suggested tags so the updated document is not
		// selected again by the next pass.
		suggestions[i].SuggestedTags = removeTagFromList(suggestions[i].SuggestedTags, autoTag)
	}

	if err := updateDocuments(ctx, paperlessBaseURL, paperlessAPIToken, suggestions); err != nil {
		log.Printf("Error updating documents: %v", err)
	}
}
func getPromptsHandler(c *gin.Context) { func getPromptsHandler(c *gin.Context) {
templateMutex.RLock() templateMutex.RLock()
defer templateMutex.RUnlock() defer templateMutex.RUnlock()
@ -309,7 +350,7 @@ func getAllTags(ctx context.Context, baseURL, apiToken string) (map[string]int,
func documentsHandler(c *gin.Context) { func documentsHandler(c *gin.Context) {
ctx := c.Request.Context() ctx := c.Request.Context()
documents, err := getDocumentsByTags(ctx, paperlessBaseURL, paperlessAPIToken, []string{tagToFilter}) documents, err := getDocumentsByTags(ctx, paperlessBaseURL, paperlessAPIToken, []string{manualTag})
if err != nil { if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching documents: %v", err)}) c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching documents: %v", err)})
log.Printf("Error fetching documents: %v", err) log.Printf("Error fetching documents: %v", err)
@ -435,7 +476,7 @@ func generateDocumentSuggestions(ctx context.Context, suggestionRequest Generate
// Prepare a list of tag names // Prepare a list of tag names
availableTagNames := make([]string, 0, len(availableTags)) availableTagNames := make([]string, 0, len(availableTags))
for tagName := range availableTags { for tagName := range availableTags {
if tagName == tagToFilter { if tagName == manualTag {
continue continue
} }
availableTagNames = append(availableTagNames, tagName) availableTagNames = append(availableTagNames, tagName)
@ -501,7 +542,7 @@ func generateDocumentSuggestions(ctx context.Context, suggestionRequest Generate
if suggestionRequest.GenerateTags { if suggestionRequest.GenerateTags {
suggestion.SuggestedTags = suggestedTags suggestion.SuggestedTags = suggestedTags
} else { } else {
suggestion.SuggestedTags = removeTagFromList(doc.Tags, tagToFilter) suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag)
} }
documentSuggestions = append(documentSuggestions, suggestion) documentSuggestions = append(documentSuggestions, suggestion)
mu.Unlock() mu.Unlock()
@ -648,11 +689,14 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
tags = document.OriginalDocument.Tags tags = document.OriginalDocument.Tags
} }
// Remove the autoTag to prevent infinite loop
tags = removeTagFromList(tags, autoTag)
// Map suggested tag names to IDs // Map suggested tag names to IDs
for _, tagName := range tags { for _, tagName := range tags {
if tagID, exists := availableTags[tagName]; exists { if tagID, exists := availableTags[tagName]; exists {
// Skip the tag that we are filtering // Skip the tag that we are filtering
if tagName == tagToFilter { if tagName == manualTag {
continue continue
} }
newTags = append(newTags, tagID) newTags = append(newTags, tagID)