feat: auto create missing correspondents

This commit is contained in:
Jonas Hess 2024-10-31 22:37:35 +01:00
parent 5dadbcb53d
commit 16281be6d3
7 changed files with 327 additions and 72 deletions

View file

@ -134,7 +134,7 @@ If you prefer to run the application manually:
### Environment Variables ### Environment Variables
| Variable | Description | Required | | Variable | Description | Required |
|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|----------| |----------------------------|----------------------------------------------------------------------------------------------------------------------------------------|----------|
| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes | | `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes | | `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes | | `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
@ -145,6 +145,7 @@ If you prefer to run the application manually:
| `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No | | `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No |
| `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No | | `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No |
| `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No | | `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No |
| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondent suggestions. Example: `John Doe, Jane Smith`. | No |
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container. **Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.

View file

@ -97,7 +97,7 @@ func (app *App) getAllTagsHandler(c *gin.Context) {
func (app *App) documentsHandler(c *gin.Context) { func (app *App) documentsHandler(c *gin.Context) {
ctx := c.Request.Context() ctx := c.Request.Context()
documents, err := app.Client.GetDocumentsByTags(ctx, []string{manualTag}) documents, err := app.Client.GetDocumentsByTags(ctx, []string{manualTag}, 25)
if err != nil { if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching documents: %v", err)}) c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching documents: %v", err)})
log.Errorf("Error fetching documents: %v", err) log.Errorf("Error fetching documents: %v", err)

View file

@ -10,6 +10,46 @@ import (
"github.com/tmc/langchaingo/llms" "github.com/tmc/langchaingo/llms"
) )
// getSuggestedCorrespondent asks the LLM to suggest a single correspondent for a document.
//
// The prompt is rendered from correspondentTemplate using the likely document
// language, the names of the already existing correspondents, the blacklist,
// the suggested title (may be empty) and the document content.
//
// NOTE: the blacklist is only handed to the prompt; the LLM reply is not
// validated against it here.
//
// It returns the whitespace-trimmed content of the first completion choice.
// An error is returned when the template cannot be executed, when the LLM
// call fails, or when the LLM answers without any choice.
func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, suggestedTitle string, availableCorrespondents []string, correspondentBlackList []string) (string, error) {
	likelyLanguage := getLikelyLanguage()

	// Hold the read lock while the shared template is in use.
	templateMutex.RLock()
	defer templateMutex.RUnlock()

	var promptBuffer bytes.Buffer
	err := correspondentTemplate.Execute(&promptBuffer, map[string]interface{}{
		"Language":                likelyLanguage,
		"AvailableCorrespondents": availableCorrespondents,
		"BlackList":               correspondentBlackList,
		"Title":                   suggestedTitle,
		"Content":                 content,
	})
	if err != nil {
		return "", fmt.Errorf("error executing correspondent template: %w", err)
	}

	prompt := promptBuffer.String()
	log.Debugf("Correspondent suggestion prompt: %s", prompt)

	completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: []llms.ContentPart{
				llms.TextContent{
					Text: prompt,
				},
			},
			Role: llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		return "", fmt.Errorf("error getting response from LLM: %w", err)
	}

	// Guard against an empty answer: indexing Choices[0] on an empty slice
	// would panic inside the per-document worker.
	if completion == nil || len(completion.Choices) == 0 {
		return "", fmt.Errorf("error getting response from LLM: no choices returned")
	}

	return strings.TrimSpace(completion.Choices[0].Content), nil
}
// getSuggestedTags generates suggested tags for a document using the LLM // getSuggestedTags generates suggested tags for a document using the LLM
func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedTitle string, availableTags []string) ([]string, error) { func (app *App) getSuggestedTags(ctx context.Context, content string, suggestedTitle string, availableTags []string) ([]string, error) {
likelyLanguage := getLikelyLanguage() likelyLanguage := getLikelyLanguage()
@ -154,6 +194,18 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
availableTagNames = append(availableTagNames, tagName) availableTagNames = append(availableTagNames, tagName)
} }
// Prepare a list of document correspondents
availableCorrespondentsMap, err := app.Client.GetAllCorrespondents(ctx)
if err != nil {
return nil, fmt.Errorf("failed to fetch available correspondents: %v", err)
}
// Prepare a list of correspondent names
availableCorrespondentNames := make([]string, 0, len(availableCorrespondentsMap))
for correspondentName := range availableCorrespondentsMap {
availableCorrespondentNames = append(availableCorrespondentNames, correspondentName)
}
documents := suggestionRequest.Documents documents := suggestionRequest.Documents
documentSuggestions := []DocumentSuggestion{} documentSuggestions := []DocumentSuggestion{}
@ -175,6 +227,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
var suggestedTitle string var suggestedTitle string
var suggestedTags []string var suggestedTags []string
var suggestedCorrespondent string
if suggestionRequest.GenerateTitles { if suggestionRequest.GenerateTitles {
suggestedTitle, err = app.getSuggestedTitle(ctx, content) suggestedTitle, err = app.getSuggestedTitle(ctx, content)
@ -198,6 +251,18 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
} }
} }
if suggestionRequest.GenerateCorrespondents {
suggestedCorrespondent, err = app.getSuggestedCorrespondent(ctx, content, suggestedTitle, availableCorrespondentNames, correspondentBlackList)
if err != nil {
mu.Lock()
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
mu.Unlock()
log.Errorf("Error generating correspondents for document %d: %v", documentID, err)
return
}
}
mu.Lock() mu.Lock()
suggestion := DocumentSuggestion{ suggestion := DocumentSuggestion{
ID: documentID, ID: documentID,
@ -218,6 +283,15 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
} else { } else {
suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag) suggestion.SuggestedTags = removeTagFromList(doc.Tags, manualTag)
} }
// Correspondents
if suggestionRequest.GenerateCorrespondents {
log.Printf("Suggested correspondent for document %d: %s", documentID, suggestedCorrespondent)
suggestion.SuggestedCorrespondent = suggestedCorrespondent
} else {
suggestion.SuggestedCorrespondent = ""
}
documentSuggestions = append(documentSuggestions, suggestion) documentSuggestions = append(documentSuggestions, suggestion)
mu.Unlock() mu.Unlock()
log.Printf("Document %d processed successfully.", documentID) log.Printf("Document %d processed successfully.", documentID)

50
main.go
View file

@ -36,10 +36,12 @@ var (
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
visionLlmModel = os.Getenv("VISION_LLM_MODEL") visionLlmModel = os.Getenv("VISION_LLM_MODEL")
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
// Templates // Templates
titleTemplate *template.Template titleTemplate *template.Template
tagTemplate *template.Template tagTemplate *template.Template
correspondentTemplate *template.Template
ocrTemplate *template.Template ocrTemplate *template.Template
templateMutex sync.RWMutex templateMutex sync.RWMutex
@ -65,6 +67,34 @@ Content:
Please concisely select the {{.Language}} tags from the list above that best describe the document. Please concisely select the {{.Language}} tags from the list above that best describe the document.
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable. Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
`
defaultCorrespondentTemplate = `I will provide you with the content of a document. Your task is to suggest a correspondent that is most relevant to the document.
Correspondents are the senders of documents that reach you. In the other direction, correspondents are the recipients of documents that you send.
In Paperless-ngx we can imagine correspondents as virtual drawers in which all documents of a person or company are stored. With just one click, we can find all the documents assigned to a specific correspondent.
Try to suggest a correspondent, either from the example list or come up with a new correspondent.
Respond only with a correspondent, without any additional information!
Be sure to choose a correspondent that is most relevant to the document.
Try to avoid any legal or financial suffixes like "GmbH" or "AG" in the correspondent name. For example use "Microsoft" instead of "Microsoft Ireland Operations Limited" or "Amazon" instead of "Amazon EU S.a.r.l.".
If you can't find a suitable correspondent, you can respond with "Unknown".
Example Correspondents:
{{.AvailableCorrespondents | join ", "}}
List of Correspondents with Blacklisted Names. Please avoid these correspondents or variations of their names:
{{.BlackList | join ", "}}
Title of the document:
{{.Title}}
The content is likely in {{.Language}}.
Document Content:
{{.Content}}
` `
defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format.` defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format.`
@ -243,7 +273,7 @@ func validateEnvVars() {
func (app *App) processAutoTagDocuments() (int, error) { func (app *App) processAutoTagDocuments() (int, error) {
ctx := context.Background() ctx := context.Background()
documents, err := app.Client.GetDocumentsByTags(ctx, []string{autoTag}) documents, err := app.Client.GetDocumentsByTags(ctx, []string{autoTag}, 1)
if err != nil { if err != nil {
return 0, fmt.Errorf("error fetching documents with autoTag: %w", err) return 0, fmt.Errorf("error fetching documents with autoTag: %w", err)
} }
@ -255,12 +285,11 @@ func (app *App) processAutoTagDocuments() (int, error) {
log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag) log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoTag)
documents = documents[:1] // Process only one document at a time
suggestionRequest := GenerateSuggestionsRequest{ suggestionRequest := GenerateSuggestionsRequest{
Documents: documents, Documents: documents,
GenerateTitles: true, GenerateTitles: true,
GenerateTags: true, GenerateTags: true,
GenerateCorrespondents: true,
} }
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest) suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
@ -337,6 +366,21 @@ func loadTemplates() {
log.Fatalf("Failed to parse tag template: %v", err) log.Fatalf("Failed to parse tag template: %v", err)
} }
// Load correspondent template
correspondentTemplatePath := filepath.Join(promptsDir, "correspondent_prompt.tmpl")
correspondentTemplateContent, err := os.ReadFile(correspondentTemplatePath)
if err != nil {
log.Errorf("Could not read %s, using default template: %v", correspondentTemplatePath, err)
correspondentTemplateContent = []byte(defaultCorrespondentTemplate)
if err := os.WriteFile(correspondentTemplatePath, correspondentTemplateContent, os.ModePerm); err != nil {
log.Fatalf("Failed to write default correspondent template to disk: %v", err)
}
}
correspondentTemplate, err = template.New("correspondent").Funcs(sprig.FuncMap()).Parse(string(correspondentTemplateContent))
if err != nil {
log.Fatalf("Failed to parse correspondent template: %v", err)
}
// Load OCR template // Load OCR template
ocrTemplatePath := filepath.Join(promptsDir, "ocr_prompt.tmpl") ocrTemplatePath := filepath.Join(promptsDir, "ocr_prompt.tmpl")
ocrTemplateContent, err := os.ReadFile(ocrTemplatePath) ocrTemplateContent, err := os.ReadFile(ocrTemplatePath)

View file

@ -39,29 +39,29 @@ func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
} }
// Do method to make requests to the Paperless-NGX API // Do method to make requests to the Paperless-NGX API
func (c *PaperlessClient) Do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) { func (client *PaperlessClient) Do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
url := fmt.Sprintf("%s/%s", c.BaseURL, strings.TrimLeft(path, "/")) url := fmt.Sprintf("%s/%s", client.BaseURL, strings.TrimLeft(path, "/"))
req, err := http.NewRequestWithContext(ctx, method, url, body) req, err := http.NewRequestWithContext(ctx, method, url, body)
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("Authorization", fmt.Sprintf("Token %s", c.APIToken)) req.Header.Set("Authorization", fmt.Sprintf("Token %s", client.APIToken))
// Set Content-Type if body is present // Set Content-Type if body is present
if body != nil { if body != nil {
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
} }
return c.HTTPClient.Do(req) return client.HTTPClient.Do(req)
} }
// GetAllTags retrieves all tags from the Paperless-NGX API // GetAllTags retrieves all tags from the Paperless-NGX API
func (c *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error) { func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error) {
tagIDMapping := make(map[string]int) tagIDMapping := make(map[string]int)
path := "api/tags/" path := "api/tags/"
for path != "" { for path != "" {
resp, err := c.Do(ctx, "GET", path, nil) resp, err := client.Do(ctx, "GET", path, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -92,8 +92,8 @@ func (c *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error
// Extract relative path from the Next URL // Extract relative path from the Next URL
if tagsResponse.Next != "" { if tagsResponse.Next != "" {
nextURL := tagsResponse.Next nextURL := tagsResponse.Next
if strings.HasPrefix(nextURL, c.BaseURL) { if strings.HasPrefix(nextURL, client.BaseURL) {
nextURL = strings.TrimPrefix(nextURL, c.BaseURL+"/") nextURL = strings.TrimPrefix(nextURL, client.BaseURL+"/")
} }
path = nextURL path = nextURL
} else { } else {
@ -105,15 +105,15 @@ func (c *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error
} }
// GetDocumentsByTags retrieves documents that match the specified tags // GetDocumentsByTags retrieves documents that match the specified tags
func (c *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string) ([]Document, error) { func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string, pageSize int) ([]Document, error) {
tagQueries := make([]string, len(tags)) tagQueries := make([]string, len(tags))
for i, tag := range tags { for i, tag := range tags {
tagQueries[i] = fmt.Sprintf("tag:%s", tag) tagQueries[i] = fmt.Sprintf("tag:%s", tag)
} }
searchQuery := strings.Join(tagQueries, " ") searchQuery := strings.Join(tagQueries, " ")
path := fmt.Sprintf("api/documents/?query=%s", urlEncode(searchQuery)) path := fmt.Sprintf("api/documents/?query=%s&page_size=%d", urlEncode(searchQuery), pageSize)
resp, err := c.Do(ctx, "GET", path, nil) resp, err := client.Do(ctx, "GET", path, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -130,7 +130,7 @@ func (c *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string)
return nil, err return nil, err
} }
allTags, err := c.GetAllTags(ctx) allTags, err := client.GetAllTags(ctx)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -159,9 +159,9 @@ func (c *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string)
} }
// DownloadPDF downloads the PDF file of the specified document // DownloadPDF downloads the PDF file of the specified document
func (c *PaperlessClient) DownloadPDF(ctx context.Context, document Document) ([]byte, error) { func (client *PaperlessClient) DownloadPDF(ctx context.Context, document Document) ([]byte, error) {
path := fmt.Sprintf("api/documents/%d/download/", document.ID) path := fmt.Sprintf("api/documents/%d/download/", document.ID)
resp, err := c.Do(ctx, "GET", path, nil) resp, err := client.Do(ctx, "GET", path, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -175,9 +175,9 @@ func (c *PaperlessClient) DownloadPDF(ctx context.Context, document Document) ([
return io.ReadAll(resp.Body) return io.ReadAll(resp.Body)
} }
func (c *PaperlessClient) GetDocument(ctx context.Context, documentID int) (Document, error) { func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int) (Document, error) {
path := fmt.Sprintf("api/documents/%d/", documentID) path := fmt.Sprintf("api/documents/%d/", documentID)
resp, err := c.Do(ctx, "GET", path, nil) resp, err := client.Do(ctx, "GET", path, nil)
if err != nil { if err != nil {
return Document{}, err return Document{}, err
} }
@ -194,7 +194,7 @@ func (c *PaperlessClient) GetDocument(ctx context.Context, documentID int) (Docu
return Document{}, err return Document{}, err
} }
allTags, err := c.GetAllTags(ctx) allTags, err := client.GetAllTags(ctx)
if err != nil { if err != nil {
return Document{}, err return Document{}, err
} }
@ -218,14 +218,32 @@ func (c *PaperlessClient) GetDocument(ctx context.Context, documentID int) (Docu
} }
// UpdateDocuments updates the specified documents with suggested changes // UpdateDocuments updates the specified documents with suggested changes
func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error { func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error {
// Fetch all available tags // Fetch all available tags
availableTags, err := c.GetAllTags(ctx) availableTags, err := client.GetAllTags(ctx)
if err != nil { if err != nil {
log.Errorf("Error fetching available tags: %v", err) log.Errorf("Error fetching available tags: %v", err)
return err return err
} }
documentsContainSuggestedCorrespondent := false
for _, document := range documents {
if document.SuggestedCorrespondent != "" {
documentsContainSuggestedCorrespondent = true
break
}
}
availableCorrespondents := make(map[string]int)
if documentsContainSuggestedCorrespondent {
availableCorrespondents, err = client.GetAllCorrespondents(ctx)
if err != nil {
log.Errorf("Error fetching available correspondents: %v",
err)
return err
}
}
for _, document := range documents { for _, document := range documents {
documentID := document.ID documentID := document.ID
@ -248,12 +266,27 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum
} }
newTags = append(newTags, tagID) newTags = append(newTags, tagID)
} else { } else {
log.Warnf("Tag '%s' does not exist in paperless-ngx, skipping.", tagName) log.Errorf("Suggested tag '%s' does not exist in paperless-ngx, skipping.", tagName)
} }
} }
updatedFields["tags"] = newTags updatedFields["tags"] = newTags
// Map suggested correspondent names to IDs
if document.SuggestedCorrespondent != "" {
if correspondentID, exists := availableCorrespondents[document.SuggestedCorrespondent]; exists {
updatedFields["correspondent"] = correspondentID
} else {
newCorrespondent := instantiateCorrespondent(document.SuggestedCorrespondent)
newCorrespondentID, err := client.CreateCorrespondent(context.Background(), newCorrespondent)
if err != nil {
log.Errorf("Error creating correspondent with name %s: %v\n", document.SuggestedCorrespondent, err)
return err
}
log.Infof("Created correspondent with name %s and ID %d\n", document.SuggestedCorrespondent, newCorrespondentID)
updatedFields["correspondent"] = newCorrespondentID
}
}
suggestedTitle := document.SuggestedTitle suggestedTitle := document.SuggestedTitle
if len(suggestedTitle) > 128 { if len(suggestedTitle) > 128 {
suggestedTitle = suggestedTitle[:128] suggestedTitle = suggestedTitle[:128]
@ -279,7 +312,7 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum
// Send the update request using the generic Do method // Send the update request using the generic Do method
path := fmt.Sprintf("api/documents/%d/", documentID) path := fmt.Sprintf("api/documents/%d/", documentID)
resp, err := c.Do(ctx, "PATCH", path, bytes.NewBuffer(jsonData)) resp, err := client.Do(ctx, "PATCH", path, bytes.NewBuffer(jsonData))
if err != nil { if err != nil {
log.Errorf("Error updating document %d: %v", documentID, err) log.Errorf("Error updating document %d: %v", documentID, err)
return err return err
@ -299,9 +332,9 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum
} }
// DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images // DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images
func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int) ([]string, error) { func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int) ([]string, error) {
// Create a directory named after the document ID // Create a directory named after the document ID
docDir := filepath.Join(c.GetCacheFolder(), fmt.Sprintf("/document-%d", documentId)) docDir := filepath.Join(client.GetCacheFolder(), fmt.Sprintf("/document-%d", documentId))
if _, err := os.Stat(docDir); os.IsNotExist(err) { if _, err := os.Stat(docDir); os.IsNotExist(err) {
err = os.MkdirAll(docDir, 0755) err = os.MkdirAll(docDir, 0755)
if err != nil { if err != nil {
@ -326,7 +359,7 @@ func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, document
// Proceed with downloading and converting the document to images // Proceed with downloading and converting the document to images
path := fmt.Sprintf("api/documents/%d/download/", documentId) path := fmt.Sprintf("api/documents/%d/download/", documentId)
resp, err := c.Do(ctx, "GET", path, nil) resp, err := client.Do(ctx, "GET", path, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -418,14 +451,97 @@ func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, document
} }
// GetCacheFolder returns the cache folder for the PaperlessClient // GetCacheFolder returns the cache folder for the PaperlessClient
func (c *PaperlessClient) GetCacheFolder() string { func (client *PaperlessClient) GetCacheFolder() string {
if c.CacheFolder == "" { if client.CacheFolder == "" {
c.CacheFolder = filepath.Join(os.TempDir(), "paperless-gpt") client.CacheFolder = filepath.Join(os.TempDir(), "paperless-gpt")
} }
return c.CacheFolder return client.CacheFolder
} }
// urlEncode encodes a string for safe URL usage // urlEncode encodes a string for safe URL usage
func urlEncode(s string) string { func urlEncode(s string) string {
return strings.ReplaceAll(s, " ", "+") return strings.ReplaceAll(s, " ", "+")
} }
// instantiateCorrespondent returns a Correspondent carrying the given name and
// the defaults used for automatically created correspondents: matching
// algorithm 0, an empty match pattern, case-insensitive matching enabled and
// no owner.
func instantiateCorrespondent(name string) Correspondent {
	var correspondent Correspondent

	correspondent.Name = name
	// NOTE(review): 0 presumably means "no automatic matching" in
	// Paperless-NGX; confirm against the API documentation.
	correspondent.MatchingAlgorithm = 0
	correspondent.Match = ""
	correspondent.IsInsensitive = true
	correspondent.Owner = nil

	return correspondent
}
// CreateCorrespondent registers a new correspondent in Paperless-NGX.
//
// The correspondent is serialized to JSON and POSTed to the correspondents
// endpoint. When the server answers with 201 Created, the ID found in the
// response body is returned. Any other status code results in an error that
// carries the status code and the raw response body.
func (client *PaperlessClient) CreateCorrespondent(ctx context.Context, correspondent Correspondent) (int, error) {
	payload, err := json.Marshal(correspondent)
	if err != nil {
		return 0, err
	}

	resp, err := client.Do(ctx, "POST", "api/correspondents/", bytes.NewBuffer(payload))
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusCreated {
		// The read error is ignored on purpose: the body only enriches the message.
		rawBody, _ := io.ReadAll(resp.Body)
		return 0, fmt.Errorf("error creating correspondent: %d, %s", resp.StatusCode, string(rawBody))
	}

	// Only the ID of the freshly created correspondent is needed.
	var created struct {
		ID int `json:"id"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&created); err != nil {
		return 0, err
	}

	return created.ID, nil
}
// CorrespondentResponse represents one page of the correspondents list
// returned by the Paperless-NGX API.
type CorrespondentResponse struct {
	// Next is the URL of the following page; it is empty on the last page.
	Next    string `json:"next"`
	Results []struct {
		ID   int    `json:"id"`
		Name string `json:"name"`
	} `json:"results"`
}

// GetAllCorrespondents retrieves all correspondents from the Paperless-NGX API
// and returns them as a mapping from correspondent name to correspondent ID.
//
// The first request asks for a large page (page_size=9999). If the server
// nevertheless reports a further page, the "next" links are followed until
// the list is exhausted, so the result is never silently truncated. An error
// is returned when a request fails, a page is answered with a status other
// than 200 OK, or a response body cannot be decoded.
func (client *PaperlessClient) GetAllCorrespondents(ctx context.Context) (map[string]int, error) {
	correspondentIDMapping := make(map[string]int)
	path := "api/correspondents/?page_size=9999"

	for path != "" {
		page, err := client.fetchCorrespondentsPage(ctx, path)
		if err != nil {
			return nil, err
		}

		for _, correspondent := range page.Results {
			correspondentIDMapping[correspondent.Name] = correspondent.ID
		}

		path = correspondentsNextPath(client.BaseURL, page.Next)
	}

	return correspondentIDMapping, nil
}

// fetchCorrespondentsPage requests a single page of correspondents. It is a
// separate method so that each response body is closed as soon as its page
// has been decoded instead of piling up deferred closes in the paging loop.
func (client *PaperlessClient) fetchCorrespondentsPage(ctx context.Context, path string) (CorrespondentResponse, error) {
	var page CorrespondentResponse

	resp, err := client.Do(ctx, "GET", path, nil)
	if err != nil {
		return page, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The read error is ignored on purpose: the body only enriches the message.
		bodyBytes, _ := io.ReadAll(resp.Body)
		return page, fmt.Errorf("error fetching correspondents: %d, %s", resp.StatusCode, string(bodyBytes))
	}

	if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
		return CorrespondentResponse{}, err
	}

	return page, nil
}

// correspondentsNextPath converts the absolute "next" URL of a paginated
// response into a path relative to baseURL, as expected by PaperlessClient.Do.
// It returns "" when there is no further page.
func correspondentsNextPath(baseURL, next string) string {
	if next == "" {
		return ""
	}

	// Usual case: the server echoes the base URL the client talks to.
	prefix := strings.TrimRight(baseURL, "/") + "/"
	if strings.HasPrefix(next, prefix) {
		return strings.TrimPrefix(next, prefix)
	}

	// The server may report a different scheme/host (e.g. behind a reverse
	// proxy); keep only the part starting at the endpoint itself.
	if i := strings.Index(next, "api/correspondents/"); i >= 0 {
		return next[i:]
	}

	return next
}

View file

@ -203,7 +203,7 @@ func TestGetDocumentsByTags(t *testing.T) {
// Set mock responses // Set mock responses
env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) { env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) {
// Verify query parameters // Verify query parameters
expectedQuery := "query=tag:tag1+tag:tag2" expectedQuery := "query=tag:tag1+tag:tag2&page_size=25"
assert.Equal(t, expectedQuery, r.URL.RawQuery) assert.Equal(t, expectedQuery, r.URL.RawQuery)
w.WriteHeader(http.StatusOK) w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(documentsResponse) json.NewEncoder(w).Encode(documentsResponse)
@ -216,7 +216,7 @@ func TestGetDocumentsByTags(t *testing.T) {
ctx := context.Background() ctx := context.Background()
tags := []string{"tag1", "tag2"} tags := []string{"tag1", "tag2"}
documents, err := env.client.GetDocumentsByTags(ctx, tags) documents, err := env.client.GetDocumentsByTags(ctx, tags, 25)
require.NoError(t, err) require.NoError(t, err)
expectedDocuments := []Document{ expectedDocuments := []Document{

View file

@ -70,6 +70,7 @@ type GenerateSuggestionsRequest struct {
Documents []Document `json:"documents"` Documents []Document `json:"documents"`
GenerateTitles bool `json:"generate_titles,omitempty"` GenerateTitles bool `json:"generate_titles,omitempty"`
GenerateTags bool `json:"generate_tags,omitempty"` GenerateTags bool `json:"generate_tags,omitempty"`
GenerateCorrespondents bool `json:"generate_correspondents,omitempty"`
} }
// DocumentSuggestion is the response payload for /generate-suggestions endpoint and the request payload for /update-documents endpoint (as an array) // DocumentSuggestion is the response payload for /generate-suggestions endpoint and the request payload for /update-documents endpoint (as an array)
@ -79,4 +80,23 @@ type DocumentSuggestion struct {
SuggestedTitle string `json:"suggested_title,omitempty"` SuggestedTitle string `json:"suggested_title,omitempty"`
SuggestedTags []string `json:"suggested_tags,omitempty"` SuggestedTags []string `json:"suggested_tags,omitempty"`
SuggestedContent string `json:"suggested_content,omitempty"` SuggestedContent string `json:"suggested_content,omitempty"`
SuggestedCorrespondent string `json:"suggested_correspondent,omitempty"`
}
type Correspondent struct {
Name string `json:"name"`
MatchingAlgorithm int `json:"matching_algorithm"`
Match string `json:"match"`
IsInsensitive bool `json:"is_insensitive"`
Owner *int `json:"owner"`
SetPermissions struct {
View struct {
Users []int `json:"users"`
Groups []int `json:"groups"`
} `json:"view"`
Change struct {
Users []int `json:"users"`
Groups []int `json:"groups"`
} `json:"change"`
} `json:"set_permissions"`
} }