mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 12:58:02 -05:00
243 lines
6.4 KiB
Go
243 lines
6.4 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"strings"
|
|
)
|
|
|
|
// PaperlessClient struct to interact with the Paperless-NGX API
|
|
type PaperlessClient struct {
|
|
BaseURL string
|
|
APIToken string
|
|
HTTPClient *http.Client
|
|
}
|
|
|
|
// NewPaperlessClient creates a new instance of PaperlessClient with a default HTTP client
|
|
func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
|
|
return &PaperlessClient{
|
|
BaseURL: strings.TrimRight(baseURL, "/"),
|
|
APIToken: apiToken,
|
|
HTTPClient: &http.Client{},
|
|
}
|
|
}
|
|
|
|
// Do method to make requests to the Paperless-NGX API
|
|
func (c *PaperlessClient) Do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
|
|
url := fmt.Sprintf("%s/%s", c.BaseURL, strings.TrimLeft(path, "/"))
|
|
req, err := http.NewRequestWithContext(ctx, method, url, body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Authorization", fmt.Sprintf("Token %s", c.APIToken))
|
|
|
|
// Set Content-Type if body is present
|
|
if body != nil {
|
|
req.Header.Set("Content-Type", "application/json")
|
|
}
|
|
|
|
return c.HTTPClient.Do(req)
|
|
}
|
|
|
|
// GetAllTags retrieves all tags from the Paperless-NGX API
|
|
func (c *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error) {
|
|
tagIDMapping := make(map[string]int)
|
|
path := "api/tags/"
|
|
|
|
for path != "" {
|
|
resp, err := c.Do(ctx, "GET", path, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
|
return nil, fmt.Errorf("error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
|
|
}
|
|
|
|
var tagsResponse struct {
|
|
Results []struct {
|
|
ID int `json:"id"`
|
|
Name string `json:"name"`
|
|
} `json:"results"`
|
|
Next string `json:"next"`
|
|
}
|
|
|
|
err = json.NewDecoder(resp.Body).Decode(&tagsResponse)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for _, tag := range tagsResponse.Results {
|
|
tagIDMapping[tag.Name] = tag.ID
|
|
}
|
|
|
|
// Extract relative path from the Next URL
|
|
if tagsResponse.Next != "" {
|
|
nextURL := tagsResponse.Next
|
|
if strings.HasPrefix(nextURL, c.BaseURL) {
|
|
nextURL = strings.TrimPrefix(nextURL, c.BaseURL+"/")
|
|
}
|
|
path = nextURL
|
|
} else {
|
|
path = ""
|
|
}
|
|
}
|
|
|
|
return tagIDMapping, nil
|
|
}
|
|
|
|
// GetDocumentsByTags retrieves documents that match the specified tags
|
|
func (c *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string) ([]Document, error) {
|
|
tagQueries := make([]string, len(tags))
|
|
for i, tag := range tags {
|
|
tagQueries[i] = fmt.Sprintf("tag:%s", tag)
|
|
}
|
|
searchQuery := strings.Join(tagQueries, " ")
|
|
path := fmt.Sprintf("api/documents/?query=%s", urlEncode(searchQuery))
|
|
|
|
resp, err := c.Do(ctx, "GET", path, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
|
return nil, fmt.Errorf("error searching documents: %d, %s", resp.StatusCode, string(bodyBytes))
|
|
}
|
|
|
|
var documentsResponse GetDocumentsApiResponse
|
|
err = json.NewDecoder(resp.Body).Decode(&documentsResponse)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
allTags, err := c.GetAllTags(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
documents := make([]Document, 0, len(documentsResponse.Results))
|
|
for _, result := range documentsResponse.Results {
|
|
tagNames := make([]string, len(result.Tags))
|
|
for i, resultTagID := range result.Tags {
|
|
for tagName, tagID := range allTags {
|
|
if resultTagID == tagID {
|
|
tagNames[i] = tagName
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
documents = append(documents, Document{
|
|
ID: result.ID,
|
|
Title: result.Title,
|
|
Content: result.Content,
|
|
Tags: tagNames,
|
|
})
|
|
}
|
|
|
|
return documents, nil
|
|
}
|
|
|
|
// DownloadPDF downloads the PDF file of the specified document
|
|
func (c *PaperlessClient) DownloadPDF(ctx context.Context, document Document) ([]byte, error) {
|
|
path := fmt.Sprintf("api/documents/%d/download/", document.ID)
|
|
resp, err := c.Do(ctx, "GET", path, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
|
return nil, fmt.Errorf("error downloading document %d: %d, %s", document.ID, resp.StatusCode, string(bodyBytes))
|
|
}
|
|
|
|
return io.ReadAll(resp.Body)
|
|
}
|
|
|
|
// UpdateDocuments updates the specified documents with suggested changes
|
|
func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error {
|
|
// Fetch all available tags
|
|
availableTags, err := c.GetAllTags(ctx)
|
|
if err != nil {
|
|
log.Printf("Error fetching available tags: %v", err)
|
|
return err
|
|
}
|
|
|
|
for _, document := range documents {
|
|
documentID := document.ID
|
|
|
|
updatedFields := make(map[string]interface{})
|
|
newTags := []int{}
|
|
|
|
tags := document.SuggestedTags
|
|
if len(tags) == 0 {
|
|
tags = document.OriginalDocument.Tags
|
|
}
|
|
|
|
// Map suggested tag names to IDs
|
|
for _, tagName := range tags {
|
|
if tagID, exists := availableTags[tagName]; exists {
|
|
// Skip the tag that we are filtering
|
|
if tagName == manualTag {
|
|
continue
|
|
}
|
|
newTags = append(newTags, tagID)
|
|
} else {
|
|
log.Printf("Tag '%s' does not exist in paperless-ngx, skipping.", tagName)
|
|
}
|
|
}
|
|
|
|
updatedFields["tags"] = newTags
|
|
|
|
suggestedTitle := document.SuggestedTitle
|
|
if len(suggestedTitle) > 128 {
|
|
suggestedTitle = suggestedTitle[:128]
|
|
}
|
|
if suggestedTitle != "" {
|
|
updatedFields["title"] = suggestedTitle
|
|
} else {
|
|
log.Printf("No valid title found for document %d, skipping.", documentID)
|
|
}
|
|
|
|
// Marshal updated fields to JSON
|
|
jsonData, err := json.Marshal(updatedFields)
|
|
if err != nil {
|
|
log.Printf("Error marshalling JSON for document %d: %v", documentID, err)
|
|
return err
|
|
}
|
|
|
|
// Send the update request using the generic Do method
|
|
path := fmt.Sprintf("api/documents/%d/", documentID)
|
|
resp, err := c.Do(ctx, "PATCH", path, bytes.NewBuffer(jsonData))
|
|
if err != nil {
|
|
log.Printf("Error updating document %d: %v", documentID, err)
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
|
log.Printf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
|
|
return fmt.Errorf("error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
|
|
}
|
|
|
|
log.Printf("Document %d updated successfully.", documentID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// urlEncode encodes a string for safe URL usage
|
|
func urlEncode(s string) string {
|
|
return strings.ReplaceAll(s, " ", "+")
|
|
}
|