From 16e478687ee1babcc1eab5e9861047c368da4cc9 Mon Sep 17 00:00:00 2001 From: mkrinke Date: Wed, 12 Feb 2025 15:47:29 +0100 Subject: [PATCH] feat: Add TLS configuration support for HTTPS connections in Paperless client (#230) * feat: Add TLS configuration support for HTTPS connections in Paperless client * fix: Correct insecure skip verify environment variable logic --- main.go | 40 ++++++++++---------- paperless.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 111 insertions(+), 30 deletions(-) diff --git a/main.go b/main.go index eb43498..a300412 100644 --- a/main.go +++ b/main.go @@ -32,26 +32,26 @@ var ( log = logrus.New() // Environment Variables - correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",") - - paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") - paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") - openaiAPIKey = os.Getenv("OPENAI_API_KEY") - manualTag = os.Getenv("MANUAL_TAG") - autoTag = os.Getenv("AUTO_TAG") - manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet - autoOcrTag = os.Getenv("AUTO_OCR_TAG") - llmProvider = os.Getenv("LLM_PROVIDER") - llmModel = os.Getenv("LLM_MODEL") - visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") - visionLlmModel = os.Getenv("VISION_LLM_MODEL") - logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) - listenInterface = os.Getenv("LISTEN_INTERFACE") - autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE") - autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS") - autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS") - limitOcrPages int // Will be read from OCR_LIMIT_PAGES - tokenLimit = 0 // Will be read from TOKEN_LIMIT + paperlessInsecureSkipVerify = os.Getenv("PAPERLESS_INSECURE_SKIP_VERIFY") == "true" + correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",") + paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") + paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") + openaiAPIKey = os.Getenv("OPENAI_API_KEY") 
+ manualTag = os.Getenv("MANUAL_TAG") + autoTag = os.Getenv("AUTO_TAG") + manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet + autoOcrTag = os.Getenv("AUTO_OCR_TAG") + llmProvider = os.Getenv("LLM_PROVIDER") + llmModel = os.Getenv("LLM_MODEL") + visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") + visionLlmModel = os.Getenv("VISION_LLM_MODEL") + logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) + listenInterface = os.Getenv("LISTEN_INTERFACE") + autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE") + autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS") + autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS") + limitOcrPages int // Will be read from OCR_LIMIT_PAGES + tokenLimit = 0 // Will be read from TOKEN_LIMIT // Templates titleTemplate *template.Template diff --git a/paperless.go b/paperless.go index 58ad5cb..667ba73 100644 --- a/paperless.go +++ b/paperless.go @@ -3,11 +3,13 @@ package main import ( "bytes" "context" + "crypto/tls" "encoding/json" "fmt" "image/jpeg" "io" "net/http" + "net/url" "os" "path/filepath" "slices" @@ -16,6 +18,7 @@ import ( "sync" "github.com/gen2brain/go-fitz" + "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" "gorm.io/gorm" ) @@ -58,10 +61,18 @@ func hasSameTags(original, suggested []string) bool { func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient { cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR") + // Create a custom HTTP transport with TLS configuration + tr := &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: paperlessInsecureSkipVerify, + }, + } + httpClient := &http.Client{Transport: tr} + return &PaperlessClient{ BaseURL: strings.TrimRight(baseURL, "/"), APIToken: apiToken, - HTTPClient: &http.Client{}, + HTTPClient: httpClient, CacheFolder: cacheFolder, } } @@ -80,7 +91,53 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body req.Header.Set("Content-Type", "application/json") } - return client.HTTPClient.Do(req) + 
log.WithFields(logrus.Fields{ + "method": method, + "url": url, + "headers": req.Header, + }).Debug("Making HTTP request") + + resp, err := client.HTTPClient.Do(req) + if err != nil { + log.WithError(err).WithFields(logrus.Fields{ + "url": url, + "method": method, + "error": err, + }).Error("HTTP request failed") + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + + // Check if response is HTML instead of JSON for API endpoints + if strings.HasPrefix(path, "api/") { + contentType := resp.Header.Get("Content-Type") + if strings.Contains(contentType, "text/html") { + bodyBytes, _ := io.ReadAll(resp.Body) + resp.Body.Close() + + // Create a new response with the same body for the caller + resp = &http.Response{ + Status: resp.Status, + StatusCode: resp.StatusCode, + Header: resp.Header, + Body: io.NopCloser(bytes.NewBuffer(bodyBytes)), + } + + log.WithFields(logrus.Fields{ + "url": url, + "method": method, + "content-type": contentType, + "status-code": resp.StatusCode, + "response": string(bodyBytes), + "base-url": client.BaseURL, + "request-path": path, + "full-headers": resp.Header, + }).Error("Received HTML response for API request") + + return nil, fmt.Errorf("received HTML response instead of JSON (status: %d). This often indicates an SSL/TLS issue or invalid authentication. Check your PAPERLESS_BASE_URL, PAPERLESS_API_TOKEN and PAPERLESS_INSECURE_SKIP_VERIFY settings. 
Full response: %s", resp.StatusCode, string(bodyBytes)) + } + } + + return resp, nil } // GetAllTags retrieves all tags from the Paperless-NGX API @@ -120,10 +177,19 @@ func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, // Extract relative path from the Next URL if tagsResponse.Next != "" { nextURL := tagsResponse.Next - if strings.HasPrefix(nextURL, client.BaseURL) { - nextURL = strings.TrimPrefix(nextURL, client.BaseURL+"/") + if strings.HasPrefix(nextURL, "http") { + // Extract just the path portion from the full URL + if parsedURL, err := url.Parse(nextURL); err == nil { + path = strings.TrimPrefix(parsedURL.Path, "/") + if parsedURL.RawQuery != "" { + path += "?" + parsedURL.RawQuery + } + } else { + return nil, fmt.Errorf("failed to parse next URL: %v", err) + } + } else { + path = strings.TrimPrefix(nextURL, "/") } - path = nextURL } else { path = "" } @@ -143,19 +209,34 @@ func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []st resp, err := client.Do(ctx, "GET", path, nil) if err != nil { - return nil, err + return nil, fmt.Errorf("HTTP request failed in GetDocumentsByTags: %w", err) } defer resp.Body.Close() + // Read the response body + bodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + if resp.StatusCode != http.StatusOK { - bodyBytes, _ := io.ReadAll(resp.Body) - return nil, fmt.Errorf("error searching documents: %d, %s", resp.StatusCode, string(bodyBytes)) + log.WithFields(logrus.Fields{ + "status_code": resp.StatusCode, + "path": path, + "response": string(bodyBytes), + "headers": resp.Header, + }).Error("Error response from server in GetDocumentsByTags") + return nil, fmt.Errorf("error searching documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes)) } var documentsResponse GetDocumentsApiResponse - err = json.NewDecoder(resp.Body).Decode(&documentsResponse) + err = json.Unmarshal(bodyBytes, 
&documentsResponse) if err != nil { - return nil, err + log.WithFields(logrus.Fields{ + "response_body": string(bodyBytes), + "error": err, + }).Error("Failed to parse JSON response in GetDocumentsByTags") + return nil, fmt.Errorf("failed to parse JSON response: %w", err) } allTags, err := client.GetAllTags(ctx)