Merge remote-tracking branch 'origin/main' into correspondents

This commit is contained in:
set 2025-01-07 14:51:39 +01:00
commit 98fb0e2e49
24 changed files with 1430 additions and 163 deletions

View file

@ -15,10 +15,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: 1.22
@ -60,7 +60,7 @@ jobs:
needs: test
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

1
.gitignore vendored
View file

@ -3,3 +3,4 @@
prompts/
tests/tmp
tmp/
db/

View file

@ -18,11 +18,14 @@ COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy the rest of the application code
COPY . .
# Pre-compile go-sqlite3 to avoid doing this every time
RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3
# Build the Go binary with the musl build tag
RUN go build -tags musl -o paperless-gpt .
# Now copy the actual source files
COPY *.go .
# Build the binary using caching for both go modules and build cache
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
# Stage 2: Build Vite frontend
FROM node:20-alpine AS frontend

View file

@ -2,7 +2,7 @@
[![License](https://img.shields.io/github/license/icereed/paperless-gpt)](LICENSE)
[![Docker Pulls](https://img.shields.io/docker/pulls/icereed/paperless-gpt)](https://hub.docker.com/r/icereed/paperless-gpt)
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md)
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md)
![Screenshot](./paperless-gpt-screenshot.png)
@ -59,7 +59,7 @@
The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx.
```yaml
version: '3.7'
version: "3.7"
services:
paperless-ngx:
image: ghcr.io/paperless-ngx/paperless-ngx:latest
@ -68,20 +68,24 @@ services:
paperless-gpt:
image: icereed/paperless-gpt:latest
environment:
PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
PAPERLESS_API_TOKEN: 'your_paperless_api_token'
LLM_PROVIDER: 'openai' # or 'ollama'
LLM_MODEL: 'gpt-4o' # or 'llama2'
OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI
LLM_LANGUAGE: 'English' # Optional, default is 'English'
OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
VISION_LLM_PROVIDER: 'ollama' # Optional, for OCR
VISION_LLM_MODEL: 'minicpm-v' # Optional, for OCR
LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error'
PAPERLESS_BASE_URL: "http://paperless-ngx:8000"
PAPERLESS_API_TOKEN: "your_paperless_api_token"
PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com" # Optional, your public link to access Paperless
MANUAL_TAG: "paperless-gpt" # Optional, default is 'paperless-gpt'
AUTO_TAG: "paperless-gpt-auto" # Optional, default is 'paperless-gpt-auto'
LLM_PROVIDER: "openai" # or 'ollama'
LLM_MODEL: "gpt-4o" # or 'llama2'
OPENAI_API_KEY: "your_openai_api_key" # Required if using OpenAI
LLM_LANGUAGE: "English" # Optional, default is 'English'
OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama
VISION_LLM_PROVIDER: "ollama" # Optional (for OCR) - ollama or openai
VISION_LLM_MODEL: "minicpm-v" # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default is 'paperless-gpt-ocr-auto'
LOG_LEVEL: "info" # Optional or 'debug', 'warn', 'error'
volumes:
- ./prompts:/app/prompts # Mount the prompts directory
ports:
- '8080:8080'
- "8080:8080"
depends_on:
- paperless-ngx
```
@ -134,9 +138,12 @@ If you prefer to run the application manually:
### Environment Variables
| Variable | Description | Required |
|----------------------------|----------------------------------------------------------------------------------------------------------------------------------------|----------|
| -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -------- |
| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
| `PAPERLESS_PUBLIC_URL` | The public URL for your Paperless instance, if it is different to your `PAPERLESS_BASE_URL` - say if you are running in Docker Compose | No |
| `MANUAL_TAG` | The tag to use for manually processing documents. Default is `paperless-gpt`. | No |
| `AUTO_TAG` | The tag to use for automatically processing documents. Default is `paperless-gpt-auto`. | No |
| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
| `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes |
| `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. |
@ -144,7 +151,12 @@ If you prefer to run the application manually:
| `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No |
| `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No |
| `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No |
| `AUTO_OCR_TAG` | The tag to use for automatically processing documents with OCR. Default is `paperless-gpt-ocr-auto`. | No |
| `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No |
| `LISTEN_INTERFACE` | The network address (interface and port) paperless-gpt listens on. Default is `:8080`. | No |
| `WEBUI_PATH` | The path to load static content from. Default is `./web-app/dist` | No |
| `AUTO_GENERATE_TITLE` | Enable/disable title generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No |
| `AUTO_GENERATE_TAGS` | Enable/disable tag generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No |
| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No |
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.

View file

@ -1,6 +1,7 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"os"
@ -138,7 +139,7 @@ func (app *App) updateDocumentsHandler(c *gin.Context) {
return
}
err := app.Client.UpdateDocuments(ctx, documents)
err := app.Client.UpdateDocuments(ctx, documents, app.Database, false)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error updating documents: %v", err)})
log.Errorf("Error updating documents: %v", err)
@ -237,8 +238,94 @@ func (app *App) getDocumentHandler() gin.HandlerFunc {
document, err := app.Client.GetDocument(c, parsedID)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
log.Errorf("Error fetching document: %v", err)
return
}
c.JSON(http.StatusOK, document)
}
}
// Section for local-db actions
// getModificationHistoryHandler writes the full modification history from the
// local database to the response as JSON.
//
// On success it responds with 200 and the slice returned by
// GetAllModifications. If the lookup fails it responds with 500 and a generic
// error message, and logs the underlying error.
func (app *App) getModificationHistoryHandler(c *gin.Context) {
	history, err := GetAllModifications(app.Database)
	if err == nil {
		c.JSON(http.StatusOK, history)
		return
	}

	// The client only gets a generic message; the detailed error goes to the log.
	c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification history"})
	log.Errorf("Failed to retrieve modification history: %v", err)
}
// undoModificationHandler reverts a single recorded modification.
//
// It reads the modification ID from the "id" route parameter, loads the
// matching ModificationHistory record and re-applies the record's
// PreviousValue to the affected field ("title", "tags" or "content") through
// app.Client.UpdateDocuments. When the update succeeds the record is marked as
// undone.
//
// Responses:
//   - 200 with no body when the undo succeeded
//   - 400 when the ID is not a non-negative integer, the modification has
//     already been undone, or the recorded field is not a supported one
//   - 500 when the local database or the document client reports an error
func (app *App) undoModificationHandler(c *gin.Context) {
	id := c.Param("id")
	modID, err := strconv.Atoi(id)
	if err == nil && modID < 0 {
		// A negative value would silently wrap around in the uint conversion
		// below and surface as a misleading "failed to retrieve" 500 error.
		err = fmt.Errorf("negative ID %d", modID)
	}
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification ID"})
		log.Errorf("Invalid modification ID: %v", err)
		return
	}

	modification, err := GetModification(app.Database, uint(modID))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification"})
		log.Errorf("Failed to retrieve modification: %v", err)
		return
	}

	if modification.Undone {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Modification has already been undone"})
		log.Errorf("Modification has already been undone: %v", id)
		return
	}

	// Ok, we're actually doing the update:
	ctx := c.Request.Context()

	// Build the document suggestion that UpdateDocuments expects. The current
	// state of the document is fetched so the update has an original to work
	// from.
	var suggestion DocumentSuggestion
	suggestion.ID = int(modification.DocumentID)
	suggestion.OriginalDocument, err = app.Client.GetDocument(ctx, int(modification.DocumentID))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve original document"})
		log.Errorf("Failed to retrieve original document: %v", err)
		return
	}

	// Only the field named by the record is restored to its previous value.
	switch modification.ModField {
	case "title":
		suggestion.SuggestedTitle = modification.PreviousValue
	case "tags":
		// Previous tags are stored as a JSON-encoded array of strings.
		var tags []string
		if err := json.Unmarshal([]byte(modification.PreviousValue), &tags); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to unmarshal previous tags"})
			log.Errorf("Failed to unmarshal previous tags: %v", err)
			return
		}
		suggestion.SuggestedTags = tags
	case "content":
		suggestion.SuggestedContent = modification.PreviousValue
	default:
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification field"})
		log.Errorf("Invalid modification field: %v", modification.ModField)
		return
	}

	// Update the document.
	// NOTE(review): the trailing `true` presumably flags this call as an undo
	// (the regular update handler passes `false`) — confirm against
	// UpdateDocuments.
	err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{suggestion}, app.Database, true)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update document"})
		log.Errorf("Failed to update document: %v", err)
		return
	}

	// Successful, so set modification as undone
	err = SetModificationUndone(app.Database, modification)
	if err != nil {
		// At this point the document has already been reverted; only the
		// bookkeeping failed, so make sure the cause ends up in the log.
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to mark modification as undone"})
		log.Errorf("Failed to mark modification as undone: %v", err)
		return
	}

	// Else all was ok
	c.Status(http.StatusOK)
}

View file

@ -3,6 +3,7 @@ package main
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"strings"
"sync"
@ -121,14 +122,26 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
prompt := promptBuffer.String()
// If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision
var parts []llms.ContentPart
if strings.ToLower(visionLlmProvider) != "openai" {
parts = []llms.ContentPart{
llms.BinaryPart("image/jpeg", jpegBytes),
llms.TextPart(prompt),
}
} else {
base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
parts = []llms.ContentPart{
llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
llms.TextPart(prompt),
}
}
// Convert the image to text
completion, err := app.VisionLLM.GenerateContent(ctx, []llms.MessageContent{
{
Parts: []llms.ContentPart{
llms.BinaryPart("image/jpeg", jpegBytes),
llms.TextPart(prompt),
},
Role: llms.ChatMessageTypeHuman,
Parts: parts,
Role: llms.ChatMessageTypeHuman,
},
})
if err != nil {

30
go.mod
View file

@ -5,19 +5,22 @@ go 1.22.0
toolchain go1.22.2
require (
github.com/Masterminds/sprig/v3 v3.2.3
github.com/Masterminds/sprig/v3 v3.3.0
github.com/gen2brain/go-fitz v1.24.14
github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
github.com/stretchr/testify v1.10.0
github.com/tmc/langchaingo v0.1.12
golang.org/x/sync v0.7.0
golang.org/x/sync v0.10.0
gorm.io/driver/sqlite v1.5.6
gorm.io/gorm v1.25.12
)
require (
dario.cat/mergo v1.0.1 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/Masterminds/semver/v3 v3.3.0 // indirect
github.com/bytedance/sonic v1.11.6 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
@ -31,29 +34,32 @@ require (
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/jupiterrider/ffi v0.2.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/copystructure v1.0.0 // indirect
github.com/mitchellh/reflectwalk v1.0.0 // indirect
github.com/mattn/go-sqlite3 v1.14.24 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/spf13/cast v1.3.1 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/crypto v0.26.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/sys v0.23.0 // indirect
golang.org/x/text v0.20.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

42
go.sum
View file

@ -1,9 +1,15 @@
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
@ -45,9 +51,15 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jupiterrider/ffi v0.2.0 h1:tMM70PexgYNmV+WyaYhJgCvQAvtTCs3wXeILPutihnA=
@ -60,10 +72,16 @@ github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@ -77,10 +95,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@ -95,6 +117,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE=
github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
@ -110,6 +134,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
@ -119,8 +145,10 @@ golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@ -132,6 +160,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
@ -139,8 +169,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
@ -157,6 +187,10 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorm.io/driver/sqlite v1.5.6 h1:fO/X46qn5NUEEOZtnjJRWRzZMe8nqJiQ9E+0hi+hKQE=
gorm.io/driver/sqlite v1.5.6/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4=
gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8=
gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=

33
jobs.go
View file

@ -2,10 +2,8 @@ package main
import (
"context"
"fmt"
"os"
"sort"
"strings"
"sync"
"time"
@ -125,38 +123,13 @@ func processJob(app *App, job *Job) {
ctx := context.Background()
// Download images of the document
imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, job.DocumentID)
fullOcrText, err := app.ProcessDocumentOCR(ctx, job.DocumentID)
if err != nil {
logger.Infof("Error downloading document images for job %s: %v", job.ID, err)
jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error downloading document images: %v", err))
logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err)
jobStore.updateJobStatus(job.ID, "failed", err.Error())
return
}
var ocrTexts []string
for i, imagePath := range imagePaths {
imageContent, err := os.ReadFile(imagePath)
if err != nil {
logger.Errorf("Error reading image file for job %s: %v", job.ID, err)
jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error reading image file: %v", err))
return
}
ocrText, err := app.doOCRViaLLM(ctx, imageContent)
if err != nil {
logger.Errorf("Error performing OCR for job %s: %v", job.ID, err)
jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error performing OCR: %v", err))
return
}
ocrTexts = append(ocrTexts, ocrText)
jobStore.updatePagesDone(job.ID, i+1) // Update PagesDone after each page is processed
}
// Combine the OCR texts
fullOcrText := strings.Join(ocrTexts, "\n\n")
// Update job status and result
jobStore.updateJobStatus(job.ID, "completed", fullOcrText)
logger.Infof("Job completed: %s", job.ID)
}

79
local_db.go Normal file
View file

@ -0,0 +1,79 @@
package main
import (
"os"
"path/filepath"
"time"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
// ModificationHistory is the GORM model for one recorded change to a single
// field of a document. Records are created by InsertModification, listed by
// GetAllModifications and flagged as reverted by SetModificationUndone.
type ModificationHistory struct {
ID uint `gorm:"primaryKey"` // Primary key of the record
DocumentID uint `gorm:"not null"` // ID of the document that was modified
DateChanged string `gorm:"not null"` // Time of the modification; InsertModification stores it as an RFC 3339 string
ModField string `gorm:"size:255;not null"` // Name of the modified field; the undo handler supports "title", "tags" and "content"
PreviousValue string `gorm:"size:1048576"` // Value of the field before the change (for "tags" a JSON array of strings)
NewValue string `gorm:"size:1048576"` // Value of the field after the change
Undone bool `gorm:"not null;default:false"` // Whether the modification has been undone
UndoneDate string `gorm:"default:null"` // Time of the undo; SetModificationUndone stores it as an RFC 3339 string
}
// InitializeDB opens the SQLite database that stores the modification history
// and makes sure its schema is up to date.
//
// The database file is "modification_history.db" inside the "db" directory,
// relative to the working directory; the directory is created when missing.
// Any failure (creating the directory, opening the database, migrating the
// schema) is reported through log.Fatalf.
func InitializeDB() *gorm.DB {
	const dataDir = "db"

	// Ensure db directory exists
	if mkErr := os.MkdirAll(dataDir, os.ModePerm); mkErr != nil {
		log.Fatalf("Failed to create db directory: %v", mkErr)
	}

	// Connect to SQLite database
	dialector := sqlite.Open(filepath.Join(dataDir, "modification_history.db"))
	conn, openErr := gorm.Open(dialector, &gorm.Config{})
	if openErr != nil {
		log.Fatalf("Failed to connect to database: %v", openErr)
	}

	// Migrate the schema (create the table if it doesn't exist)
	if migrateErr := conn.AutoMigrate(&ModificationHistory{}); migrateErr != nil {
		log.Fatalf("Failed to migrate database schema: %v", migrateErr)
	}

	return conn
}
// InsertModification stores a new modification record in the database.
//
// The record's DateChanged field is overwritten with the current time in
// RFC 3339 format before the insert, so callers do not need to set it.
// A nil record is rejected with gorm.ErrInvalidValue instead of panicking.
// The returned error is the one reported by GORM's Create, or nil on success.
func InsertModification(db *gorm.DB, record *ModificationHistory) error {
	if record == nil {
		return gorm.ErrInvalidValue
	}

	log.Debugf("Passed modification record: %+v", record)
	record.DateChanged = time.Now().Format(time.RFC3339) // Set the DateChanged field to the current time
	log.Debugf("Inserting modification record: %+v", record)

	// record is already a pointer; pass it as-is rather than a pointer to it.
	result := db.Create(record)
	log.Debugf("Insertion result: %+v", result)
	return result.Error
}
// GetModification looks up a single modification record by primary key.
//
// It always returns a non-nil pointer; when the lookup fails the pointed-to
// record is left at its zero value and the error reported by GORM's First is
// returned alongside it, so callers must check the error first.
func GetModification(db *gorm.DB, id uint) (*ModificationHistory, error) {
	found := new(ModificationHistory)
	lookupErr := db.First(found, id).Error
	return found, lookupErr
}
// GetAllModifications returns every modification record, newest first.
//
// Records are ordered by DateChanged descending. Because DateChanged only has
// one-second resolution, several records can share the same timestamp; the
// primary key is used as a tie-breaker so that the result order is stable and
// later inserts come first.
// The returned error is the one reported by GORM's Find, or nil on success.
func GetAllModifications(db *gorm.DB) ([]ModificationHistory, error) {
	var records []ModificationHistory
	result := db.Order("date_changed DESC, id DESC").Find(&records)
	return records, result.Error
}
// SetModificationUndone marks a modification record as undone and persists it.
//
// It sets Undone to true and UndoneDate to the current time in RFC 3339
// format on the passed record, then writes the record with GORM's Save.
// A nil record is rejected with gorm.ErrInvalidValue instead of panicking.
// The returned error is the one reported by Save, or nil on success.
func SetModificationUndone(db *gorm.DB, record *ModificationHistory) error {
	if record == nil {
		return gorm.ErrInvalidValue
	}

	record.Undone = true
	record.UndoneDate = time.Now().Format(time.RFC3339)

	// record is already a pointer; pass it as-is rather than a pointer to it.
	result := db.Save(record)
	return result.Error
}

163
main.go
View file

@ -17,6 +17,7 @@ import (
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/ollama"
"github.com/tmc/langchaingo/llms/openai"
"gorm.io/gorm"
)
// Global Variables and Constants
@ -26,17 +27,24 @@ var (
log = logrus.New()
// Environment Variables
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
manualTag = "paperless-gpt"
autoTag = "paperless-gpt-auto"
llmProvider = os.Getenv("LLM_PROVIDER")
llmModel = os.Getenv("LLM_MODEL")
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
visionLlmModel = os.Getenv("VISION_LLM_MODEL")
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
manualTag = os.Getenv("MANUAL_TAG")
autoTag = os.Getenv("AUTO_TAG")
manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet
autoOcrTag = os.Getenv("AUTO_OCR_TAG")
llmProvider = os.Getenv("LLM_PROVIDER")
llmModel = os.Getenv("LLM_MODEL")
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
visionLlmModel = os.Getenv("VISION_LLM_MODEL")
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
listenInterface = os.Getenv("LISTEN_INTERFACE")
webuiPath = os.Getenv("WEBUI_PATH")
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
// Templates
titleTemplate *template.Template
@ -68,7 +76,6 @@ Content:
Please concisely select the {{.Language}} tags from the list above that best describe the document.
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
`
defaultCorrespondentTemplate = `I will provide you with the content of a document. Your task is to suggest a correspondent that is most relevant to the document.
Correspondents are the senders of documents that reach you. In the other direction, correspondents are the recipients of documents that you send.
@ -96,20 +103,20 @@ The content is likely in {{.Language}}.
Document Content:
{{.Content}}
`
defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format.`
defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format but without a code block.`
)
// App struct to hold dependencies
//
// One instance is built in main and its methods implement both the background
// processing jobs and the HTTP handlers registered on the router.
type App struct {
	// Client talks to the Paperless-NGX API.
	Client *PaperlessClient
	// Database is the gorm handle that is passed to UpdateDocuments so that
	// modification records can be written.
	Database *gorm.DB
	// LLM is the primary language model.
	// NOTE(review): presumably used for title/tag/correspondent suggestions - confirm.
	LLM llms.Model
	// VisionLLM is the vision-capable model.
	// NOTE(review): presumably the model behind the OCR path - confirm.
	VisionLLM llms.Model
}
func main() {
// Validate Environment Variables
validateEnvVars()
validateOrDefaultEnvVars()
// Initialize logrus logger
initLogger()
@ -117,6 +124,9 @@ func main() {
// Initialize PaperlessClient
client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken)
// Initialize Database
database := InitializeDB()
// Load Templates
loadTemplates()
@ -135,6 +145,7 @@ func main() {
// Initialize App with dependencies
app := &App{
Client: client,
Database: database,
LLM: llm,
VisionLLM: visionLlm,
}
@ -147,7 +158,23 @@ func main() {
backoffDuration := minBackoffDuration
for {
processedCount, err := app.processAutoTagDocuments()
processedCount, err := func() (int, error) {
count := 0
if isOcrEnabled() {
ocrCount, err := app.processAutoOcrTagDocuments()
if err != nil {
return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err)
}
count += ocrCount
}
autoCount, err := app.processAutoTagDocuments()
if err != nil {
return 0, fmt.Errorf("error in processAutoTagDocuments: %w", err)
}
count += autoCount
return count, nil
}()
if err != nil {
log.Errorf("Error in processAutoTagDocuments: %v", err)
time.Sleep(backoffDuration)
@ -195,23 +222,43 @@ func main() {
enabled := isOcrEnabled()
c.JSON(http.StatusOK, gin.H{"enabled": enabled})
})
// Local db actions
api.GET("/modifications", app.getModificationHistoryHandler)
api.POST("/undo-modification/:id", app.undoModificationHandler)
// Get public Paperless environment (as set in environment variables)
api.GET("/paperless-url", func(c *gin.Context) {
baseUrl := os.Getenv("PAPERLESS_PUBLIC_URL")
if baseUrl == "" {
baseUrl = os.Getenv("PAPERLESS_BASE_URL")
}
baseUrl = strings.TrimRight(baseUrl, "/")
c.JSON(http.StatusOK, gin.H{"url": baseUrl})
})
}
if webuiPath == "" {
webuiPath = "./web-app/dist"
}
// Serve static files for the frontend under /assets
router.StaticFS("/assets", gin.Dir("./web-app/dist/assets", true))
router.StaticFile("/vite.svg", "./web-app/dist/vite.svg")
router.StaticFS("/assets", gin.Dir(webuiPath+"/assets", true))
router.StaticFile("/vite.svg", webuiPath+"/vite.svg")
// Catch-all route for serving the frontend
router.NoRoute(func(c *gin.Context) {
c.File("./web-app/dist/index.html")
c.File(webuiPath + "/index.html")
})
// Start OCR worker pool
numWorkers := 1 // Number of workers to start
startWorkerPool(app, numWorkers)
log.Infoln("Server started on port :8080")
if err := router.Run(":8080"); err != nil {
if listenInterface == "" {
listenInterface = ":8080"
}
log.Infoln("Server started on interface", listenInterface)
if err := router.Run(listenInterface); err != nil {
log.Fatalf("Failed to run server: %v", err)
}
}
@ -242,8 +289,32 @@ func isOcrEnabled() bool {
return visionLlmModel != "" && visionLlmProvider != ""
}
// validateEnvVars ensures all necessary environment variables are set
func validateEnvVars() {
// validateOrDefaultEnvVars ensures all necessary environment variables are set
func validateOrDefaultEnvVars() {
if manualTag == "" {
manualTag = "paperless-gpt"
}
fmt.Printf("Using %s as manual tag\n", manualTag)
if autoTag == "" {
autoTag = "paperless-gpt-auto"
}
fmt.Printf("Using %s as auto tag\n", autoTag)
if manualOcrTag == "" {
manualOcrTag = "paperless-gpt-ocr"
}
if isOcrEnabled() {
fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag)
}
if autoOcrTag == "" {
autoOcrTag = "paperless-gpt-ocr-auto"
}
if isOcrEnabled() {
fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag)
}
if paperlessBaseURL == "" {
log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.")
}
@ -287,9 +358,9 @@ func (app *App) processAutoTagDocuments() (int, error) {
suggestionRequest := GenerateSuggestionsRequest{
Documents: documents,
GenerateTitles: true,
GenerateTags: true,
GenerateCorrespondents: true,
GenerateTitles: strings.ToLower(autoGenerateTitle) != "false",
GenerateTags: strings.ToLower(autoGenerateTags) != "false",
GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false",
}
suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest)
@ -297,7 +368,7 @@ func (app *App) processAutoTagDocuments() (int, error) {
return 0, fmt.Errorf("error generating suggestions: %w", err)
}
err = app.Client.UpdateDocuments(ctx, suggestions)
err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false)
if err != nil {
return 0, fmt.Errorf("error updating documents: %w", err)
}
@ -305,6 +376,44 @@ func (app *App) processAutoTagDocuments() (int, error) {
return len(documents), nil
}
// processAutoOcrTagDocuments handles the background auto-tagging of OCR documents.
//
// It fetches the documents that carry autoOcrTag, runs OCR on the first one
// only, and writes the recognised text back as that document's content via
// UpdateDocuments.
//
// Returns the number of documents processed in this pass (0 or 1) and a
// wrapped error if fetching, OCR or the update fails.
func (app *App) processAutoOcrTagDocuments() (int, error) {
	// GetDocumentsByTags takes the page size as its third argument. Only the
	// first hit is processed per pass; the remainder of the page merely feeds
	// the "remaining documents" debug message below.
	// NOTE(review): 25 is assumed to be the project's usual page size - confirm.
	const ocrFetchPageSize = 25

	ctx := context.Background()

	documents, err := app.Client.GetDocumentsByTags(ctx, []string{autoOcrTag}, ocrFetchPageSize)
	if err != nil {
		return 0, fmt.Errorf("error fetching documents with autoOcrTag: %w", err)
	}

	if len(documents) == 0 {
		log.Debugf("No documents with tag %s found", autoOcrTag)
		return 0, nil // No documents to process
	}

	log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag)

	// OCR is slow, so handle a single document per pass.
	document := documents[0]

	ocrContent, err := app.ProcessDocumentOCR(ctx, document.ID)
	if err != nil {
		return 0, fmt.Errorf("error processing document OCR: %w", err)
	}
	log.Debugf("OCR content for document %d: %s", document.ID, ocrContent)

	// Only the content is suggested; title and tags are left empty.
	err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{
		{
			ID:               document.ID,
			OriginalDocument: document,
			SuggestedContent: ocrContent,
		},
	}, app.Database, false)
	if err != nil {
		return 0, fmt.Errorf("error updating documents: %w", err)
	}

	return 1, nil // Processed one document
}
// removeTagFromList removes a specific tag from a list of tags
func removeTagFromList(tags []string, tagToRemove string) []string {
filteredTags := []string{}

39
ocr.go Normal file
View file

@ -0,0 +1,39 @@
package main
import (
"context"
"fmt"
"os"
"strings"
)
// ProcessDocumentOCR downloads the document as a set of image files, passes
// every image through doOCRViaLLM and returns the per-image texts joined by a
// blank line. The downloaded image files are removed again before returning.
func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
	pagePaths, downloadErr := app.Client.DownloadDocumentAsImages(ctx, documentID)

	// Registered before the error check so that any paths handed back together
	// with an error are cleaned up as well. Removal is best effort.
	defer func() {
		for idx := range pagePaths {
			os.Remove(pagePaths[idx])
		}
	}()

	if downloadErr != nil {
		return "", fmt.Errorf("error downloading document images: %w", downloadErr)
	}

	var pageTexts []string
	for _, pagePath := range pagePaths {
		pageBytes, readErr := os.ReadFile(pagePath)
		if readErr != nil {
			return "", fmt.Errorf("error reading image file: %w", readErr)
		}

		pageText, ocrErr := app.doOCRViaLLM(ctx, pageBytes)
		if ocrErr != nil {
			return "", fmt.Errorf("error performing OCR: %w", ocrErr)
		}
		log.Debugf("OCR text: %s", pageText)

		pageTexts = append(pageTexts, pageText)
	}

	combined := strings.Join(pageTexts, "\n\n")
	return combined, nil
}

View file

@ -11,11 +11,13 @@ import (
"os"
"path/filepath"
"slices"
"sort"
"strings"
"sync"
"github.com/gen2brain/go-fitz"
"golang.org/x/sync/errgroup"
"gorm.io/gorm"
)
// PaperlessClient struct to interact with the Paperless-NGX API
@ -26,6 +28,32 @@ type PaperlessClient struct {
CacheFolder string
}
// hasSameTags reports whether both slices hold the same tag names with the
// same multiplicities, irrespective of order. Neither argument is modified.
func hasSameTags(original, suggested []string) bool {
	// Different lengths can never match; bail out before copying anything.
	if len(original) != len(suggested) {
		return false
	}

	// sortedCopy returns a sorted duplicate so the caller's slice stays untouched.
	sortedCopy := func(in []string) []string {
		out := append([]string(nil), in...)
		sort.Strings(out)
		return out
	}

	left, right := sortedCopy(original), sortedCopy(suggested)

	// Once sorted, equal collections are equal position by position.
	for idx, tag := range left {
		if right[idx] != tag {
			return false
		}
	}
	return true
}
// NewPaperlessClient creates a new instance of PaperlessClient with a default HTTP client
func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR")
@ -108,10 +136,10 @@ func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int,
func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string, pageSize int) ([]Document, error) {
tagQueries := make([]string, len(tags))
for i, tag := range tags {
tagQueries[i] = fmt.Sprintf("tag:%s", tag)
tagQueries[i] = fmt.Sprintf("tags__name__iexact=%s", tag)
}
searchQuery := strings.Join(tagQueries, " ")
path := fmt.Sprintf("api/documents/?query=%s&page_size=%d", urlEncode(searchQuery), pageSize)
searchQuery := strings.Join(tagQueries, "&")
path := fmt.Sprintf("api/documents/?%s&page_size=%d", urlEncode(searchQuery), pageSize)
resp, err := client.Do(ctx, "GET", path, nil)
if err != nil {
@ -218,7 +246,7 @@ func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int)
}
// UpdateDocuments updates the specified documents with suggested changes
func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error {
func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error {
// Fetch all available tags
availableTags, err := client.GetAllTags(ctx)
if err != nil {
@ -247,21 +275,44 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
for _, document := range documents {
documentID := document.ID
// Original fields will store any updated fields to store records for
originalFields := make(map[string]interface{})
updatedFields := make(map[string]interface{})
newTags := []int{}
tags := document.SuggestedTags
if len(tags) == 0 {
tags = document.OriginalDocument.Tags
originalTags := document.OriginalDocument.Tags
originalTagsJSON, err := json.Marshal(originalTags)
if err != nil {
log.Errorf("Error marshalling JSON for document %d: %v", documentID, err)
return err
}
// remove autoTag to prevent infinite loop (even if it is in the original tags)
tags = removeTagFromList(tags, autoTag)
originalTags = removeTagFromList(originalTags, autoTag)
originalTags = removeTagFromList(originalTags, autoOcrTag)
if len(tags) == 0 {
tags = originalTags
} else {
// We have suggested tags to change
originalFields["tags"] = originalTags
// remove autoTag to prevent infinite loop - this is required in case of undo
tags = removeTagFromList(tags, autoTag)
}
updatedTagsJSON, err := json.Marshal(tags)
if err != nil {
log.Errorf("Error marshalling JSON for document %d: %v", documentID, err)
return err
}
// Map suggested tag names to IDs
for _, tagName := range tags {
if tagID, exists := availableTags[tagName]; exists {
// Skip the tag that we are filtering
if tagName == manualTag {
if !isUndo && tagName == manualTag {
continue
}
newTags = append(newTags, tagID)
@ -292,6 +343,7 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
suggestedTitle = suggestedTitle[:128]
}
if suggestedTitle != "" {
originalFields["title"] = document.OriginalDocument.Title
updatedFields["title"] = suggestedTitle
} else {
log.Warnf("No valid title found for document %d, skipping.", documentID)
@ -300,8 +352,11 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
// Suggested Content
suggestedContent := document.SuggestedContent
if suggestedContent != "" {
originalFields["content"] = document.OriginalDocument.Content
updatedFields["content"] = suggestedContent
}
log.Debugf("Document %d: Original fields: %v", documentID, originalFields)
log.Debugf("Document %d: Updated fields: %v Tags: %v", documentID, updatedFields, tags)
// Marshal updated fields to JSON
jsonData, err := json.Marshal(updatedFields)
@ -323,6 +378,43 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
bodyBytes, _ := io.ReadAll(resp.Body)
log.Errorf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
return fmt.Errorf("error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
} else {
for field, value := range originalFields {
log.Printf("Document %d: Updated %s from %v to %v", documentID, field, originalFields[field], value)
// Insert the modification record into the database
var modificationRecord ModificationHistory
if field == "tags" {
// Make sure we only store changes where tags are changed - not the same before and after
// And we have to use tags, not updatedFields as they are IDs not fields
if !hasSameTags(document.OriginalDocument.Tags, tags) {
modificationRecord = ModificationHistory{
DocumentID: uint(documentID),
ModField: field,
PreviousValue: string(originalTagsJSON),
NewValue: string(updatedTagsJSON),
}
}
} else {
// Only store mod if field actually changed
if originalFields[field] != updatedFields[field] {
modificationRecord = ModificationHistory{
DocumentID: uint(documentID),
ModField: field,
PreviousValue: fmt.Sprintf("%v", originalFields[field]),
NewValue: fmt.Sprintf("%v", updatedFields[field]),
}
}
}
// Only store if we have a valid modification record
if (modificationRecord != ModificationHistory{}) {
err = InsertModification(db, &modificationRecord)
}
if err != nil {
log.Errorf("Error inserting modification record for document %d: %v", documentID, err)
return err
}
}
}
log.Printf("Document %d updated successfully.", documentID)

View file

@ -13,6 +13,8 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
// Helper struct to hold common test data and methods
@ -22,6 +24,7 @@ type testEnv struct {
client *PaperlessClient
requestCount int
mockResponses map[string]http.HandlerFunc
db *gorm.DB
}
// newTestEnv initializes a new test environment
@ -31,6 +34,11 @@ func newTestEnv(t *testing.T) *testEnv {
mockResponses: make(map[string]http.HandlerFunc),
}
// Initialize test database
db, err := InitializeTestDB()
require.NoError(t, err)
env.db = db
// Create a mock server with a handler that dispatches based on URL path
env.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
env.requestCount++
@ -50,6 +58,22 @@ func newTestEnv(t *testing.T) *testEnv {
return env
}
// InitializeTestDB opens an in-memory SQLite database through gorm and
// migrates the ModificationHistory schema into it. It returns the ready
// handle, or the first error from opening or migrating.
func InitializeTestDB() (*gorm.DB, error) {
	// Use in-memory SQLite for testing
	const dsn = "file::memory:?cache=shared"

	testDB, openErr := gorm.Open(sqlite.Open(dsn), &gorm.Config{})
	if openErr != nil {
		return nil, openErr
	}

	// Migrate schema
	if migrateErr := testDB.AutoMigrate(&ModificationHistory{}); migrateErr != nil {
		return nil, migrateErr
	}

	return testDB, nil
}
// teardown closes the mock server
func (env *testEnv) teardown() {
env.server.Close()
@ -203,7 +227,7 @@ func TestGetDocumentsByTags(t *testing.T) {
// Set mock responses
env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) {
// Verify query parameters
expectedQuery := "query=tag:tag1+tag:tag2&page_size=25"
expectedQuery := "tags__name__iexact=tag1&tags__name__iexact=tag2&page_size=25"
assert.Equal(t, expectedQuery, r.URL.RawQuery)
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(documentsResponse)
@ -327,7 +351,7 @@ func TestUpdateDocuments(t *testing.T) {
})
ctx := context.Background()
err := env.client.UpdateDocuments(ctx, documents)
err := env.client.UpdateDocuments(ctx, documents, env.db, false)
require.NoError(t, err)
}

6
renovate.json Normal file
View file

@ -0,0 +1,6 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:recommended"
]
}

View file

@ -10,17 +10,23 @@
"dependencies": {
"@headlessui/react": "^2.1.8",
"@heroicons/react": "^2.1.5",
"@mdi/js": "^7.4.47",
"@mdi/react": "^1.6.1",
"axios": "^1.7.7",
"classnames": "^2.5.1",
"date-fns": "^4.1.0",
"prop-types": "^15.8.1",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-icons": "^5.3.0",
"react-router-dom": "^6.27.0",
"react-tag-autocomplete": "^7.3.0"
"react-tag-autocomplete": "^7.3.0",
"react-tooltip": "^5.28.0",
"winston": "^3.17.0"
},
"devDependencies": {
"@eslint/js": "^9.9.0",
"@types/node": "^22.10.1",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@vitejs/plugin-react-swc": "^3.5.0",
@ -48,6 +54,24 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@colors/colors": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz",
"integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==",
"engines": {
"node": ">=0.1.90"
}
},
"node_modules/@dabh/diagnostics": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz",
"integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==",
"dependencies": {
"colorspace": "1.1.x",
"enabled": "2.0.x",
"kuler": "^2.0.0"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.21.5",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz",
@ -723,6 +747,19 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@mdi/js": {
"version": "7.4.47",
"resolved": "https://registry.npmjs.org/@mdi/js/-/js-7.4.47.tgz",
"integrity": "sha512-KPnNOtm5i2pMabqZxpUz7iQf+mfrYZyKCZ8QNz85czgEt7cuHcGorWfdzUMWYA0SD+a6Hn4FmJ+YhzzzjkTZrQ=="
},
"node_modules/@mdi/react": {
"version": "1.6.1",
"resolved": "https://registry.npmjs.org/@mdi/react/-/react-1.6.1.tgz",
"integrity": "sha512-4qZeDcluDFGFTWkHs86VOlHkm6gnKaMql13/gpIcUQ8kzxHgpj31NuCkD8abECVfbULJ3shc7Yt4HJ6Wu6SN4w==",
"dependencies": {
"prop-types": "^15.7.2"
}
},
"node_modules/@nodelib/fs.scandir": {
"version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@ -1313,6 +1350,16 @@
"integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==",
"dev": true
},
"node_modules/@types/node": {
"version": "22.10.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.5.tgz",
"integrity": "sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.20.0"
}
},
"node_modules/@types/prop-types": {
"version": "15.7.13",
"resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.13.tgz",
@ -1338,6 +1385,11 @@
"@types/react": "*"
}
},
"node_modules/@types/triple-beam": {
"version": "1.3.5",
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz",
"integrity": "sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw=="
},
"node_modules/@typescript-eslint/eslint-plugin": {
"version": "8.6.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.6.0.tgz",
@ -1557,15 +1609,16 @@
}
},
"node_modules/@vitejs/plugin-react-swc": {
"version": "3.7.0",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.0.tgz",
"integrity": "sha512-yrknSb3Dci6svCd/qhHqhFPDSw0QtjumcqdKMoNNzmOl5lMXTTiqzjWtG4Qask2HdvvzaNgSunbQGet8/GrKdA==",
"version": "3.7.2",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.2.tgz",
"integrity": "sha512-y0byko2b2tSVVf5Gpng1eEhX1OvPC7x8yns1Fx8jDzlJp4LS6CMkCPfLw47cjyoMrshQDoQw4qcgjsU9VvlCew==",
"dev": true,
"license": "MIT",
"dependencies": {
"@swc/core": "^1.5.7"
"@swc/core": "^1.7.26"
},
"peerDependencies": {
"vite": "^4 || ^5"
"vite": "^4 || ^5 || ^6"
}
},
"node_modules/acorn": {
@ -1660,6 +1713,11 @@
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
"dev": true
},
"node_modules/async": {
"version": "3.2.6",
"resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz",
"integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA=="
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
@ -1887,6 +1945,15 @@
"node": ">=6"
}
},
"node_modules/color": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz",
"integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==",
"dependencies": {
"color-convert": "^1.9.3",
"color-string": "^1.6.0"
}
},
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@ -1902,8 +1969,38 @@
"node_modules/color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
"dev": true
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
},
"node_modules/color-string": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
"integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
"dependencies": {
"color-name": "^1.0.0",
"simple-swizzle": "^0.2.2"
}
},
"node_modules/color/node_modules/color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"dependencies": {
"color-name": "1.1.3"
}
},
"node_modules/color/node_modules/color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw=="
},
"node_modules/colorspace": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz",
"integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==",
"dependencies": {
"color": "^3.1.3",
"text-hex": "1.0.x"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
@ -1963,6 +2060,15 @@
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
"dev": true
},
"node_modules/date-fns": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-4.1.0.tgz",
"integrity": "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg==",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/kossnocorp"
}
},
"node_modules/debug": {
"version": "4.3.7",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz",
@ -2024,6 +2130,11 @@
"integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==",
"dev": true
},
"node_modules/enabled": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz",
"integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ=="
},
"node_modules/esbuild": {
"version": "0.21.5",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz",
@ -2155,12 +2266,13 @@
}
},
"node_modules/eslint-plugin-react-refresh": {
"version": "0.4.12",
"resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.12.tgz",
"integrity": "sha512-9neVjoGv20FwYtCP6CB1dzR1vr57ZDNOXst21wd2xJ/cTlM2xLq0GWVlSNTdMn/4BtP6cHYBMCSp1wFBJ9jBsg==",
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.16.tgz",
"integrity": "sha512-slterMlxAhov/DZO8NScf6mEeMBBXodFUolijDvrtTxyezyLoTQaa73FyYus/VbTdftd8wBgBxPMRk3poleXNQ==",
"dev": true,
"license": "MIT",
"peerDependencies": {
"eslint": ">=7"
"eslint": ">=8.40"
}
},
"node_modules/eslint-scope": {
@ -2305,6 +2417,11 @@
"reusify": "^1.0.4"
}
},
"node_modules/fecha": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz",
"integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw=="
},
"node_modules/file-entry-cache": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz",
@ -2364,6 +2481,11 @@
"integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==",
"dev": true
},
"node_modules/fn.name": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz",
"integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw=="
},
"node_modules/follow-redirects": {
"version": "1.15.9",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz",
@ -2505,10 +2627,11 @@
}
},
"node_modules/globals": {
"version": "15.9.0",
"resolved": "https://registry.npmjs.org/globals/-/globals-15.9.0.tgz",
"integrity": "sha512-SmSKyLLKFbSr6rptvP8izbyxJL4ILwqO9Jg23UA0sDlGlu58V59D1//I3vlc0KJphVdUR7vMjHIplYnzBxorQA==",
"version": "15.14.0",
"resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz",
"integrity": "sha512-OkToC372DtlQeje9/zHIo5CT8lRP/FUgEOKBEhU4e0abL7J7CD24fD9ohiLN5hagG/kWCYj4K5oaxxtj2Z0Dig==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=18"
},
@ -2577,6 +2700,16 @@
"node": ">=0.8.19"
}
},
"node_modules/inherits": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
},
"node_modules/is-arrayish": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
"integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ=="
},
"node_modules/is-binary-path": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz",
@ -2652,6 +2785,17 @@
"node": ">=8"
}
},
"node_modules/is-stream": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
@ -2726,6 +2870,11 @@
"json-buffer": "3.0.1"
}
},
"node_modules/kuler": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz",
"integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A=="
},
"node_modules/levn": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@ -2775,6 +2924,22 @@
"integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==",
"dev": true
},
"node_modules/logform": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz",
"integrity": "sha512-TFYA4jnP7PVbmlBIfhlSe+WKxs9dklXMTEGcBCIvLhE/Tn3H6Gk1norupVW7m5Cnd4bLcr08AytbyV/xj7f/kQ==",
"dependencies": {
"@colors/colors": "1.6.0",
"@types/triple-beam": "^1.3.2",
"fecha": "^4.2.0",
"ms": "^2.1.1",
"safe-stable-stringify": "^2.3.1",
"triple-beam": "^1.3.0"
},
"engines": {
"node": ">= 12.0.0"
}
},
"node_modules/loose-envify": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@ -2857,8 +3022,7 @@
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"dev": true
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/mz": {
"version": "2.7.0",
@ -2936,6 +3100,14 @@
"node": ">= 6"
}
},
"node_modules/one-time": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz",
"integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==",
"dependencies": {
"fn.name": "1.x.x"
}
},
"node_modules/optionator": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@ -3042,10 +3214,11 @@
}
},
"node_modules/picocolors": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.0.tgz",
"integrity": "sha512-TQ92mBOW0l3LeMeyLV6mzy/kWr8lkd/hp3mTg7wYK7zJhuBStmGMBG0BdeDZS/dZx1IukaX6Bk11zcln25o1Aw==",
"dev": true
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
"integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
"dev": true,
"license": "ISC"
},
"node_modules/picomatch": {
"version": "2.3.1",
@ -3078,9 +3251,9 @@
}
},
"node_modules/postcss": {
"version": "8.4.47",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz",
"integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==",
"version": "8.4.49",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz",
"integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==",
"dev": true,
"funding": [
{
@ -3096,9 +3269,10 @@
"url": "https://github.com/sponsors/ai"
}
],
"license": "MIT",
"dependencies": {
"nanoid": "^3.3.7",
"picocolors": "^1.1.0",
"picocolors": "^1.1.1",
"source-map-js": "^1.2.1"
},
"engines": {
@ -3362,6 +3536,19 @@
"react": "^18.0.0"
}
},
"node_modules/react-tooltip": {
"version": "5.28.0",
"resolved": "https://registry.npmjs.org/react-tooltip/-/react-tooltip-5.28.0.tgz",
"integrity": "sha512-R5cO3JPPXk6FRbBHMO0rI9nkUG/JKfalBSQfZedZYzmqaZQgq7GLzF8vcCWx6IhUCKg0yPqJhXIzmIO5ff15xg==",
"dependencies": {
"@floating-ui/dom": "^1.6.1",
"classnames": "^2.3.0"
},
"peerDependencies": {
"react": ">=16.14.0",
"react-dom": ">=16.14.0"
}
},
"node_modules/read-cache": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
@ -3371,6 +3558,19 @@
"pify": "^2.3.0"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/readdirp": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
@ -3477,6 +3677,33 @@
"queue-microtask": "^1.2.2"
}
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/safe-stable-stringify": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz",
"integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==",
"engines": {
"node": ">=10"
}
},
"node_modules/scheduler": {
"version": "0.23.2",
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
@ -3530,6 +3757,14 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/simple-swizzle": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
"integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
"dependencies": {
"is-arrayish": "^0.3.1"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@ -3539,6 +3774,22 @@
"node": ">=0.10.0"
}
},
"node_modules/stack-trace": {
"version": "0.0.10",
"resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz",
"integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==",
"engines": {
"node": "*"
}
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"dependencies": {
"safe-buffer": "~5.2.0"
}
},
"node_modules/string-width": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
@ -3729,6 +3980,11 @@
"node": ">=14.0.0"
}
},
"node_modules/text-hex": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz",
"integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg=="
},
"node_modules/text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
@ -3768,6 +4024,14 @@
"node": ">=8.0"
}
},
"node_modules/triple-beam": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz",
"integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==",
"engines": {
"node": ">= 14.0.0"
}
},
"node_modules/ts-api-utils": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz",
@ -3804,10 +4068,11 @@
}
},
"node_modules/typescript": {
"version": "5.6.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz",
"integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==",
"version": "5.7.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
"integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@ -3839,6 +4104,12 @@
}
}
},
"node_modules/undici-types": {
"version": "6.20.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz",
"integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==",
"dev": true
},
"node_modules/update-browserslist-db": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz",
@ -3881,8 +4152,7 @@
"node_modules/util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
"dev": true
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
},
"node_modules/vite": {
"version": "5.4.7",
@ -3958,6 +4228,40 @@
"node": ">= 8"
}
},
"node_modules/winston": {
"version": "3.17.0",
"resolved": "https://registry.npmjs.org/winston/-/winston-3.17.0.tgz",
"integrity": "sha512-DLiFIXYC5fMPxaRg832S6F5mJYvePtmO5G9v9IgUFPhXm9/GkXarH/TUrBAVzhTCzAj9anE/+GjrgXp/54nOgw==",
"dependencies": {
"@colors/colors": "^1.6.0",
"@dabh/diagnostics": "^2.0.2",
"async": "^3.2.3",
"is-stream": "^2.0.0",
"logform": "^2.7.0",
"one-time": "^1.0.0",
"readable-stream": "^3.4.0",
"safe-stable-stringify": "^2.3.1",
"stack-trace": "0.0.x",
"triple-beam": "^1.3.0",
"winston-transport": "^4.9.0"
},
"engines": {
"node": ">= 12.0.0"
}
},
"node_modules/winston-transport": {
"version": "4.9.0",
"resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.9.0.tgz",
"integrity": "sha512-8drMJ4rkgaPo1Me4zD/3WLfI/zPdA9o2IipKODunnGDcuqbHwjsbB79ylv04LCGGzU0xQ6vTznOMpQGaLhhm6A==",
"dependencies": {
"logform": "^2.7.0",
"readable-stream": "^3.6.2",
"triple-beam": "^1.3.0"
},
"engines": {
"node": ">= 12.0.0"
}
},
"node_modules/word-wrap": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz",

View file

@ -13,17 +13,23 @@
"dependencies": {
"@headlessui/react": "^2.1.8",
"@heroicons/react": "^2.1.5",
"@mdi/js": "^7.4.47",
"@mdi/react": "^1.6.1",
"axios": "^1.7.7",
"classnames": "^2.5.1",
"date-fns": "^4.1.0",
"prop-types": "^15.8.1",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-icons": "^5.3.0",
"react-router-dom": "^6.27.0",
"react-tag-autocomplete": "^7.3.0"
"react-tag-autocomplete": "^7.3.0",
"react-tooltip": "^5.28.0",
"winston": "^3.17.0"
},
"devDependencies": {
"@eslint/js": "^9.9.0",
"@types/node": "^22.10.1",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@vitejs/plugin-react-swc": "^3.5.0",

View file

@ -1,16 +1,24 @@
// App.tsx or App.jsx
import React from 'react';
import { Route, BrowserRouter as Router, Routes } from 'react-router-dom';
import Sidebar from './components/Sidebar';
import DocumentProcessor from './DocumentProcessor';
import ExperimentalOCR from './ExperimentalOCR'; // New component
import History from './History';
/**
 * Root application component.
 *
 * Wraps the UI in a react-router `Router` and maps "/" to DocumentProcessor,
 * "/experimental-ocr" to ExperimentalOCR and "/history" to History. The
 * Sidebar is rendered next to a scrollable content pane inside a flex row that
 * spans the full viewport height; its `onSelectPage` callback only logs the
 * selected page name to the console.
 *
 * NOTE(review): this text looks like a rendered diff without +/- markers, so
 * the first bare <Routes> block below is presumably the pre-change version
 * that the flex layout replaced — confirm against the real App.tsx.
 */
const App: React.FC = () => {
return (
<Router>
<Routes>
<Route path="/" element={<DocumentProcessor />} />
<Route path="/experimental-ocr" element={<ExperimentalOCR />} />
</Routes>
<div style={{ display: "flex", height: "100vh" }}>
<Sidebar onSelectPage={(page) => console.log(page)} />
<div style={{ flex: 1, overflowY: "auto" }}>
<Routes>
<Route path="/" element={<DocumentProcessor />} />
<Route path="/experimental-ocr" element={<ExperimentalOCR />} />
<Route path="/history" element={<History />} />
</Routes>
</div>
</div>
</Router>
);
};

View file

@ -1,6 +1,5 @@
import axios from "axios";
import React, { useCallback, useEffect, useState } from "react";
import { Link } from "react-router-dom";
import "react-tag-autocomplete/example/src/styles.css"; // Ensure styles are loaded
import DocumentsToProcess from "./components/DocumentsToProcess";
import NoDocuments from "./components/NoDocuments";
@ -46,22 +45,17 @@ const DocumentProcessor: React.FC = () => {
const [generateTags, setGenerateTags] = useState(true);
const [error, setError] = useState<string | null>(null);
// Temporary feature flags
const [ocrEnabled, setOcrEnabled] = useState(false);
// Custom hook to fetch initial data
const fetchInitialData = useCallback(async () => {
try {
const [filterTagRes, documentsRes, tagsRes, ocrEnabledRes] = await Promise.all([
const [filterTagRes, documentsRes, tagsRes] = await Promise.all([
axios.get<{ tag: string }>("/api/filter-tag"),
axios.get<Document[]>("/api/documents"),
axios.get<Record<string, number>>("/api/tags"),
axios.get<{enabled: boolean}>("/api/experimental/ocr"),
]);
setFilterTag(filterTagRes.data.tag);
setDocuments(documentsRes.data);
setOcrEnabled(ocrEnabledRes.data.enabled);
const tags = Object.keys(tagsRes.data).map((tag) => ({
id: tag,
name: tag,
@ -199,16 +193,6 @@ const DocumentProcessor: React.FC = () => {
<div className="max-w-5xl mx-auto p-6 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200">
<header className="text-center">
<h1 className="text-4xl font-bold mb-8">Paperless GPT</h1>
{ocrEnabled && (
<div>
<Link
to="/experimental-ocr"
className="inline-block bg-blue-600 hover:bg-blue-700 text-white font-semibold py-2 px-4 rounded transition duration-200 dark:bg-blue-500 dark:hover:bg-blue-600"
>
OCR via LLMs (Experimental)
</Link>
</div>
)}
</header>
{error && (

126
web-app/src/History.tsx Normal file
View file

@ -0,0 +1,126 @@
import React, { useEffect, useState } from 'react';
import UndoCard from './components/UndoCard';
/**
 * One entry of the list loaded from `/api/modifications`.
 * Every field is spread onto an UndoCard as a prop, so the names must match
 * the UndoCard prop names exactly.
 */
interface ModificationHistory {
// Identifier of this entry; used as the React key and in the undo endpoint URL.
ID: number;
// Identifier of the affected document; UndoCard builds the Paperless link from it.
DocumentID: number;
// Timestamp string of the change; UndoCard passes it through `new Date(...)`.
DateChanged: string;
// Name of the changed field; UndoCard renders the value as tag chips when this is 'tags'.
ModField: string;
// Value shown under "Previous"; parsed as a JSON string array when ModField is 'tags'.
PreviousValue: string;
// Value shown under "New"; parsed the same way as PreviousValue.
NewValue: string;
// True once the modification has been undone; disables the undo button.
Undone: boolean;
// Timestamp string of the undo; presumably null while Undone is false — verify against the API.
UndoneDate: string | null;
}
/**
 * History page: lists recorded modifications and lets the user undo them.
 *
 * On mount it loads, independently of each other:
 *  - the Paperless base URL from `/api/paperless-url` (handed to every
 *    UndoCard so it can link to the document), and
 *  - the modification list from `/api/modifications`.
 *
 * Renders a spinner while the list is loading, an error message if loading or
 * an undo request failed, and otherwise one UndoCard per modification.
 */
const History: React.FC = () => {
  const [modifications, setModifications] = useState<ModificationHistory[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [paperlessUrl, setPaperlessUrl] = useState<string>('');

  // Get Paperless URL
  useEffect(() => {
    const fetchUrl = async () => {
      try {
        const response = await fetch('/api/paperless-url');
        if (!response.ok) {
          throw new Error('Failed to fetch public URL');
        }
        const { url } = await response.json();
        // Only accept a string; anything else would end up as "undefined/..."
        // (or similar) inside the document links.
        setPaperlessUrl(typeof url === 'string' ? url : '');
      } catch (err) {
        // Best effort: the history list is still usable without the base URL,
        // so this failure is logged instead of being shown as a page error.
        console.error('Error fetching Paperless URL:', err);
      }
    };

    fetchUrl();
  }, []);

  // Get all modifications
  useEffect(() => {
    const fetchModifications = async () => {
      try {
        const response = await fetch('/api/modifications');
        if (!response.ok) {
          throw new Error('Failed to fetch modifications');
        }
        const data: unknown = await response.json();
        // An empty result may arrive as JSON `null` rather than `[]` (Go's
        // encoding/json does this for nil slices). Storing it unchanged would
        // make `modifications.length` throw during the next render.
        setModifications(Array.isArray(data) ? (data as ModificationHistory[]) : []);
      } catch (err) {
        setError(err instanceof Error ? err.message : 'Unknown error occurred');
      } finally {
        setLoading(false);
      }
    };

    fetchModifications();
  }, []);

  /**
   * Asks the backend to undo modification `id` and, on success, marks the
   * matching entry as undone locally instead of refetching the whole list.
   */
  const handleUndo = async (id: number) => {
    try {
      const response = await fetch(`/api/undo-modification/${id}`, {
        method: 'POST',
      });

      if (!response.ok) {
        throw new Error('Failed to undo modification');
      }

      // Use ISO 8601 format for consistency
      const now = new Date().toISOString();

      setModifications(mods => mods.map(mod =>
        mod.ID === id
          ? { ...mod, Undone: true, UndoneDate: now }
          : mod
      ));
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to undo modification');
    }
  };

  if (loading) {
    return (
      <div className="flex justify-center items-center min-h-screen">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-500" />
      </div>
    );
  }

  if (error) {
    return (
      <div className="text-red-500 dark:text-red-400 p-4 text-center">
        Error: {error}
      </div>
    );
  }

  return (
    <div className="container mx-auto px-4 py-8">
      <h1 className="text-2xl font-bold text-gray-800 dark:text-gray-200">
        Modification History
      </h1>
      <div className="mb-6 text-sm text-gray-500 dark:text-gray-400">
        Note: when undoing tag changes, this will not re-add 'paperless-gpt-auto'
      </div>
      {modifications.length === 0 ? (
        <p className="text-gray-500 dark:text-gray-400 text-center">
          No modifications found
        </p>
      ) : (
        <div className="grid gap-4 md:grid-cols-1 lg:grid-cols-1">
          {modifications.map((modification) => (
            <UndoCard
              key={modification.ID}
              {...modification}
              onUndo={handleUndo}
              paperlessUrl={paperlessUrl}
            />
          ))}
        </div>
      )}
    </div>
  );
};
export default History;

View file

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 1000 1000" style="enable-background:new 0 0 1000 1000;" xml:space="preserve">
<style type="text/css">
.st0{fill:#FFFFFF;}
</style>
<path class="st0" d="M299,891.7c-4.2-19.8-12.5-59.6-13.6-59.6c-176.7-105.7-155.8-288.7-97.3-393.4
c12.5,131.8,245.8,222.8,109.8,383.9c-1.1,2,6.2,27.2,12.5,50.2c27.2-46,68-101.4,65.8-106.7C208.9,358.2,731.9,326.9,840.6,73.7
c49.1,244.8-25.1,623.5-445.5,719.7c-2,1.1-76.3,131.8-79.5,132.9c0-2-31.4-1.1-27.2-11.5C290.7,908.4,294.8,900.1,299,891.7
L299,891.7z M293.8,793.4c53.3-61.8-9.4-167.4-47.1-201.9C310.5,701.3,306.3,765.1,293.8,793.4L293.8,793.4z"/>
</svg>

After

Width:  |  Height:  |  Size: 869 B

View file

@ -0,0 +1,75 @@
/* Sidebar container: a fixed-width vertical flex column whose width change is animated. */
.sidebar {
width: 250px;
background-color: #2c3e50;
color: #ecf0f1;
display: flex;
flex-direction: column;
transition: width 0.3s;
}
/* Collapsed state, toggled by Sidebar.tsx via the "collapsed" class. */
/* NOTE(review): the element also carries Tailwind's min-w-[64px], so the */
/* effective width is presumably 64px rather than 60px — confirm. */
.sidebar.collapsed {
width: 60px;
}
/* Header row holding the logo and the toggle button. */
.sidebar-header {
display: flex;
align-items: center;
padding: 10px;
background-color: #34495e;
justify-content: space-between;
}
/* When collapsed only the toggle button is rendered, so centre it. */
.sidebar-header.collapsed {
justify-content: center;
}
.logo {
height: 40px;
margin-right: 10px;
}
/* Navigation list: strip the default list styling. */
.menu-items {
list-style: none;
padding: 0;
margin: 0;
}
.menu-items li {
padding: 15px 20px;
cursor: pointer;
}
/* Entry for the current route; padding and cursor repeat the base `li` rule. */
.menu-items li.active {
background-color: darkslategray;
padding: 15px 20px;
cursor: pointer;
}
/* Declared after `.active` with lower specificity, so an active item keeps its own background on hover. */
.menu-items li:hover {
background-color: #1abc9c;
}
.menu-items li a {
text-decoration: none;
color: inherit;
font-size: 18px;
}
/* Borderless button that collapses or expands the sidebar. */
.toggle-btn {
background: none;
border: none;
color: white;
font-size: 24px;
cursor: pointer;
}
/* NOTE(review): Sidebar.tsx sets an inline `display: flex` on these links, and */
/* inline styles take precedence over this rule, so it appears to have no */
/* effect (the icons stay visible when collapsed) — confirm before relying on it. */
.sidebar.collapsed .menu-items li a {
display: none;
}
/* NOTE(review): Sidebar.tsx does not render the logo while collapsed, so this */
/* rule looks unused at the moment — confirm. */
.sidebar.collapsed .logo {
height: 40px;
margin: auto;
}

View file

@ -0,0 +1,81 @@
import axios from "axios";
import React, { useCallback, useEffect, useState } from 'react';
import "./Sidebar.css";
import { Link, useLocation } from 'react-router-dom';
import { Icon } from '@mdi/react';
import { mdiHomeOutline, mdiTextBoxSearchOutline, mdiHistory } from '@mdi/js';
import logo from "../assets/logo.svg";
/** Props accepted by the Sidebar component. */
interface SidebarProps {
// Invoked with the clicked menu item's `name` ('home', 'history' or 'ocr')
// whenever a navigation link is clicked.
onSelectPage: (page: string) => void;
}
/**
 * Collapsible navigation sidebar.
 *
 * Always shows "Home" and "History"; the "OCR" entry is appended only when
 * `/api/experimental/ocr` reports `enabled: true`. The entry whose path equals
 * the current location gets the "active" class. Clicking an entry navigates
 * through react-router and reports the entry's `name` through `onSelectPage`.
 */
const Sidebar: React.FC<SidebarProps> = ({ onSelectPage }) => {
  const [collapsed, setCollapsed] = useState(false);
  const location = useLocation();

  // Functional update: always toggles from the latest state, even when several
  // clicks are batched into one render.
  const toggleSidebar = () => {
    setCollapsed((previous) => !previous);
  };

  const handlePageClick = (page: string) => {
    onSelectPage(page);
  };

  // Get whether experimental OCR is enabled
  const [ocrEnabled, setOcrEnabled] = useState(false);
  const fetchOcrEnabled = useCallback(async () => {
    try {
      const res = await axios.get<{ enabled: boolean }>("/api/experimental/ocr");
      setOcrEnabled(res.data.enabled);
    } catch (err) {
      // Best effort: on failure the OCR entry simply stays hidden.
      console.error(err);
    }
  }, []);

  useEffect(() => {
    fetchOcrEnabled();
  }, [fetchOcrEnabled]);

  const menuItems = [
    { name: 'home', path: '/', icon: mdiHomeOutline, title: 'Home' },
    { name: 'history', path: '/history', icon: mdiHistory, title: 'History' },
  ];

  // If OCR is enabled, add the OCR menu item
  if (ocrEnabled) {
    menuItems.push({ name: 'ocr', path: '/experimental-ocr', icon: mdiTextBoxSearchOutline, title: 'OCR' });
  }

  return (
    <div className={`sidebar min-w-[64px] ${collapsed ? "collapsed" : ""}`}>
      <div className={`sidebar-header ${collapsed ? "collapsed" : ""}`}>
        {!collapsed && <img src={logo} alt="Logo" className="logo w-8 h-8 object-contain flex-shrink-0" />}
        {/* The glyph alone carries no accessible name, so label the button and expose its state. */}
        <button
          type="button"
          className="toggle-btn"
          onClick={toggleSidebar}
          aria-label={collapsed ? "Expand sidebar" : "Collapse sidebar"}
          aria-expanded={!collapsed}
        >
          &#9776;
        </button>
      </div>
      <ul className="menu-items">
        {menuItems.map((item) => {
          const isActive = location.pathname === item.path;
          return (
            <li key={item.name} className={isActive ? "active" : ""}>
              <Link
                to={item.path}
                onClick={() => handlePageClick(item.name)}
                style={{ display: 'flex', alignItems: 'center' }}
                aria-current={isActive ? "page" : undefined}
              >
                {/* Fixed-size icon box keeps the icons aligned in both states. */}
                <div className="w-7 h-7 flex items-center justify-center flex-shrink-0">
                  <Icon path={item.icon} size={1} />
                </div>
                {!collapsed && <span className="ml-2">{item.title}</span>}
              </Link>
            </li>
          );
        })}
      </ul>
    </div>
  );
};
export default Sidebar;

View file

@ -0,0 +1,193 @@
// UndoCard.tsx
import React from 'react';
import { Tooltip } from 'react-tooltip'
/**
 * Props of UndoCard: the fields of one modification record plus the undo
 * callback and the Paperless base URL.
 */
interface ModificationProps {
// Identifier of the modification; passed to onUndo and used in the tooltip ids.
ID: number;
// Identifier of the affected document; displayed and used in the document link.
DocumentID: number;
// Timestamp string of the change; displayed through formatDate.
DateChanged: string;
// Name of the changed field; 'tags' switches the values to tag-chip rendering.
ModField: string;
// Value shown under "Previous"; parsed as a JSON string array when ModField is 'tags'.
PreviousValue: string;
// Value shown under "New"; parsed the same way as PreviousValue.
NewValue: string;
// When true the values are struck through and the undo button is disabled.
Undone: boolean;
// Timestamp string shown on the disabled button; formatDate renders null as ''.
UndoneDate: string | null;
// Called with ID when the undo button is clicked.
onUndo: (id: number) => void;
// Prefix prepended to `/documents/<DocumentID>/details` for the document link.
paperlessUrl: string;
}
/**
 * Formats a timestamp string as `YYYY-MM-DD HH:mm` in the local time zone.
 *
 * @param dateString Any string accepted by the `Date` constructor, or null.
 * @returns '' for null or empty input, 'Invalid date' when the string cannot
 *          be parsed, otherwise the formatted timestamp.
 */
const formatDate = (dateString: string | null): string => {
  // Zero-pads a date component to two digits.
  const twoDigits = (component: number): string => String(component).padStart(2, '0');

  if (!dateString) {
    return '';
  }

  try {
    const parsed = new Date(dateString);

    // An unparseable string yields a Date whose time value is NaN.
    if (Number.isNaN(parsed.getTime())) {
      return 'Invalid date';
    }

    const datePart = [
      parsed.getFullYear(),
      twoDigits(parsed.getMonth() + 1), // getMonth() is zero-based
      twoDigits(parsed.getDate()),
    ].join('-');
    const timePart = [twoDigits(parsed.getHours()), twoDigits(parsed.getMinutes())].join(':');

    return `${datePart} ${timePart}`;
  } catch {
    return 'Invalid date';
  }
};
/**
 * Builds the link to a document's detail page in the Paperless web UI.
 *
 * Trailing slashes on the base URL are dropped first, so a configured URL such
 * as `https://paperless.example/` does not produce `//documents/...`.
 *
 * @param paperlessUrl Base URL of the Paperless instance; may be empty, in
 *                     which case the result is the relative path only.
 * @param documentId   Identifier of the document to link to.
 * @returns `<base>/documents/<documentId>/details`
 */
const buildPaperlessUrl = (paperlessUrl: string, documentId: number): string => {
  const baseUrl = paperlessUrl.replace(/\/+$/, '');
  return `${baseUrl}/documents/${documentId}/details`;
};
/**
 * Card describing a single modification, with a button to undo it.
 *
 * The left part shows the change date, a link to the document in Paperless and
 * the modified field, followed by the previous and new values. Values of the
 * 'tags' field are rendered as chips; other values longer than 100 characters
 * get a tooltip with the full text. Once `Undone` is true the values are
 * struck through and the button is disabled and shows the undo date.
 */
const UndoCard: React.FC<ModificationProps> = (props) => {
  const {
    ID,
    DocumentID,
    DateChanged,
    ModField,
    PreviousValue,
    NewValue,
    Undone,
    UndoneDate,
    onUndo,
    paperlessUrl,
  } = props;

  // Class strings that are reused by several elements.
  const captionClass = "text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1";
  const detailClass = "text-sm text-gray-700 dark:text-gray-300";
  const valueClass = "text-gray-600 dark:text-gray-300 truncate overflow-hidden flex-shrink-0 whitespace-nowrap flex-1 max-w-full group relative";
  const valueRowClass = `text-sm flex flex-nowrap ${Undone ? 'line-through' : ''}`;
  const undoButtonTone = Undone
    ? 'bg-gray-300 dark:bg-gray-700 text-gray-500 dark:text-gray-400 cursor-not-allowed'
    : 'bg-blue-500 dark:bg-blue-600 text-white hover:bg-blue-600 dark:hover:bg-blue-700';

  // Tooltip ids link each value span to its <Tooltip> element.
  const previousTooltipId = `tooltip-${ID}-prev`;
  const newTooltipId = `tooltip-${ID}-new`;

  const tooltipStyle: React.CSSProperties = {
    flexWrap: 'wrap',
    wordWrap: 'break-word',
    zIndex: 10,
    whiteSpace: 'pre-line',
    textAlign: 'left',
  };

  // Turns a raw value into what is displayed: tag chips, a formatted date or the plain text.
  const formatValue = (value: string, field: string) => {
    if (field !== 'tags') {
      return field.toLowerCase().includes('date') ? formatDate(value) : value;
    }

    try {
      const tags = JSON.parse(value) as string[];
      const chips = tags.map((tag) => (
        <span
          key={tag}
          className="bg-blue-100 dark:bg-blue-900 text-blue-800 dark:text-blue-200 text-xs font-medium px-2.5 py-0.5 rounded-full"
        >
          {tag}
        </span>
      ));
      return <div className="flex flex-wrap gap-1">{chips}</div>;
    } catch {
      // Anything that is not a JSON array falls back to the raw text.
      return value;
    }
  };

  // One "label + value" line; the value only references a tooltip when it is long and not a tag list.
  const renderValueRow = (label: React.ReactNode, value: string, tooltipId: string) => (
    <div className={valueRowClass}>
      {label}
      <span
        className={valueClass}
        {...(ModField !== 'tags' && value.length > 100 ? { 'data-tooltip-id': tooltipId } : {})}
      >
        {formatValue(value, ModField)}
      </span>
    </div>
  );

  // Tooltip showing the untruncated text of a value.
  const renderTooltip = (tooltipId: string, fullText: string) => (
    <Tooltip id={tooltipId} place="bottom" className="flex-wrap" style={tooltipStyle}>
      {fullText}
    </Tooltip>
  );

  return (
    <div className="relative bg-white dark:bg-gray-800 p-4 rounded-md shadow-md">
      <div className="grid grid-cols-6">
        <div className="col-span-5"> {/* Left content */}
          <div className="grid grid-cols-3 gap-4 mb-4">
            <div className="">
              <div className={captionClass}>
                Date Modified
              </div>
              <div className={detailClass}>
                {DateChanged && formatDate(DateChanged)}
              </div>
            </div>
            <div className="">
              <a
                href={buildPaperlessUrl(paperlessUrl, DocumentID)}
                target="_blank"
                rel="noopener noreferrer"
                className="text-blue-500 hover:text-blue-600 dark:text-blue-400 dark:hover:text-blue-300"
              >
                <div className={captionClass}>
                  Document ID
                </div>
                <div className={detailClass}>
                  {DocumentID}
                </div>
              </a>
            </div>
            <div className="">
              <div className={captionClass}>
                Modified Field
              </div>
              <div className={detailClass}>
                {ModField}
              </div>
            </div>
          </div>
          <div className="mt-3">
            <div className="mt-2 space-y-2">
              {renderValueRow(
                <span className="text-red-500 dark:text-red-400">Previous: &nbsp;</span>,
                PreviousValue,
                previousTooltipId,
              )}
              {renderValueRow(
                <span className="text-green-500 dark:text-green-400">New: &nbsp;</span>,
                NewValue,
                newTooltipId,
              )}
            </div>
            {renderTooltip(previousTooltipId, PreviousValue)}
            {renderTooltip(newTooltipId, NewValue)}
          </div>
        </div>
        <div className="grid place-items-center"> {/* Button content */}
          <button
            onClick={() => onUndo(ID)}
            disabled={Undone}
            className={`mt-2 mb-2 p-4 text-sm font-medium rounded-md min-w-[100px] max-w-[150px] text-center break-words ${undoButtonTone} transition-colors duration-200`}
          >
            {Undone ? (
              <>
                <span className="block text-xs">Undone on</span>
                <span className="block text-xs">{formatDate(UndoneDate)}</span>
              </>
            ) : (
              'Undo'
            )}
          </button>
        </div>
      </div>
    </div>
  );
};
export default UndoCard;