From d1f23de5a6cbc4ceb0bded36885251e340e623c8 Mon Sep 17 00:00:00 2001 From: ccrlawrence <ccrlawrence@gmail.com> Date: Mon, 11 Nov 2024 09:40:39 +0000 Subject: [PATCH 01/22] Tag query change (#44) * Tag query change --------- Co-authored-by: Icereed <domi@icereed.net> --- paperless.go | 6 +++--- paperless_test.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/paperless.go b/paperless.go index 18ccf52..212e73e 100644 --- a/paperless.go +++ b/paperless.go @@ -108,10 +108,10 @@ func (c *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error func (c *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string) ([]Document, error) { tagQueries := make([]string, len(tags)) for i, tag := range tags { - tagQueries[i] = fmt.Sprintf("tag:%s", tag) + tagQueries[i] = fmt.Sprintf("tags__name__iexact=%s", tag) } - searchQuery := strings.Join(tagQueries, " ") - path := fmt.Sprintf("api/documents/?query=%s", urlEncode(searchQuery)) + searchQuery := strings.Join(tagQueries, "&") + path := fmt.Sprintf("api/documents/?%s", urlEncode(searchQuery)) resp, err := c.Do(ctx, "GET", path, nil) if err != nil { diff --git a/paperless_test.go b/paperless_test.go index 6c70b9d..17cddde 100644 --- a/paperless_test.go +++ b/paperless_test.go @@ -203,7 +203,7 @@ func TestGetDocumentsByTags(t *testing.T) { // Set mock responses env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) { // Verify query parameters - expectedQuery := "query=tag:tag1+tag:tag2" + expectedQuery := "tags__name__iexact=tag1&tags__name__iexact=tag2" assert.Equal(t, expectedQuery, r.URL.RawQuery) w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(documentsResponse) From 5b3373743a9050ff5761eb89c648564457cea1f9 Mon Sep 17 00:00:00 2001 From: ccrlawrence <ccrlawrence@gmail.com> Date: Wed, 13 Nov 2024 04:47:25 +0000 Subject: [PATCH 02/22] Fix Vision OCR for OpenAI (#47) ## Summary by CodeRabbit - **New Features** - Updated environment variable descriptions for improved clarity on OCR processing options. - Enhanced the `doOCRViaLLM` method to support different LLM providers and improve image data handling. - **Bug Fixes** - Standardized error handling for better reporting across multiple methods. - **Documentation** - Revised Docker Compose section in `README.md` to reflect updated environment variable options. --- README.md | 4 ++-- app_llm.go | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c829818..6b00991 100644 --- a/README.md +++ b/README.md @@ -75,8 +75,8 @@ services: OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI LLM_LANGUAGE: 'English' # Optional, default is 'English' OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama - VISION_LLM_PROVIDER: 'ollama' # Optional, for OCR - VISION_LLM_MODEL: 'minicpm-v' # Optional, for OCR + VISION_LLM_PROVIDER: 'ollama' # Optional (for OCR) - ollama or openai + VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' volumes: - ./prompts:/app/prompts # Mount the prompts directory diff --git a/app_llm.go b/app_llm.go index b7228f6..a4a4c6b 100644 --- a/app_llm.go +++ b/app_llm.go @@ -3,6 +3,7 @@ package main import ( "bytes" "context" + "encoding/base64" "fmt" "strings" "sync" @@ -81,14 +82,26 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro prompt := promptBuffer.String() + // If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision + var parts []llms.ContentPart + if strings.ToLower(visionLlmProvider) != "openai" { + parts = []llms.ContentPart{ + llms.BinaryPart("image/jpeg", jpegBytes), + llms.TextPart(prompt), + } + } else { + base64Image := base64.StdEncoding.EncodeToString(jpegBytes) + parts = []llms.ContentPart{ + llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)), + llms.TextPart(prompt), + } + } + // Convert the image to text completion, err := app.VisionLLM.GenerateContent(ctx, []llms.MessageContent{ { - Parts: []llms.ContentPart{ - llms.BinaryPart("image/jpeg", jpegBytes), - llms.TextPart(prompt), - }, - Role: llms.ChatMessageTypeHuman, + Parts: parts, + Role: llms.ChatMessageTypeHuman, }, }) if err != nil { From b788f091858823e374f882bbcb6b2d9b56ae7295 Mon Sep 17 00:00:00 2001 From: ccrlawrence <ccrlawrence@gmail.com> Date: Fri, 13 Dec 2024 15:48:09 +0000 Subject: [PATCH 03/22] UNDO feature - easily track changes (#54) --- .gitignore | 3 +- Dockerfile | 11 +- README.md | 2 + app_http_handlers.go | 89 +++++++- go.mod | 9 +- go.sum | 18 +- local_db.go | 79 +++++++ main.go | 22 +- paperless.go | 101 ++++++++- paperless_test.go | 26 ++- web-app/package-lock.json | 311 +++++++++++++++++++++++++++- web-app/package.json | 8 +- web-app/src/App.tsx | 16 +- web-app/src/DocumentProcessor.tsx | 18 +- web-app/src/History.tsx | 126 +++++++++++ web-app/src/assets/logo.svg | 12 ++ web-app/src/components/Sidebar.css | 75 +++++++ web-app/src/components/Sidebar.tsx | 81 ++++++++ web-app/src/components/UndoCard.tsx | 193 +++++++++++++++++ 19 files changed, 1152 insertions(+), 48 deletions(-) create mode 100644 local_db.go create mode 100644 web-app/src/History.tsx create mode 100644 web-app/src/assets/logo.svg create mode 100644 web-app/src/components/Sidebar.css create mode 100644 web-app/src/components/Sidebar.tsx create mode 100644 web-app/src/components/UndoCard.tsx diff --git a/.gitignore b/.gitignore index 296cc9f..e2c91ba 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .DS_Store prompts/ tests/tmp -tmp/ \ No newline at end of file +tmp/ +db/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b83b642..92e32c9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,11 +18,14 @@ COPY go.mod go.sum ./ # Download dependencies RUN go mod download -# Copy the rest of the application code -COPY . . +# Pre-compile go-sqlite3 to avoid doing this every time +RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3 -# Build the Go binary with the musl build tag -RUN go build -tags musl -o paperless-gpt . +# Now copy the actual source files +COPY *.go . + +# Build the binary using caching for both go modules and build cache +RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt . # Stage 2: Build Vite frontend FROM node:20-alpine AS frontend diff --git a/README.md b/README.md index 6b00991..ca10f02 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ services: environment: PAPERLESS_BASE_URL: 'http://paperless-ngx:8000' PAPERLESS_API_TOKEN: 'your_paperless_api_token' + PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional, your public link to access Paperless LLM_PROVIDER: 'openai' # or 'ollama' LLM_MODEL: 'gpt-4o' # or 'llama2' OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI @@ -137,6 +138,7 @@ If you prefer to run the application manually: |-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes | | `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes | +| `PAPERLESS_PUBLIC_URL` | The public URL for your Paperless instance, if it is different to your `PAPERLESS_BASE_URL` - say if you are running in Docker Compose | No | | `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes | | `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes | | `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. | diff --git a/app_http_handlers.go b/app_http_handlers.go index c9c243f..27ea742 100644 --- a/app_http_handlers.go +++ b/app_http_handlers.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "fmt" "net/http" "os" @@ -138,7 +139,7 @@ func (app *App) updateDocumentsHandler(c *gin.Context) { return } - err := app.Client.UpdateDocuments(ctx, documents) + err := app.Client.UpdateDocuments(ctx, documents, app.Database, false) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error updating documents: %v", err)}) log.Errorf("Error updating documents: %v", err) @@ -237,8 +238,94 @@ func (app *App) getDocumentHandler() gin.HandlerFunc { document, err := app.Client.GetDocument(c, parsedID) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + log.Errorf("Error fetching document: %v", err) return } c.JSON(http.StatusOK, document) } } + +// Section for local-db actions + +func (app *App) getModificationHistoryHandler(c *gin.Context) { + modifications, err := GetAllModifications(app.Database) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification history"}) + log.Errorf("Failed to retrieve modification history: %v", err) + return + } + c.JSON(http.StatusOK, modifications) +} + +func (app *App) undoModificationHandler(c *gin.Context) { + id := c.Param("id") + modID, err := strconv.Atoi(id) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification ID"}) + log.Errorf("Invalid modification ID: %v", err) + return + } + + modification, err := GetModification(app.Database, uint(modID)) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification"}) + log.Errorf("Failed to retrieve modification: %v", err) + return + } + + if modification.Undone { + c.JSON(http.StatusBadRequest, gin.H{"error": "Modification has already been undone"}) + log.Errorf("Modification has already been undone: %v", id) + return + } + + // Ok, we're actually doing the update: + ctx := c.Request.Context() + + // Make the document suggestions for UpdateDocuments + var suggestion DocumentSuggestion + suggestion.ID = int(modification.DocumentID) + suggestion.OriginalDocument, err = app.Client.GetDocument(ctx, int(modification.DocumentID)) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve original document"}) + log.Errorf("Failed to retrieve original document: %v", err) + return + } + switch modification.ModField { + case "title": + suggestion.SuggestedTitle = modification.PreviousValue + case "tags": + var tags []string + err := json.Unmarshal([]byte(modification.PreviousValue), &tags) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to unmarshal previous tags"}) + log.Errorf("Failed to unmarshal previous tags: %v", err) + return + } + suggestion.SuggestedTags = tags + case "content": + suggestion.SuggestedContent = modification.PreviousValue + default: + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification field"}) + log.Errorf("Invalid modification field: %v", modification.ModField) + return + } + + // Update the document + err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{suggestion}, app.Database, true) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update document"}) + log.Errorf("Failed to update document: %v", err) + return + } + + // Successful, so set modification as undone + err = SetModificationUndone(app.Database, modification) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to mark modification as undone"}) + return + } + + // Else all was ok + c.Status(http.StatusOK) +} diff --git a/go.mod b/go.mod index babd910..ddeb457 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,9 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 github.com/tmc/langchaingo v0.1.12 - golang.org/x/sync v0.7.0 + golang.org/x/sync v0.9.0 + gorm.io/driver/sqlite v1.5.6 + gorm.io/gorm v1.25.12 ) require ( @@ -33,11 +35,14 @@ require ( github.com/goccy/go-json v0.10.2 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/imdario/mergo v0.3.13 // indirect + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/jupiterrider/ffi v0.2.0 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-sqlite3 v1.14.24 // indirect github.com/mitchellh/copystructure v1.0.0 // indirect github.com/mitchellh/reflectwalk v1.0.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect @@ -53,7 +58,7 @@ require ( golang.org/x/crypto v0.23.0 // indirect golang.org/x/net v0.25.0 // indirect golang.org/x/sys v0.20.0 // indirect - golang.org/x/text v0.15.0 // indirect + golang.org/x/text v0.20.0 // indirect google.golang.org/protobuf v1.34.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 76d584c..f914d5b 100644 --- a/go.sum +++ b/go.sum @@ -48,6 +48,10 @@ github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jupiterrider/ffi v0.2.0 h1:tMM70PexgYNmV+WyaYhJgCvQAvtTCs3wXeILPutihnA= @@ -60,6 +64,8 @@ github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM= +github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= @@ -119,8 +125,8 @@ golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -139,8 +145,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= @@ -157,6 +163,10 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/sqlite v1.5.6 h1:fO/X46qn5NUEEOZtnjJRWRzZMe8nqJiQ9E+0hi+hKQE= +gorm.io/driver/sqlite v1.5.6/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4= +gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8= +gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= diff --git a/local_db.go b/local_db.go new file mode 100644 index 0000000..931d7fb --- /dev/null +++ b/local_db.go @@ -0,0 +1,79 @@ +package main + +import ( + "os" + "path/filepath" + "time" + + "gorm.io/driver/sqlite" + "gorm.io/gorm" +) + +// ModificationHistory represents the schema of the modification_history table +type ModificationHistory struct { + ID uint `gorm:"primaryKey"` // Auto-incrementing primary key + DocumentID uint `gorm:"not null"` // Foreign key to documents table (if applicable) + DateChanged string `gorm:"not null"` // Date and time of modification + ModField string `gorm:"size:255;not null"` // Field being modified + PreviousValue string `gorm:"size:1048576"` // Previous value of the field + NewValue string `gorm:"size:1048576"` // New value of the field + Undone bool `gorm:"not null;default:false"` // Whether the modification has been undone + UndoneDate string `gorm:"default:null"` // Date and time of undoing the modification +} + +// InitializeDB initializes the SQLite database and migrates the schema +func InitializeDB() *gorm.DB { + // Ensure db directory exists + dbDir := "db" + if err := os.MkdirAll(dbDir, os.ModePerm); err != nil { + log.Fatalf("Failed to create db directory: %v", err) + } + + dbPath := filepath.Join(dbDir, "modification_history.db") + + // Connect to SQLite database + db, err := gorm.Open(sqlite.Open(dbPath), &gorm.Config{}) + if err != nil { + log.Fatalf("Failed to connect to database: %v", err) + } + + // Migrate the schema (create the table if it doesn't exist) + err = db.AutoMigrate(&ModificationHistory{}) + if err != nil { + log.Fatalf("Failed to migrate database schema: %v", err) + } + + return db +} + +// InsertModification inserts a new modification record into the database +func InsertModification(db *gorm.DB, record *ModificationHistory) error { + log.Debugf("Passed modification record: %+v", record) + record.DateChanged = time.Now().Format(time.RFC3339) // Set the DateChanged field to the current time + log.Debugf("Inserting modification record: %+v", record) + result := db.Create(&record) // GORM's Create method + log.Debugf("Insertion result: %+v", result) + return result.Error +} + +// GetModification retrieves a modification record by its ID +func GetModification(db *gorm.DB, id uint) (*ModificationHistory, error) { + var record ModificationHistory + result := db.First(&record, id) // GORM's First method retrieves the first record matching the ID + return &record, result.Error +} + +// GetAllModifications retrieves all modification records from the database +func GetAllModifications(db *gorm.DB) ([]ModificationHistory, error) { + var records []ModificationHistory + result := db.Order("date_changed DESC").Find(&records) // GORM's Find method retrieves all records + return records, result.Error +} + +// UndoModification marks a modification record as undone and sets the undo date +func SetModificationUndone(db *gorm.DB, record *ModificationHistory) error { + record.Undone = true + record.UndoneDate = time.Now().Format(time.RFC3339) + result := db.Save(&record) // GORM's Save method + return result.Error +} diff --git a/main.go b/main.go index 5c8d3ef..4448832 100644 --- a/main.go +++ b/main.go @@ -17,6 +17,7 @@ import ( "github.com/tmc/langchaingo/llms" "github.com/tmc/langchaingo/llms/ollama" "github.com/tmc/langchaingo/llms/openai" + "gorm.io/gorm" ) // Global Variables and Constants @@ -73,6 +74,7 @@ Be very selective and only choose the most relevant tags since too many tags wil // App struct to hold dependencies type App struct { Client *PaperlessClient + Database *gorm.DB LLM llms.Model VisionLLM llms.Model } @@ -87,6 +89,9 @@ func main() { // Initialize PaperlessClient client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken) + // Initialize Database + database := InitializeDB() + // Load Templates loadTemplates() @@ -105,6 +110,7 @@ func main() { // Initialize App with dependencies app := &App{ Client: client, + Database: database, LLM: llm, VisionLLM: visionLlm, } @@ -165,6 +171,20 @@ func main() { enabled := isOcrEnabled() c.JSON(http.StatusOK, gin.H{"enabled": enabled}) }) + + // Local db actions + api.GET("/modifications", app.getModificationHistoryHandler) + api.POST("/undo-modification/:id", app.undoModificationHandler) + + // Get public Paperless environment (as set in environment variables) + api.GET("/paperless-url", func(c *gin.Context) { + baseUrl := os.Getenv("PAPERLESS_PUBLIC_URL") + if baseUrl == "" { + baseUrl = os.Getenv("PAPERLESS_BASE_URL") + } + baseUrl = strings.TrimRight(baseUrl, "/") + c.JSON(http.StatusOK, gin.H{"url": baseUrl}) + }) } // Serve static files for the frontend under /assets @@ -268,7 +288,7 @@ func (app *App) processAutoTagDocuments() (int, error) { return 0, fmt.Errorf("error generating suggestions: %w", err) } - err = app.Client.UpdateDocuments(ctx, suggestions) + err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false) if err != nil { return 0, fmt.Errorf("error updating documents: %w", err) } diff --git a/paperless.go b/paperless.go index 212e73e..6cff47c 100644 --- a/paperless.go +++ b/paperless.go @@ -11,11 +11,13 @@ import ( "os" "path/filepath" "slices" + "sort" "strings" "sync" "github.com/gen2brain/go-fitz" "golang.org/x/sync/errgroup" + "gorm.io/gorm" ) // PaperlessClient struct to interact with the Paperless-NGX API @@ -26,6 +28,32 @@ type PaperlessClient struct { CacheFolder string } +func hasSameTags(original, suggested []string) bool { + if len(original) != len(suggested) { + return false + } + + // Create copies to avoid modifying original slices + orig := make([]string, len(original)) + sugg := make([]string, len(suggested)) + + copy(orig, original) + copy(sugg, suggested) + + // Sort both slices + sort.Strings(orig) + sort.Strings(sugg) + + // Compare elements + for i := range orig { + if orig[i] != sugg[i] { + return false + } + } + + return true +} + // NewPaperlessClient creates a new instance of PaperlessClient with a default HTTP client func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient { cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR") @@ -218,7 +246,7 @@ func (c *PaperlessClient) GetDocument(ctx context.Context, documentID int) (Docu } // UpdateDocuments updates the specified documents with suggested changes -func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error { +func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error { // Fetch all available tags availableTags, err := c.GetAllTags(ctx) if err != nil { @@ -229,21 +257,43 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum for _, document := range documents { documentID := document.ID + // Original fields will store any updated fields to store records for + originalFields := make(map[string]interface{}) updatedFields := make(map[string]interface{}) newTags := []int{} tags := document.SuggestedTags - if len(tags) == 0 { - tags = document.OriginalDocument.Tags + originalTags := document.OriginalDocument.Tags + + originalTagsJSON, err := json.Marshal(originalTags) + if err != nil { + log.Errorf("Error marshalling JSON for document %d: %v", documentID, err) + return err } + // remove autoTag to prevent infinite loop (even if it is in the original tags) - tags = removeTagFromList(tags, autoTag) + originalTags = removeTagFromList(originalTags, autoTag) + + if len(tags) == 0 { + tags = originalTags + } else { + // We have suggested tags to change + originalFields["tags"] = originalTags + // remove autoTag to prevent infinite loop - this is required in case of undo + tags = removeTagFromList(tags, autoTag) + } + + updatedTagsJSON, err := json.Marshal(tags) + if err != nil { + log.Errorf("Error marshalling JSON for document %d: %v", documentID, err) + return err + } // Map suggested tag names to IDs for _, tagName := range tags { if tagID, exists := availableTags[tagName]; exists { // Skip the tag that we are filtering - if tagName == manualTag { + if !isUndo && tagName == manualTag { continue } newTags = append(newTags, tagID) @@ -259,6 +309,7 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum suggestedTitle = suggestedTitle[:128] } if suggestedTitle != "" { + originalFields["title"] = document.OriginalDocument.Title updatedFields["title"] = suggestedTitle } else { log.Warnf("No valid title found for document %d, skipping.", documentID) @@ -267,8 +318,11 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum // Suggested Content suggestedContent := document.SuggestedContent if suggestedContent != "" { + originalFields["content"] = document.OriginalDocument.Content updatedFields["content"] = suggestedContent } + log.Debugf("Document %d: Original fields: %v", documentID, originalFields) + log.Debugf("Document %d: Updated fields: %v Tags: %v", documentID, updatedFields, tags) // Marshal updated fields to JSON jsonData, err := json.Marshal(updatedFields) @@ -290,6 +344,43 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum bodyBytes, _ := io.ReadAll(resp.Body) log.Errorf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes)) return fmt.Errorf("error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes)) + } else { + for field, value := range originalFields { + log.Printf("Document %d: Updated %s from %v to %v", documentID, field, originalFields[field], value) + // Insert the modification record into the database + var modificationRecord ModificationHistory + if field == "tags" { + // Make sure we only store changes where tags are changed - not the same before and after + // And we have to use tags, not updatedFields as they are IDs not fields + if !hasSameTags(document.OriginalDocument.Tags, tags) { + modificationRecord = ModificationHistory{ + DocumentID: uint(documentID), + ModField: field, + PreviousValue: string(originalTagsJSON), + NewValue: string(updatedTagsJSON), + } + } + } else { + // Only store mod if field actually changed + if originalFields[field] != updatedFields[field] { + modificationRecord = ModificationHistory{ + DocumentID: uint(documentID), + ModField: field, + PreviousValue: fmt.Sprintf("%v", originalFields[field]), + NewValue: fmt.Sprintf("%v", updatedFields[field]), + } + } + } + + // Only store if we have a valid modification record + if (modificationRecord != ModificationHistory{}) { + err = InsertModification(db, &modificationRecord) + } + if err != nil { + log.Errorf("Error inserting modification record for document %d: %v", documentID, err) + return err + } + } } log.Printf("Document %d updated successfully.", documentID) diff --git a/paperless_test.go b/paperless_test.go index 17cddde..c75049f 100644 --- a/paperless_test.go +++ b/paperless_test.go @@ -13,6 +13,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gorm.io/driver/sqlite" + "gorm.io/gorm" ) // Helper struct to hold common test data and methods @@ -22,6 +24,7 @@ type testEnv struct { client *PaperlessClient requestCount int mockResponses map[string]http.HandlerFunc + db *gorm.DB } // newTestEnv initializes a new test environment @@ -31,6 +34,11 @@ func newTestEnv(t *testing.T) *testEnv { mockResponses: make(map[string]http.HandlerFunc), } + // Initialize test database + db, err := InitializeTestDB() + require.NoError(t, err) + env.db = db + // Create a mock server with a handler that dispatches based on URL path env.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { env.requestCount++ @@ -50,6 +58,22 @@ func newTestEnv(t *testing.T) *testEnv { return env } +func InitializeTestDB() (*gorm.DB, error) { + // Use in-memory SQLite for testing + db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{}) + if err != nil { + return nil, err + } + + // Migrate schema + err = db.AutoMigrate(&ModificationHistory{}) + if err != nil { + return nil, err + } + + return db, nil +} + // teardown closes the mock server func (env *testEnv) teardown() { env.server.Close() @@ -327,7 +351,7 @@ func TestUpdateDocuments(t *testing.T) { }) ctx := context.Background() - err := env.client.UpdateDocuments(ctx, documents) + err := env.client.UpdateDocuments(ctx, documents, env.db, false) require.NoError(t, err) } diff --git a/web-app/package-lock.json b/web-app/package-lock.json index 31ecd9b..a342dba 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -10,17 +10,23 @@ "dependencies": { "@headlessui/react": "^2.1.8", "@heroicons/react": "^2.1.5", + "@mdi/js": "^7.4.47", + "@mdi/react": "^1.6.1", "axios": "^1.7.7", "classnames": "^2.5.1", + "date-fns": "^4.1.0", "prop-types": "^15.8.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.3.0", "react-router-dom": "^6.27.0", - "react-tag-autocomplete": "^7.3.0" + "react-tag-autocomplete": "^7.3.0", + "react-tooltip": "^5.28.0", + "winston": "^3.17.0" }, "devDependencies": { "@eslint/js": "^9.9.0", + "@types/node": "^22.10.1", "@types/react": "^18.3.3", "@types/react-dom": "^18.3.0", "@vitejs/plugin-react-swc": "^3.5.0", @@ -48,6 +54,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@colors/colors": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz", + "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==", + "engines": { + "node": ">=0.1.90" + } + }, + "node_modules/@dabh/diagnostics": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz", + "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==", + "dependencies": { + "colorspace": "1.1.x", + "enabled": "2.0.x", + "kuler": "^2.0.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", @@ -723,6 +747,19 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mdi/js": { + "version": "7.4.47", + "resolved": "https://registry.npmjs.org/@mdi/js/-/js-7.4.47.tgz", + "integrity": "sha512-KPnNOtm5i2pMabqZxpUz7iQf+mfrYZyKCZ8QNz85czgEt7cuHcGorWfdzUMWYA0SD+a6Hn4FmJ+YhzzzjkTZrQ==" + }, + "node_modules/@mdi/react": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@mdi/react/-/react-1.6.1.tgz", + "integrity": "sha512-4qZeDcluDFGFTWkHs86VOlHkm6gnKaMql13/gpIcUQ8kzxHgpj31NuCkD8abECVfbULJ3shc7Yt4HJ6Wu6SN4w==", + "dependencies": { + "prop-types": "^15.7.2" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -1313,6 +1350,15 @@ "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", "dev": true }, + "node_modules/@types/node": { + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "dev": true, + "dependencies": { + "undici-types": "~6.20.0" + } + }, "node_modules/@types/prop-types": { "version": "15.7.13", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.13.tgz", @@ -1338,6 +1384,11 @@ "@types/react": "*" } }, + "node_modules/@types/triple-beam": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz", + "integrity": "sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==" + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.6.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.6.0.tgz", @@ -1660,6 +1711,11 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", "dev": true }, + "node_modules/async": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", + "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -1887,6 +1943,15 @@ "node": ">=6" } }, + "node_modules/color": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz", + "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==", + "dependencies": { + "color-convert": "^1.9.3", + "color-string": "^1.6.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -1902,8 +1967,38 @@ "node_modules/color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/color-string": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", + "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", + "dependencies": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, + "node_modules/color/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" + }, + "node_modules/colorspace": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz", + "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==", + "dependencies": { + "color": "^3.1.3", + "text-hex": "1.0.x" + } }, "node_modules/combined-stream": { "version": "1.0.8", @@ -1963,6 +2058,15 @@ "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", "dev": true }, + "node_modules/date-fns": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-4.1.0.tgz", + "integrity": "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/kossnocorp" + } + }, "node_modules/debug": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", @@ -2024,6 +2128,11 @@ "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "dev": true }, + "node_modules/enabled": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz", + "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==" + }, "node_modules/esbuild": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", @@ -2305,6 +2414,11 @@ "reusify": "^1.0.4" } }, + "node_modules/fecha": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", + "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw==" + }, "node_modules/file-entry-cache": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", @@ -2364,6 +2478,11 @@ "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", "dev": true }, + "node_modules/fn.name": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", + "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" + }, "node_modules/follow-redirects": { "version": "1.15.9", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", @@ -2577,6 +2696,16 @@ "node": ">=0.8.19" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/is-arrayish": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", + "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==" + }, "node_modules/is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -2652,6 +2781,17 @@ "node": ">=8" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -2726,6 +2866,11 @@ "json-buffer": "3.0.1" } }, + "node_modules/kuler": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", + "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==" + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -2775,6 +2920,22 @@ "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, + "node_modules/logform": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz", + "integrity": "sha512-TFYA4jnP7PVbmlBIfhlSe+WKxs9dklXMTEGcBCIvLhE/Tn3H6Gk1norupVW7m5Cnd4bLcr08AytbyV/xj7f/kQ==", + "dependencies": { + "@colors/colors": "1.6.0", + "@types/triple-beam": "^1.3.2", + "fecha": "^4.2.0", + "ms": "^2.1.1", + "safe-stable-stringify": "^2.3.1", + "triple-beam": "^1.3.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -2857,8 +3018,7 @@ "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, "node_modules/mz": { "version": "2.7.0", @@ -2936,6 +3096,14 @@ "node": ">= 6" } }, + "node_modules/one-time": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz", + "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==", + "dependencies": { + "fn.name": "1.x.x" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -3362,6 +3530,19 @@ "react": "^18.0.0" } }, + "node_modules/react-tooltip": { + "version": "5.28.0", + "resolved": "https://registry.npmjs.org/react-tooltip/-/react-tooltip-5.28.0.tgz", + "integrity": "sha512-R5cO3JPPXk6FRbBHMO0rI9nkUG/JKfalBSQfZedZYzmqaZQgq7GLzF8vcCWx6IhUCKg0yPqJhXIzmIO5ff15xg==", + "dependencies": { + "@floating-ui/dom": "^1.6.1", + "classnames": "^2.3.0" + }, + "peerDependencies": { + "react": ">=16.14.0", + "react-dom": ">=16.14.0" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", @@ -3371,6 +3552,19 @@ "pify": "^2.3.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -3477,6 +3671,33 @@ "queue-microtask": "^1.2.2" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==", + "engines": { + "node": ">=10" + } + }, "node_modules/scheduler": { "version": "0.23.2", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", @@ -3530,6 +3751,14 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/simple-swizzle": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", + "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", + "dependencies": { + "is-arrayish": "^0.3.1" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -3539,6 +3768,22 @@ "node": ">=0.10.0" } }, + "node_modules/stack-trace": { + "version": "0.0.10", + "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz", + "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==", + "engines": { + "node": "*" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -3729,6 +3974,11 @@ "node": ">=14.0.0" } }, + "node_modules/text-hex": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", + "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==" + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -3768,6 +4018,14 @@ "node": ">=8.0" } }, + "node_modules/triple-beam": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz", + "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==", + "engines": { + "node": ">= 14.0.0" + } + }, "node_modules/ts-api-utils": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz", @@ -3839,6 +4097,12 @@ } } }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true + }, "node_modules/update-browserslist-db": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz", @@ -3881,8 +4145,7 @@ "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "dev": true + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, "node_modules/vite": { "version": "5.4.7", @@ -3958,6 +4221,40 @@ "node": ">= 8" } }, + "node_modules/winston": { + "version": "3.17.0", + "resolved": "https://registry.npmjs.org/winston/-/winston-3.17.0.tgz", + "integrity": "sha512-DLiFIXYC5fMPxaRg832S6F5mJYvePtmO5G9v9IgUFPhXm9/GkXarH/TUrBAVzhTCzAj9anE/+GjrgXp/54nOgw==", + "dependencies": { + "@colors/colors": "^1.6.0", + "@dabh/diagnostics": "^2.0.2", + "async": "^3.2.3", + "is-stream": "^2.0.0", + "logform": "^2.7.0", + "one-time": "^1.0.0", + "readable-stream": "^3.4.0", + "safe-stable-stringify": "^2.3.1", + "stack-trace": "0.0.x", + "triple-beam": "^1.3.0", + "winston-transport": "^4.9.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, + "node_modules/winston-transport": { + "version": "4.9.0", + "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.9.0.tgz", + "integrity": "sha512-8drMJ4rkgaPo1Me4zD/3WLfI/zPdA9o2IipKODunnGDcuqbHwjsbB79ylv04LCGGzU0xQ6vTznOMpQGaLhhm6A==", + "dependencies": { + "logform": "^2.7.0", + "readable-stream": "^3.6.2", + "triple-beam": "^1.3.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, "node_modules/word-wrap": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", diff --git a/web-app/package.json b/web-app/package.json index 09b6007..c8cd1ef 100644 --- a/web-app/package.json +++ b/web-app/package.json @@ -13,17 +13,23 @@ "dependencies": { "@headlessui/react": "^2.1.8", "@heroicons/react": "^2.1.5", + "@mdi/js": "^7.4.47", + "@mdi/react": "^1.6.1", "axios": "^1.7.7", "classnames": "^2.5.1", + "date-fns": "^4.1.0", "prop-types": "^15.8.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.3.0", "react-router-dom": "^6.27.0", - "react-tag-autocomplete": "^7.3.0" + "react-tag-autocomplete": "^7.3.0", + "react-tooltip": "^5.28.0", + "winston": "^3.17.0" }, "devDependencies": { "@eslint/js": "^9.9.0", + "@types/node": "^22.10.1", "@types/react": "^18.3.3", "@types/react-dom": "^18.3.0", "@vitejs/plugin-react-swc": "^3.5.0", diff --git a/web-app/src/App.tsx b/web-app/src/App.tsx index 4b89f18..d5a2052 100644 --- a/web-app/src/App.tsx +++ b/web-app/src/App.tsx @@ -1,16 +1,24 @@ // App.tsx or App.jsx import React from 'react'; import { Route, BrowserRouter as Router, Routes } from 'react-router-dom'; +import Sidebar from './components/Sidebar'; import DocumentProcessor from './DocumentProcessor'; import ExperimentalOCR from './ExperimentalOCR'; // New component +import History from './History'; const App: React.FC = () => { return ( <Router> - <Routes> - <Route path="/" element={<DocumentProcessor />} /> - <Route path="/experimental-ocr" element={<ExperimentalOCR />} /> - </Routes> + <div style={{ display: "flex", height: "100vh" }}> + <Sidebar onSelectPage={(page) => console.log(page)} /> + <div style={{ flex: 1, overflowY: "auto" }}> + <Routes> + <Route path="/" element={<DocumentProcessor />} /> + <Route path="/experimental-ocr" element={<ExperimentalOCR />} /> + <Route path="/history" element={<History />} /> + </Routes> + </div> + </div> </Router> ); }; diff --git a/web-app/src/DocumentProcessor.tsx b/web-app/src/DocumentProcessor.tsx index d83709c..093a5e1 100644 --- a/web-app/src/DocumentProcessor.tsx +++ b/web-app/src/DocumentProcessor.tsx @@ -1,6 +1,5 @@ import axios from "axios"; import React, { useCallback, useEffect, useState } from "react"; -import { Link } from "react-router-dom"; import "react-tag-autocomplete/example/src/styles.css"; // Ensure styles are loaded import DocumentsToProcess from "./components/DocumentsToProcess"; import NoDocuments from "./components/NoDocuments"; @@ -46,22 +45,17 @@ const DocumentProcessor: React.FC = () => { const [generateTags, setGenerateTags] = useState(true); const [error, setError] = useState<string | null>(null); - // Temporary feature flags - const [ocrEnabled, setOcrEnabled] = useState(false); - // Custom hook to fetch initial data const fetchInitialData = useCallback(async () => { try { - const [filterTagRes, documentsRes, tagsRes, ocrEnabledRes] = await Promise.all([ + const [filterTagRes, documentsRes, tagsRes] = await Promise.all([ axios.get<{ tag: string }>("/api/filter-tag"), axios.get<Document[]>("/api/documents"), axios.get<Record<string, number>>("/api/tags"), - axios.get<{enabled: boolean}>("/api/experimental/ocr"), ]); setFilterTag(filterTagRes.data.tag); setDocuments(documentsRes.data); - setOcrEnabled(ocrEnabledRes.data.enabled); const tags = Object.keys(tagsRes.data).map((tag) => ({ id: tag, name: tag, @@ -199,16 +193,6 @@ const DocumentProcessor: React.FC = () => { <div className="max-w-5xl mx-auto p-6 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"> <header className="text-center"> <h1 className="text-4xl font-bold mb-8">Paperless GPT</h1> - {ocrEnabled && ( - <div> - <Link - to="/experimental-ocr" - className="inline-block bg-blue-600 hover:bg-blue-700 text-white font-semibold py-2 px-4 rounded transition duration-200 dark:bg-blue-500 dark:hover:bg-blue-600" - > - OCR via LLMs (Experimental) - </Link> - </div> - )} </header> {error && ( diff --git a/web-app/src/History.tsx b/web-app/src/History.tsx new file mode 100644 index 0000000..e68fb57 --- /dev/null +++ b/web-app/src/History.tsx @@ -0,0 +1,126 @@ +import React, { useEffect, useState } from 'react'; +import UndoCard from './components/UndoCard'; + +interface ModificationHistory { + ID: number; + DocumentID: number; + DateChanged: string; + ModField: string; + PreviousValue: string; + NewValue: string; + Undone: boolean; + UndoneDate: string | null; +} + +const History: React.FC = () => { + const [modifications, setModifications] = useState<ModificationHistory[]>([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState<string | null>(null); + const [paperlessUrl, setPaperlessUrl] = useState<string>(''); + + // Get Paperless URL + useEffect(() => { + const fetchUrl = async () => { + try { + const response = await fetch('/api/paperless-url'); + if (!response.ok) { + throw new Error('Failed to fetch public URL'); + } + const { url } = await response.json(); + setPaperlessUrl(url); + } catch (err) { + console.error('Error fetching Paperless URL:', err); + } + }; + + fetchUrl(); + }, []); + + // Get all modifications + useEffect(() => { + fetchModifications(); + }, []); + + const fetchModifications = async () => { + try { + const response = await fetch('/api/modifications'); + if (!response.ok) { + throw new Error('Failed to fetch modifications'); + } + const data = await response.json(); + setModifications(data); + } catch (err) { + setError(err instanceof Error ? err.message : 'Unknown error occurred'); + } finally { + setLoading(false); + } + }; + + const handleUndo = async (id: number) => { + try { + const response = await fetch(`/api/undo-modification/${id}`, { + method: 'POST', + }); + + if (!response.ok) { + throw new Error('Failed to undo modification'); + } + + // Use ISO 8601 format for consistency + const now = new Date().toISOString(); + + setModifications(mods => mods.map(mod => + mod.ID === id + ? { ...mod, Undone: true, UndoneDate: now } + : mod + )); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to undo modification'); + } + }; + + if (loading) { + return ( + <div className="flex justify-center items-center min-h-screen"> + <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-500" /> + </div> + ); + } + + if (error) { + return ( + <div className="text-red-500 dark:text-red-400 p-4 text-center"> + Error: {error} + </div> + ); + } + + return ( + <div className="container mx-auto px-4 py-8"> + <h1 className="text-2xl font-bold text-gray-800 dark:text-gray-200"> + Modification History + </h1> + <div className="mb-6 text-sm text-gray-500 dark:text-gray-400"> + Note: when undoing tag changes, this will not re-add 'paperless-gpt-auto' + </div> + {modifications.length === 0 ? ( + <p className="text-gray-500 dark:text-gray-400 text-center"> + No modifications found + </p> + ) : ( + <div className="grid gap-4 md:grid-cols-1 lg:grid-cols-1"> + {modifications.map((modification) => ( + <UndoCard + key={modification.ID} + {...modification} + onUndo={handleUndo} + paperlessUrl={paperlessUrl} + /> + ))} + </div> + )} + </div> + ); +}; + +export default History; \ No newline at end of file diff --git a/web-app/src/assets/logo.svg b/web-app/src/assets/logo.svg new file mode 100644 index 0000000..347b1e7 --- /dev/null +++ b/web-app/src/assets/logo.svg @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- Generator: Adobe Illustrator 27.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) --> +<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" + viewBox="0 0 1000 1000" style="enable-background:new 0 0 1000 1000;" xml:space="preserve"> +<style type="text/css"> + .st0{fill:#FFFFFF;} +</style> +<path class="st0" d="M299,891.7c-4.2-19.8-12.5-59.6-13.6-59.6c-176.7-105.7-155.8-288.7-97.3-393.4 + c12.5,131.8,245.8,222.8,109.8,383.9c-1.1,2,6.2,27.2,12.5,50.2c27.2-46,68-101.4,65.8-106.7C208.9,358.2,731.9,326.9,840.6,73.7 + c49.1,244.8-25.1,623.5-445.5,719.7c-2,1.1-76.3,131.8-79.5,132.9c0-2-31.4-1.1-27.2-11.5C290.7,908.4,294.8,900.1,299,891.7 + L299,891.7z M293.8,793.4c53.3-61.8-9.4-167.4-47.1-201.9C310.5,701.3,306.3,765.1,293.8,793.4L293.8,793.4z"/> +</svg> diff --git a/web-app/src/components/Sidebar.css b/web-app/src/components/Sidebar.css new file mode 100644 index 0000000..2459a05 --- /dev/null +++ b/web-app/src/components/Sidebar.css @@ -0,0 +1,75 @@ +.sidebar { + width: 250px; + background-color: #2c3e50; + color: #ecf0f1; + display: flex; + flex-direction: column; + transition: width 0.3s; + } + + .sidebar.collapsed { + width: 60px; + + } + + .sidebar-header { + display: flex; + align-items: center; + padding: 10px; + background-color: #34495e; + justify-content: space-between; + } + + .sidebar-header.collapsed { + justify-content: center; + } + + .logo { + height: 40px; + margin-right: 10px; + } + + .menu-items { + list-style: none; + padding: 0; + margin: 0; + } + + .menu-items li { + padding: 15px 20px; + cursor: pointer; + } + + .menu-items li.active { + background-color: darkslategray; + padding: 15px 20px; + cursor: pointer; + } + + .menu-items li:hover { + background-color: #1abc9c; + } + + .menu-items li a { + text-decoration: none; + color: inherit; + font-size: 18px; + } + + .toggle-btn { + background: none; + border: none; + color: white; + font-size: 24px; + cursor: pointer; + } + + .sidebar.collapsed .menu-items li a { + display: none; + } + + .sidebar.collapsed .logo { + height: 40px; + margin: auto; + } + \ No newline at end of file diff --git a/web-app/src/components/Sidebar.tsx b/web-app/src/components/Sidebar.tsx new file mode 100644 index 0000000..ae17d27 --- /dev/null +++ b/web-app/src/components/Sidebar.tsx @@ -0,0 +1,81 @@ +import axios from "axios"; +import React, { useCallback, useEffect, useState } from 'react'; +import "./Sidebar.css"; +import { Link, useLocation } from 'react-router-dom'; +import { Icon } from '@mdi/react'; +import { mdiHomeOutline, mdiTextBoxSearchOutline, mdiHistory } from '@mdi/js'; +import logo from "../assets/logo.svg"; + + +interface SidebarProps { + onSelectPage: (page: string) => void; +} + +const Sidebar: React.FC<SidebarProps> = ({ onSelectPage }) => { + const [collapsed, setCollapsed] = useState(false); + const location = useLocation(); + + const toggleSidebar = () => { + setCollapsed(!collapsed); + }; + + const handlePageClick = (page: string) => { + onSelectPage(page); + }; + + // Get whether experimental OCR is enabled + const [ocrEnabled, setOcrEnabled] = useState(false); + const fetchOcrEnabled = useCallback(async () => { + try { + const res = await axios.get<{ enabled: boolean }>("/api/experimental/ocr"); + setOcrEnabled(res.data.enabled); + } catch (err) { + console.error(err); + } + }, []); + + useEffect(() => { + fetchOcrEnabled(); + }, [fetchOcrEnabled]); + + const menuItems = [ + { name: 'home', path: '/', icon: mdiHomeOutline, title: 'Home' }, + { name: 'history', path: '/history', icon: mdiHistory, title: 'History' }, + ]; + + // If OCR is enabled, add the OCR menu item + if (ocrEnabled) { + menuItems.push({ name: 'ocr', path: '/experimental-ocr', icon: mdiTextBoxSearchOutline, title: 'OCR' }); + } + + return ( + <div className={`sidebar min-w-[64px] ${collapsed ? "collapsed" : ""}`}> + <div className={`sidebar-header ${collapsed ? "collapsed" : ""}`}> + {!collapsed && <img src={logo} alt="Logo" className="logo w-8 h-8 object-contain flex-shrink-0" />} + <button className="toggle-btn" onClick={toggleSidebar}> + ☰ + </button> + </div> + <ul className="menu-items"> + {menuItems.map((item) => ( + <li key={item.name} className={location.pathname === item.path ? "active" : ""}> + <Link + to={item.path} + onClick={() => handlePageClick(item.name)} + style={{ display: 'flex', alignItems: 'center' }} + > + {/* <Icon path={item.icon} size={1} /> + {!collapsed && <span> {item.title}</span>} */} + <div className="w-7 h-7 flex items-center justify-center flex-shrink-0"> + <Icon path={item.icon} size={1} /> + </div> + {!collapsed && <span className="ml-2">{item.title}</span>} + </Link> + </li> + ))} + </ul> + </div> + ); +}; + +export default Sidebar; diff --git a/web-app/src/components/UndoCard.tsx b/web-app/src/components/UndoCard.tsx new file mode 100644 index 0000000..d3c2254 --- /dev/null +++ b/web-app/src/components/UndoCard.tsx @@ -0,0 +1,193 @@ +// UndoCard.tsx +import React from 'react'; +import { Tooltip } from 'react-tooltip' + +interface ModificationProps { + ID: number; + DocumentID: number; + DateChanged: string; + ModField: string; + PreviousValue: string; + NewValue: string; + Undone: boolean; + UndoneDate: string | null; + onUndo: (id: number) => void; + paperlessUrl: string; +} + +const formatDate = (dateString: string | null): string => { + if (!dateString) return ''; + + try { + const date = new Date(dateString); + // Check if date is valid + if (isNaN(date.getTime())) { + return 'Invalid date'; + } + return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}`; + } catch { + return 'Invalid date'; + } +}; + +const buildPaperlessUrl = (paperlessUrl: string, documentId: number): string => { + return `${paperlessUrl}/documents/${documentId}/details`; +}; + +const UndoCard: React.FC<ModificationProps> = ({ + ID, + DocumentID, + DateChanged, + ModField, + PreviousValue, + NewValue, + Undone, + UndoneDate, + onUndo, + paperlessUrl, +}) => { + const formatValue = (value: string, field: string) => { + if (field === 'tags') { + try { + const tags = JSON.parse(value) as string[]; + return ( + <div className="flex flex-wrap gap-1"> + {tags.map((tag) => ( + <span + key={tag} + className="bg-blue-100 dark:bg-blue-900 text-blue-800 dark:text-blue-200 text-xs font-medium px-2.5 py-0.5 rounded-full" + > + {tag} + </span> + ))} + </div> + ); + } catch { + return value; + } + } else if (field.toLowerCase().includes('date')) { + return formatDate(value); + } + return value; + }; + + return ( + <div className="relative bg-white dark:bg-gray-800 p-4 rounded-md shadow-md"> + <div className="grid grid-cols-6"> + <div className="col-span-5"> {/* Left content */} + <div className="grid grid-cols-3 gap-4 mb-4"> + <div className=""> + <div className="text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1"> + Date Modified + </div> + <div className="text-sm text-gray-700 dark:text-gray-300"> + {DateChanged && formatDate(DateChanged)} + </div> + </div> + <div className=""> + <a + href={buildPaperlessUrl(paperlessUrl, DocumentID)} + target="_blank" + rel="noopener noreferrer" + className="text-blue-500 hover:text-blue-600 dark:text-blue-400 dark:hover:text-blue-300" + > + <div className="text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1"> + Document ID + </div> + <div className="text-sm text-gray-700 dark:text-gray-300"> + {DocumentID} + </div> + </a> + </div> + + <div className=""> + <div className="text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1"> + Modified Field + </div> + <div className="text-sm text-gray-700 dark:text-gray-300"> + {ModField} + </div> + </div> + </div> + <div className="mt-3"> + <div className="mt-2 space-y-2"> + <div className={`text-sm flex flex-nowrap ${Undone ? 'line-through' : ''}`}> + <span className="text-red-500 dark:text-red-400">Previous: </span> + <span + className="text-gray-600 dark:text-gray-300 truncate overflow-hidden flex-shrink-0 whitespace-nowrap flex-1 max-w-full group relative" + { // Add tooltip if value is too long and not tags + ...(ModField !== 'tags' && PreviousValue.length > 100 ? { + 'data-tooltip-id': `tooltip-${ID}-prev` + } : {})} + > + {formatValue(PreviousValue, ModField)} + </span> + </div> + <div className={`text-sm flex flex-nowrap ${Undone ? 'line-through' : ''}`}> + <span className="text-green-500 dark:text-green-400">New: </span> + <span + className="text-gray-600 dark:text-gray-300 truncate overflow-hidden flex-shrink-0 whitespace-nowrap flex-1 max-w-full group relative" + { // Add tooltip if value is too long and not tags + ...(ModField !== 'tags' && NewValue.length > 100 ? { + 'data-tooltip-id': `tooltip-${ID}-new` + } : {})} + > + {formatValue(NewValue, ModField)} + </span> + </div> + </div> + <Tooltip + id={`tooltip-${ID}-prev`} + place="bottom" + className="flex-wrap" + style={{ + flexWrap: 'wrap', + wordWrap: 'break-word', + zIndex: 10, + whiteSpace: 'pre-line', + textAlign: 'left', + }} + > + {PreviousValue} + </Tooltip> + <Tooltip + id={`tooltip-${ID}-new`} + place="bottom" + className="flex-wrap" + style={{ + flexWrap: 'wrap', + wordWrap: 'break-word', + zIndex: 10, + whiteSpace: 'pre-line', + textAlign: 'left', + }} + > + {NewValue} + </Tooltip> + </div> + </div> + <div className="grid place-items-center"> {/* Button content */} + <button + onClick={() => onUndo(ID)} + disabled={Undone} + className={`mt-2 mb-2 p-4 text-sm font-medium rounded-md min-w-[100px] max-w-[150px] text-center break-words ${Undone + ? 'bg-gray-300 dark:bg-gray-700 text-gray-500 dark:text-gray-400 cursor-not-allowed' + : 'bg-blue-500 dark:bg-blue-600 text-white hover:bg-blue-600 dark:hover:bg-blue-700' + } transition-colors duration-200`} + > + {Undone ? ( + <> + <span className="block text-xs">Undone on</span> + <span className="block text-xs">{formatDate(UndoneDate)}</span> + </> + ) : ( + 'Undo' + )} + </button> + </div> + </div> + </div> + ); +}; + +export default UndoCard; \ No newline at end of file From 13f31ddc3f383a8d177d3a3321dfd49f13db1817 Mon Sep 17 00:00:00 2001 From: Icereed <domi@icereed.net> Date: Fri, 20 Dec 2024 15:12:16 +0100 Subject: [PATCH 04/22] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca10f02..04513bf 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [](LICENSE) [](https://hub.docker.com/r/icereed/paperless-gpt) -[](code_of_conduct.md) +[](CODE_OF_CONDUCT.md)  From bede39f6efc582579cd638787fc85920129c2eb2 Mon Sep 17 00:00:00 2001 From: Christoph Ruckstetter <github@oboe.email> Date: Fri, 3 Jan 2025 16:51:36 +0000 Subject: [PATCH 05/22] make listen interface configurable (#57) * make listen interface configurable * describe new listen interface setting in readme --- README.md | 2 ++ main.go | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 04513bf..79930c2 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ services: VISION_LLM_PROVIDER: 'ollama' # Optional (for OCR) - ollama or openai VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' + LISTEN_INTERFACE: '127.0.0.1:8080' # Optional, default is ':8080' volumes: - ./prompts:/app/prompts # Mount the prompts directory ports: @@ -147,6 +148,7 @@ If you prefer to run the application manually: | `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No | | `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No | | `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No | +| `LISTEN_INTERFACE` | The interface paperless-gpt listens to. Default is `:8080` | No | **Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container. diff --git a/main.go b/main.go index 4448832..57f807f 100644 --- a/main.go +++ b/main.go @@ -37,6 +37,7 @@ var ( visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") visionLlmModel = os.Getenv("VISION_LLM_MODEL") logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) + listenInterface = os.Getenv("LISTEN_INTERFACE") // Templates titleTemplate *template.Template @@ -200,8 +201,11 @@ func main() { numWorkers := 1 // Number of workers to start startWorkerPool(app, numWorkers) - log.Infoln("Server started on port :8080") - if err := router.Run(":8080"); err != nil { + if listenInterface == "" { + listenInterface = ":8080" + } + log.Infoln("Server started on interface", listenInterface) + if err := router.Run(listenInterface); err != nil { log.Fatalf("Failed to run server: %v", err) } } From 6226b8c898a18b6b22bc6b5eb88b9989bd799a75 Mon Sep 17 00:00:00 2001 From: Brian Torres-Gil <brian@ixi.us> Date: Mon, 6 Jan 2025 01:19:00 -0800 Subject: [PATCH 06/22] feat: Setting to disabled auto generation of title or tags (#60) --- README.md | 2 ++ main.go | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 79930c2..d0dbb6b 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,8 @@ If you prefer to run the application manually: | `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No | | `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No | | `LISTEN_INTERFACE` | The interface paperless-gpt listens to. Default is `:8080` | No | +| `AUTO_GENERATE_TITLE` | Enable/disable title generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No | +| `AUTO_GENERATE_TAGS` | Enable/disable tag generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No | **Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container. diff --git a/main.go b/main.go index 57f807f..56662be 100644 --- a/main.go +++ b/main.go @@ -38,6 +38,8 @@ var ( visionLlmModel = os.Getenv("VISION_LLM_MODEL") logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) listenInterface = os.Getenv("LISTEN_INTERFACE") + autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE") + autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS") // Templates titleTemplate *template.Template @@ -283,8 +285,8 @@ func (app *App) processAutoTagDocuments() (int, error) { suggestionRequest := GenerateSuggestionsRequest{ Documents: documents, - GenerateTitles: true, - GenerateTags: true, + GenerateTitles: strings.ToLower(autoGenerateTitle) != "false", + GenerateTags: strings.ToLower(autoGenerateTags) != "false", } suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest) From 3b1d876d1350b64ae0c45e68b102af3fdedb0bb2 Mon Sep 17 00:00:00 2001 From: Christoph Ruckstetter <github@oboe.email> Date: Mon, 6 Jan 2025 09:29:07 +0000 Subject: [PATCH 07/22] Allow setting of path to read static files from (#58) * feat: allow setting of path to load static files from * Describe WEBUI_PATH setting in readme --------- Co-authored-by: Icereed <domi@icereed.net> --- README.md | 2 ++ main.go | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d0dbb6b..4276357 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ services: VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' LISTEN_INTERFACE: '127.0.0.1:8080' # Optional, default is ':8080' + WEBUI_PATH: '/usr/share/paperless-gpt/webui' # Optional, default is './web-app/dist' volumes: - ./prompts:/app/prompts # Mount the prompts directory ports: @@ -149,6 +150,7 @@ If you prefer to run the application manually: | `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No | | `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No | | `LISTEN_INTERFACE` | The interface paperless-gpt listens to. Default is `:8080` | No | +| `WEBUI_PATH` | The path to load static content from. Default is `./web-app/dist` | No | | `AUTO_GENERATE_TITLE` | Enable/disable title generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No | | `AUTO_GENERATE_TAGS` | Enable/disable tag generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true` | No | diff --git a/main.go b/main.go index 56662be..91c86a7 100644 --- a/main.go +++ b/main.go @@ -38,6 +38,7 @@ var ( visionLlmModel = os.Getenv("VISION_LLM_MODEL") logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) listenInterface = os.Getenv("LISTEN_INTERFACE") + webuiPath = os.Getenv("WEBUI_PATH") autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE") autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS") @@ -190,13 +191,16 @@ func main() { }) } + if webuiPath == "" { + webuiPath = "./web-app/dist" + } // Serve static files for the frontend under /assets - router.StaticFS("/assets", gin.Dir("./web-app/dist/assets", true)) - router.StaticFile("/vite.svg", "./web-app/dist/vite.svg") + router.StaticFS("/assets", gin.Dir(webuiPath+"/assets", true)) + router.StaticFile("/vite.svg", webuiPath+"/vite.svg") // Catch-all route for serving the frontend router.NoRoute(func(c *gin.Context) { - c.File("./web-app/dist/index.html") + c.File(webuiPath + "/index.html") }) // Start OCR worker pool From 83029cad8d69fb27215dc5597d1b754b143dbd40 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 17:06:35 +0100 Subject: [PATCH 08/22] Add renovate.json (#62) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- renovate.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 renovate.json diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..5db72dd --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended" + ] +} From 7a96cb8430892d2b098f8377e262d689f361d21a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 17:07:39 +0100 Subject: [PATCH 09/22] chore(deps): update dependency @types/node to v22.10.5 (#63) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- web-app/package-lock.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web-app/package-lock.json b/web-app/package-lock.json index a342dba..56b379e 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -1351,10 +1351,11 @@ "dev": true }, "node_modules/@types/node": { - "version": "22.10.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", - "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "version": "22.10.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.5.tgz", + "integrity": "sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==", "dev": true, + "license": "MIT", "dependencies": { "undici-types": "~6.20.0" } From f8d6928c0ec1a09845200b27e1cc2235d3fac860 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:15:02 +0000 Subject: [PATCH 10/22] chore(deps): update dependency @vitejs/plugin-react-swc to v3.7.2 (#64) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- web-app/package-lock.json | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/web-app/package-lock.json b/web-app/package-lock.json index 56b379e..3d7ba58 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -1609,15 +1609,16 @@ } }, "node_modules/@vitejs/plugin-react-swc": { - "version": "3.7.0", - "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.0.tgz", - "integrity": "sha512-yrknSb3Dci6svCd/qhHqhFPDSw0QtjumcqdKMoNNzmOl5lMXTTiqzjWtG4Qask2HdvvzaNgSunbQGet8/GrKdA==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.2.tgz", + "integrity": "sha512-y0byko2b2tSVVf5Gpng1eEhX1OvPC7x8yns1Fx8jDzlJp4LS6CMkCPfLw47cjyoMrshQDoQw4qcgjsU9VvlCew==", "dev": true, + "license": "MIT", "dependencies": { - "@swc/core": "^1.5.7" + "@swc/core": "^1.7.26" }, "peerDependencies": { - "vite": "^4 || ^5" + "vite": "^4 || ^5 || ^6" } }, "node_modules/acorn": { From 781823858a4bea93d24d0ce3c64a1a48aea6e922 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:28:39 +0000 Subject: [PATCH 11/22] fix(deps): update module github.com/stretchr/testify to v1.10.0 (#67) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index ddeb457..5e100c2 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/gin-gonic/gin v1.10.0 github.com/google/uuid v1.6.0 github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 github.com/tmc/langchaingo v0.1.12 golang.org/x/sync v0.9.0 gorm.io/driver/sqlite v1.5.6 diff --git a/go.sum b/go.sum index f914d5b..48e1cef 100644 --- a/go.sum +++ b/go.sum @@ -101,6 +101,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE= github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= From 4eadb0e090950f79d424ae2e6cf4f67a9f8227e5 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:30:21 +0000 Subject: [PATCH 12/22] fix(deps): update module golang.org/x/sync to v0.10.0 (#68) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 5e100c2..fec1328 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.10.0 github.com/tmc/langchaingo v0.1.12 - golang.org/x/sync v0.9.0 + golang.org/x/sync v0.10.0 gorm.io/driver/sqlite v1.5.6 gorm.io/gorm v1.25.12 ) diff --git a/go.sum b/go.sum index 48e1cef..67a06d3 100644 --- a/go.sum +++ b/go.sum @@ -129,6 +129,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= From 71742e5f267167beb6331b2226b62a8b2afe516d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 17:31:56 +0100 Subject: [PATCH 13/22] fix(deps): update module github.com/masterminds/sprig/v3 to v3.3.0 (#66) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 19 ++++++++++--------- go.sum | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index fec1328..880f413 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.22.0 toolchain go1.22.2 require ( - github.com/Masterminds/sprig/v3 v3.2.3 + github.com/Masterminds/sprig/v3 v3.3.0 github.com/gen2brain/go-fitz v1.24.14 github.com/gin-gonic/gin v1.10.0 github.com/google/uuid v1.6.0 @@ -18,8 +18,9 @@ require ( ) require ( + dario.cat/mergo v1.0.1 // indirect github.com/Masterminds/goutils v1.1.1 // indirect - github.com/Masterminds/semver/v3 v3.2.0 // indirect + github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/bytedance/sonic v1.11.6 // indirect github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cloudwego/base64x v0.1.4 // indirect @@ -33,7 +34,7 @@ require ( github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.20.0 // indirect github.com/goccy/go-json v0.10.2 // indirect - github.com/huandu/xstrings v1.3.3 // indirect + github.com/huandu/xstrings v1.5.0 // indirect github.com/imdario/mergo v0.3.13 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect @@ -43,21 +44,21 @@ require ( github.com/leodido/go-urn v1.4.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-sqlite3 v1.14.24 // indirect - github.com/mitchellh/copystructure v1.0.0 // indirect - github.com/mitchellh/reflectwalk v1.0.0 // indirect + github.com/mitchellh/copystructure v1.2.0 // indirect + github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pkoukk/tiktoken-go v0.1.6 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/shopspring/decimal v1.2.0 // indirect - github.com/spf13/cast v1.3.1 // indirect + github.com/shopspring/decimal v1.4.0 // indirect + github.com/spf13/cast v1.7.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.12 // indirect golang.org/x/arch v0.8.0 // indirect - golang.org/x/crypto v0.23.0 // indirect + golang.org/x/crypto v0.26.0 // indirect golang.org/x/net v0.25.0 // indirect - golang.org/x/sys v0.20.0 // indirect + golang.org/x/sys v0.23.0 // indirect golang.org/x/text v0.20.0 // indirect google.golang.org/protobuf v1.34.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 67a06d3..945e689 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,15 @@ +dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= +dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= +github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= +github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= @@ -45,6 +51,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= +github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= +github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= @@ -68,8 +76,12 @@ github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBW github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -83,10 +95,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= +github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= +github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -118,6 +134,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -142,6 +160,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= From c7539c7229054bed3a7bee0b94df81a3f4d10e92 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:54:22 +0100 Subject: [PATCH 14/22] chore(deps): update actions/checkout action to v4 (#72) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/docker-build-and-push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-build-and-push.yml b/.github/workflows/docker-build-and-push.yml index f4dc2a3..1db54df 100644 --- a/.github/workflows/docker-build-and-push.yml +++ b/.github/workflows/docker-build-and-push.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v4 @@ -60,7 +60,7 @@ jobs: needs: test steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 From b12a96c6a641eea198a3a20b1432ba2448523a8a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:54:39 +0100 Subject: [PATCH 15/22] chore(deps): update dependency postcss to v8.4.49 (#71) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- web-app/package-lock.json | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/web-app/package-lock.json b/web-app/package-lock.json index 3d7ba58..f5207ee 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -3212,10 +3212,11 @@ } }, "node_modules/picocolors": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.0.tgz", - "integrity": "sha512-TQ92mBOW0l3LeMeyLV6mzy/kWr8lkd/hp3mTg7wYK7zJhuBStmGMBG0BdeDZS/dZx1IukaX6Bk11zcln25o1Aw==", - "dev": true + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" }, "node_modules/picomatch": { "version": "2.3.1", @@ -3248,9 +3249,9 @@ } }, "node_modules/postcss": { - "version": "8.4.47", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz", - "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==", + "version": "8.4.49", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz", + "integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==", "dev": true, "funding": [ { @@ -3266,9 +3267,10 @@ "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "dependencies": { "nanoid": "^3.3.7", - "picocolors": "^1.1.0", + "picocolors": "^1.1.1", "source-map-js": "^1.2.1" }, "engines": { From 0ff367bad0f5b04a24940128e34d1958595998ca Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:54:49 +0100 Subject: [PATCH 16/22] chore(deps): update actions/setup-go action to v5 (#73) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/docker-build-and-push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build-and-push.yml b/.github/workflows/docker-build-and-push.yml index 1db54df..361f876 100644 --- a/.github/workflows/docker-build-and-push.yml +++ b/.github/workflows/docker-build-and-push.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: 1.22 From 99ad4883e8d62bff601bfec5e413f382eeb81c75 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:55:00 +0100 Subject: [PATCH 17/22] chore(deps): update dependency eslint-plugin-react-refresh to v0.4.16 (#70) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- web-app/package-lock.json | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web-app/package-lock.json b/web-app/package-lock.json index f5207ee..f5702ff 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -2266,12 +2266,13 @@ } }, "node_modules/eslint-plugin-react-refresh": { - "version": "0.4.12", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.12.tgz", - "integrity": "sha512-9neVjoGv20FwYtCP6CB1dzR1vr57ZDNOXst21wd2xJ/cTlM2xLq0GWVlSNTdMn/4BtP6cHYBMCSp1wFBJ9jBsg==", + "version": "0.4.16", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.16.tgz", + "integrity": "sha512-slterMlxAhov/DZO8NScf6mEeMBBXodFUolijDvrtTxyezyLoTQaa73FyYus/VbTdftd8wBgBxPMRk3poleXNQ==", "dev": true, + "license": "MIT", "peerDependencies": { - "eslint": ">=7" + "eslint": ">=8.40" } }, "node_modules/eslint-scope": { From 32f83ec93f969ef930a55e31b752611a8262c5c8 Mon Sep 17 00:00:00 2001 From: Icereed <domi@icereed.net> Date: Mon, 6 Jan 2025 23:03:41 +0100 Subject: [PATCH 18/22] feat: add support automatic OCR (#75) --- README.md | 6 ++++ jobs.go | 33 ++---------------- main.go | 94 ++++++++++++++++++++++++++++++++++++++++++++++++---- ocr.go | 39 ++++++++++++++++++++++ paperless.go | 1 + 5 files changed, 136 insertions(+), 37 deletions(-) create mode 100644 ocr.go diff --git a/README.md b/README.md index 4276357..77ff36a 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,8 @@ services: PAPERLESS_BASE_URL: 'http://paperless-ngx:8000' PAPERLESS_API_TOKEN: 'your_paperless_api_token' PAPERLESS_PUBLIC_URL: 'http://paperless.mydomain.com' # Optional, your public link to access Paperless + MANUAL_TAG: 'paperless-gpt' # Optional, default is 'paperless-gpt' + AUTO_TAG: 'paperless-gpt-auto' # Optional, default is 'paperless-gpt-auto' LLM_PROVIDER: 'openai' # or 'ollama' LLM_MODEL: 'gpt-4o' # or 'llama2' OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI @@ -78,6 +80,7 @@ services: OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama VISION_LLM_PROVIDER: 'ollama' # Optional (for OCR) - ollama or openai VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai + AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default is 'paperless-gpt-ocr-auto' LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' LISTEN_INTERFACE: '127.0.0.1:8080' # Optional, default is ':8080' WEBUI_PATH: '/usr/share/paperless-gpt/webui' # Optional, default is './web-app/dist' @@ -141,6 +144,8 @@ If you prefer to run the application manually: | `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes | | `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes | | `PAPERLESS_PUBLIC_URL` | The public URL for your Paperless instance, if it is different to your `PAPERLESS_BASE_URL` - say if you are running in Docker Compose | No | +| `MANUAL_TAG` | The tag to use for manually processing documents. Default is `paperless-gpt`. | No | +| `AUTO_TAG` | The tag to use for automatically processing documents. Default is `paperless-gpt-auto`. | No | | `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes | | `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes | | `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. | @@ -148,6 +153,7 @@ If you prefer to run the application manually: | `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No | | `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No | | `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No | +| `AUTO_OCR_TAG` | The tag to use for automatically processing documents with OCR. Default is `paperless-gpt-ocr-auto`. | No | | `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No | | `LISTEN_INTERFACE` | The interface paperless-gpt listens to. Default is `:8080` | No | | `WEBUI_PATH` | The path to load static content from. Default is `./web-app/dist` | No | diff --git a/jobs.go b/jobs.go index bc58b82..7b21876 100644 --- a/jobs.go +++ b/jobs.go @@ -2,10 +2,8 @@ package main import ( "context" - "fmt" "os" "sort" - "strings" "sync" "time" @@ -125,38 +123,13 @@ func processJob(app *App, job *Job) { ctx := context.Background() - // Download images of the document - imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, job.DocumentID) + fullOcrText, err := app.ProcessDocumentOCR(ctx, job.DocumentID) if err != nil { - logger.Infof("Error downloading document images for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error downloading document images: %v", err)) + logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err) + jobStore.updateJobStatus(job.ID, "failed", err.Error()) return } - var ocrTexts []string - for i, imagePath := range imagePaths { - imageContent, err := os.ReadFile(imagePath) - if err != nil { - logger.Errorf("Error reading image file for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error reading image file: %v", err)) - return - } - - ocrText, err := app.doOCRViaLLM(ctx, imageContent) - if err != nil { - logger.Errorf("Error performing OCR for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error performing OCR: %v", err)) - return - } - - ocrTexts = append(ocrTexts, ocrText) - jobStore.updatePagesDone(job.ID, i+1) // Update PagesDone after each page is processed - } - - // Combine the OCR texts - fullOcrText := strings.Join(ocrTexts, "\n\n") - - // Update job status and result jobStore.updateJobStatus(job.ID, "completed", fullOcrText) logger.Infof("Job completed: %s", job.ID) } diff --git a/main.go b/main.go index 91c86a7..b795ad8 100644 --- a/main.go +++ b/main.go @@ -30,8 +30,10 @@ var ( paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") openaiAPIKey = os.Getenv("OPENAI_API_KEY") - manualTag = "paperless-gpt" - autoTag = "paperless-gpt-auto" + manualTag = os.Getenv("MANUAL_TAG") + autoTag = os.Getenv("AUTO_TAG") + manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet + autoOcrTag = os.Getenv("AUTO_OCR_TAG") llmProvider = os.Getenv("LLM_PROVIDER") llmModel = os.Getenv("LLM_MODEL") visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") @@ -72,7 +74,7 @@ Please concisely select the {{.Language}} tags from the list above that best des Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable. ` - defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format.` + defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format but without a code block.` ) // App struct to hold dependencies @@ -85,7 +87,7 @@ type App struct { func main() { // Validate Environment Variables - validateEnvVars() + validateOrDefaultEnvVars() // Initialize logrus logger initLogger() @@ -127,7 +129,23 @@ func main() { backoffDuration := minBackoffDuration for { - processedCount, err := app.processAutoTagDocuments() + processedCount, err := func() (int, error) { + count := 0 + if isOcrEnabled() { + ocrCount, err := app.processAutoOcrTagDocuments() + if err != nil { + return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err) + } + count += ocrCount + } + autoCount, err := app.processAutoTagDocuments() + if err != nil { + return 0, fmt.Errorf("error in processAutoTagDocuments: %w", err) + } + count += autoCount + return count, nil + }() + if err != nil { log.Errorf("Error in processAutoTagDocuments: %v", err) time.Sleep(backoffDuration) @@ -242,8 +260,32 @@ func isOcrEnabled() bool { return visionLlmModel != "" && visionLlmProvider != "" } -// validateEnvVars ensures all necessary environment variables are set -func validateEnvVars() { +// validateOrDefaultEnvVars ensures all necessary environment variables are set +func validateOrDefaultEnvVars() { + if manualTag == "" { + manualTag = "paperless-gpt" + } + fmt.Printf("Using %s as manual tag\n", manualTag) + + if autoTag == "" { + autoTag = "paperless-gpt-auto" + } + fmt.Printf("Using %s as auto tag\n", autoTag) + + if manualOcrTag == "" { + manualOcrTag = "paperless-gpt-ocr" + } + if isOcrEnabled() { + fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag) + } + + if autoOcrTag == "" { + autoOcrTag = "paperless-gpt-ocr-auto" + } + if isOcrEnabled() { + fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag) + } + if paperlessBaseURL == "" { log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.") } @@ -306,6 +348,44 @@ func (app *App) processAutoTagDocuments() (int, error) { return len(documents), nil } +// processAutoOcrTagDocuments handles the background auto-tagging of OCR documents +func (app *App) processAutoOcrTagDocuments() (int, error) { + ctx := context.Background() + + documents, err := app.Client.GetDocumentsByTags(ctx, []string{autoOcrTag}) + if err != nil { + return 0, fmt.Errorf("error fetching documents with autoOcrTag: %w", err) + } + + if len(documents) == 0 { + log.Debugf("No documents with tag %s found", autoOcrTag) + return 0, nil // No documents to process + } + + log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag) + + documents = documents[:1] // Process only one document at a time + + ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID) + if err != nil { + return 0, fmt.Errorf("error processing document OCR: %w", err) + } + log.Debugf("OCR content for document %d: %s", documents[0].ID, ocrContent) + + err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{ + { + ID: documents[0].ID, + OriginalDocument: documents[0], + SuggestedContent: ocrContent, + }, + }, app.Database, false) + if err != nil { + return 0, fmt.Errorf("error updating documents: %w", err) + } + + return 1, nil // Processed one document +} + // removeTagFromList removes a specific tag from a list of tags func removeTagFromList(tags []string, tagToRemove string) []string { filteredTags := []string{} diff --git a/ocr.go b/ocr.go new file mode 100644 index 0000000..ca8ed28 --- /dev/null +++ b/ocr.go @@ -0,0 +1,39 @@ +package main + +import ( + "context" + "fmt" + "os" + "strings" +) + +// ProcessDocumentOCR processes a document through OCR and returns the combined text +func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) { + imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID) + defer func() { + for _, imagePath := range imagePaths { + os.Remove(imagePath) + } + }() + if err != nil { + return "", fmt.Errorf("error downloading document images: %w", err) + } + + var ocrTexts []string + for _, imagePath := range imagePaths { + imageContent, err := os.ReadFile(imagePath) + if err != nil { + return "", fmt.Errorf("error reading image file: %w", err) + } + + ocrText, err := app.doOCRViaLLM(ctx, imageContent) + if err != nil { + return "", fmt.Errorf("error performing OCR: %w", err) + } + log.Debugf("OCR text: %s", ocrText) + + ocrTexts = append(ocrTexts, ocrText) + } + + return strings.Join(ocrTexts, "\n\n"), nil +} diff --git a/paperless.go b/paperless.go index 6cff47c..a96fc1e 100644 --- a/paperless.go +++ b/paperless.go @@ -273,6 +273,7 @@ func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []Docum // remove autoTag to prevent infinite loop (even if it is in the original tags) originalTags = removeTagFromList(originalTags, autoTag) + originalTags = removeTagFromList(originalTags, autoOcrTag) if len(tags) == 0 { tags = originalTags From 0a98135286589f7ae203585b178ba76d12ab1fe2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 07:00:53 +0100 Subject: [PATCH 19/22] chore(deps): update dependency globals to v15.14.0 (#82) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- web-app/package-lock.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web-app/package-lock.json b/web-app/package-lock.json index f5702ff..0526981 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -2627,10 +2627,11 @@ } }, "node_modules/globals": { - "version": "15.9.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-15.9.0.tgz", - "integrity": "sha512-SmSKyLLKFbSr6rptvP8izbyxJL4ILwqO9Jg23UA0sDlGlu58V59D1//I3vlc0KJphVdUR7vMjHIplYnzBxorQA==", + "version": "15.14.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz", + "integrity": "sha512-OkToC372DtlQeje9/zHIo5CT8lRP/FUgEOKBEhU4e0abL7J7CD24fD9ohiLN5hagG/kWCYj4K5oaxxtj2Z0Dig==", "dev": true, + "license": "MIT", "engines": { "node": ">=18" }, From f0dbc709ed16b19fb9d41ecc2a0dd50d880c9e06 Mon Sep 17 00:00:00 2001 From: Icereed <domi@icereed.net> Date: Tue, 7 Jan 2025 09:25:02 +0100 Subject: [PATCH 20/22] Update README.md (#85) --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 77ff36a..e83bf12 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,6 @@ services: AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default is 'paperless-gpt-ocr-auto' LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' LISTEN_INTERFACE: '127.0.0.1:8080' # Optional, default is ':8080' - WEBUI_PATH: '/usr/share/paperless-gpt/webui' # Optional, default is './web-app/dist' volumes: - ./prompts:/app/prompts # Mount the prompts directory ports: From 0b8d5833603c6b62576b9f1f8632a0c03ac0738e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 09:28:06 +0100 Subject: [PATCH 21/22] chore(deps): update dependency typescript to v5.7.2 (#84) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- web-app/package-lock.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web-app/package-lock.json b/web-app/package-lock.json index 0526981..fe204d6 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -4068,10 +4068,11 @@ } }, "node_modules/typescript": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", - "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", + "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", "dev": true, + "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" From 7568074a7371f7ec8fa44b602d568986f749d771 Mon Sep 17 00:00:00 2001 From: Icereed <domi@icereed.net> Date: Tue, 7 Jan 2025 14:36:26 +0100 Subject: [PATCH 22/22] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index e83bf12..ce26854 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,6 @@ services: VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai AUTO_OCR_TAG: 'paperless-gpt-ocr-auto' # Optional, default is 'paperless-gpt-ocr-auto' LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' - LISTEN_INTERFACE: '127.0.0.1:8080' # Optional, default is ':8080' volumes: - ./prompts:/app/prompts # Mount the prompts directory ports: