diff --git a/.github/workflows/docker-build-and-push.yml b/.github/workflows/docker-build-and-push.yml index f4dc2a3..361f876 100644 --- a/.github/workflows/docker-build-and-push.yml +++ b/.github/workflows/docker-build-and-push.yml @@ -15,10 +15,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: 1.22 @@ -60,7 +60,7 @@ jobs: needs: test steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 diff --git a/.gitignore b/.gitignore index 296cc9f..e2c91ba 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .DS_Store prompts/ tests/tmp -tmp/ \ No newline at end of file +tmp/ +db/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b83b642..92e32c9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,11 +18,14 @@ COPY go.mod go.sum ./ # Download dependencies RUN go mod download -# Copy the rest of the application code -COPY . . +# Pre-compile go-sqlite3 to avoid doing this every time +RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3 -# Build the Go binary with the musl build tag -RUN go build -tags musl -o paperless-gpt . +# Now copy the actual source files +COPY *.go . + +# Build the binary using caching for both go modules and build cache +RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt . 
# Stage 2: Build Vite frontend FROM node:20-alpine AS frontend diff --git a/README.md b/README.md index f81241b..c45de32 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![License](https://img.shields.io/github/license/icereed/paperless-gpt)](LICENSE) [![Docker Pulls](https://img.shields.io/docker/pulls/icereed/paperless-gpt)](https://hub.docker.com/r/icereed/paperless-gpt) -[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md) +[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md) ![Screenshot](./paperless-gpt-screenshot.png) @@ -59,7 +59,7 @@ The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx. ```yaml -version: '3.7' +version: "3.7" services: paperless-ngx: image: ghcr.io/paperless-ngx/paperless-ngx:latest @@ -68,20 +68,24 @@ services: paperless-gpt: image: icereed/paperless-gpt:latest environment: - PAPERLESS_BASE_URL: 'http://paperless-ngx:8000' - PAPERLESS_API_TOKEN: 'your_paperless_api_token' - LLM_PROVIDER: 'openai' # or 'ollama' - LLM_MODEL: 'gpt-4o' # or 'llama2' - OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI - LLM_LANGUAGE: 'English' # Optional, default is 'English' - OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama - VISION_LLM_PROVIDER: 'ollama' # Optional, for OCR - VISION_LLM_MODEL: 'minicpm-v' # Optional, for OCR - LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error' + PAPERLESS_BASE_URL: "http://paperless-ngx:8000" + PAPERLESS_API_TOKEN: "your_paperless_api_token" + PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com" # Optional, your public link to access Paperless + MANUAL_TAG: "paperless-gpt" # Optional, default is 'paperless-gpt' + AUTO_TAG: "paperless-gpt-auto" # Optional, default is 'paperless-gpt-auto' + LLM_PROVIDER: "openai" # or 'ollama' + LLM_MODEL: "gpt-4o" # or 'llama2' 
+ OPENAI_API_KEY: "your_openai_api_key" # Required if using OpenAI + LLM_LANGUAGE: "English" # Optional, default is 'English' + OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama + VISION_LLM_PROVIDER: "ollama" # Optional (for OCR) - ollama or openai + VISION_LLM_MODEL: "minicpm-v" # Optional (for OCR) - minicpm-v, for example for ollama, gpt-4o for openai + AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default is 'paperless-gpt-ocr-auto' + LOG_LEVEL: "info" # Optional or 'debug', 'warn', 'error' volumes: - ./prompts:/app/prompts # Mount the prompts directory ports: - - '8080:8080' + - "8080:8080" depends_on: - paperless-ngx ``` @@ -134,9 +138,12 @@ If you prefer to run the application manually: ### Environment Variables | Variable | Description | Required | -|----------------------------|----------------------------------------------------------------------------------------------------------------------------------------|----------| +| -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -------- | | `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes | | `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes | +| `PAPERLESS_PUBLIC_URL` | The public URL for your Paperless instance, if it is different to your `PAPERLESS_BASE_URL` - say if you are running in Docker Compose | No | +| `MANUAL_TAG` | The tag to use for manually processing documents. Default is `paperless-gpt`. | No | +| `AUTO_TAG` | The tag to use for automatically processing documents. Default is `paperless-gpt-auto`. | No | | `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes | | `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes | | `OPENAI_API_KEY` | Your OpenAI API key. 
Required if using OpenAI as the LLM provider. | Cond. | @@ -144,7 +151,12 @@ If you prefer to run the application manually: | `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No | | `VISION_LLM_PROVIDER` | The vision LLM provider to use for OCR (`openai` or `ollama`). | No | | `VISION_LLM_MODEL` | The model name to use for OCR (e.g., `minicpm-v`). | No | +| `AUTO_OCR_TAG` | The tag to use for automatically processing documents with OCR. Default is `paperless-gpt-ocr-auto`. | No | | `LOG_LEVEL` | The log level for the application (`info`, `debug`, `warn`, `error`). Default is `info`. | No | +| `LISTEN_INTERFACE` | The address (interface and port) paperless-gpt listens on. Default is `:8080`. | No | +| `WEBUI_PATH` | The path to load static content from. Default is `./web-app/dist`. | No | +| `AUTO_GENERATE_TITLE` | Enable/disable title generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true`. | No | +| `AUTO_GENERATE_TAGS` | Enable/disable tag generation when automatically applying suggestions with `paperless-gpt-auto`. Default is `true`. | No | | `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No | **Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container. 
diff --git a/app_http_handlers.go b/app_http_handlers.go index 5ca600b..6fff276 100644 --- a/app_http_handlers.go +++ b/app_http_handlers.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "fmt" "net/http" "os" @@ -138,7 +139,7 @@ func (app *App) updateDocumentsHandler(c *gin.Context) { return } - err := app.Client.UpdateDocuments(ctx, documents) + err := app.Client.UpdateDocuments(ctx, documents, app.Database, false) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error updating documents: %v", err)}) log.Errorf("Error updating documents: %v", err) @@ -237,8 +238,99 @@ func (app *App) getDocumentHandler() gin.HandlerFunc { document, err := app.Client.GetDocument(c, parsedID) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + log.Errorf("Error fetching document: %v", err) return } c.JSON(http.StatusOK, document) } } + +// Section for local-db actions + +// getModificationHistoryHandler responds with every modification record returned by GetAllModifications as JSON, or with HTTP 500 if the lookup fails. +func (app *App) getModificationHistoryHandler(c *gin.Context) { + modifications, err := GetAllModifications(app.Database) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification history"}) + log.Errorf("Failed to retrieve modification history: %v", err) + return + } + c.JSON(http.StatusOK, modifications) +} + +// undoModificationHandler reverts the modification identified by the "id" path parameter: it rebuilds a suggestion from the +// stored previous value, applies it through UpdateDocuments and, on success, flags the record as undone via SetModificationUndone. +func (app *App) undoModificationHandler(c *gin.Context) { + id := c.Param("id") + // ParseUint rejects signed input, so a negative ID can no longer wrap around in the uint conversion below + modID, err := strconv.ParseUint(id, 10, 0) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification ID"}) + log.Errorf("Invalid modification ID: %v", err) + return + } + + modification, err := GetModification(app.Database, uint(modID)) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification"}) + log.Errorf("Failed to retrieve modification: %v", err) + return + } + + if modification.Undone { + c.JSON(http.StatusBadRequest, gin.H{"error": "Modification has already been undone"}) + log.Errorf("Modification has already been undone: %v", 
id) + return + } + + // Ok, we're actually doing the update: + ctx := c.Request.Context() + + // Make the document suggestions for UpdateDocuments + var suggestion DocumentSuggestion + suggestion.ID = int(modification.DocumentID) + suggestion.OriginalDocument, err = app.Client.GetDocument(ctx, int(modification.DocumentID)) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve original document"}) + log.Errorf("Failed to retrieve original document: %v", err) + return + } + switch modification.ModField { + case "title": + suggestion.SuggestedTitle = modification.PreviousValue + case "tags": + var tags []string + err := json.Unmarshal([]byte(modification.PreviousValue), &tags) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to unmarshal previous tags"}) + log.Errorf("Failed to unmarshal previous tags: %v", err) + return + } + suggestion.SuggestedTags = tags + case "content": + suggestion.SuggestedContent = modification.PreviousValue + default: + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification field"}) + log.Errorf("Invalid modification field: %v", modification.ModField) + return + } + + // Update the document + err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{suggestion}, app.Database, true) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update document"}) + log.Errorf("Failed to update document: %v", err) + return + } + + // Successful, so set modification as undone + err = SetModificationUndone(app.Database, modification) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to mark modification as undone"}) + log.Errorf("Failed to mark modification as undone: %v", err) + return + } + + // Else all was ok + c.Status(http.StatusOK) +} diff --git a/app_llm.go b/app_llm.go index ec2cb59..7fae692 100644 --- a/app_llm.go +++ b/app_llm.go @@ -3,6 +3,7 @@ package main import ( "bytes" "context" + "encoding/base64" "fmt" "strings" "sync" @@ -121,14 +122,26 @@ func (app 
*App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro prompt := promptBuffer.String() + // If not OpenAI then use binary part for image, otherwise, use the ImageURL part with encoding from https://platform.openai.com/docs/guides/vision + var parts []llms.ContentPart + if strings.ToLower(visionLlmProvider) != "openai" { + parts = []llms.ContentPart{ + llms.BinaryPart("image/jpeg", jpegBytes), + llms.TextPart(prompt), + } + } else { + base64Image := base64.StdEncoding.EncodeToString(jpegBytes) + parts = []llms.ContentPart{ + llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)), + llms.TextPart(prompt), + } + } + // Convert the image to text completion, err := app.VisionLLM.GenerateContent(ctx, []llms.MessageContent{ { - Parts: []llms.ContentPart{ - llms.BinaryPart("image/jpeg", jpegBytes), - llms.TextPart(prompt), - }, - Role: llms.ChatMessageTypeHuman, + Parts: parts, + Role: llms.ChatMessageTypeHuman, }, }) if err != nil { diff --git a/go.mod b/go.mod index babd910..880f413 100644 --- a/go.mod +++ b/go.mod @@ -5,19 +5,22 @@ go 1.22.0 toolchain go1.22.2 require ( - github.com/Masterminds/sprig/v3 v3.2.3 + github.com/Masterminds/sprig/v3 v3.3.0 github.com/gen2brain/go-fitz v1.24.14 github.com/gin-gonic/gin v1.10.0 github.com/google/uuid v1.6.0 github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 github.com/tmc/langchaingo v0.1.12 - golang.org/x/sync v0.7.0 + golang.org/x/sync v0.10.0 + gorm.io/driver/sqlite v1.5.6 + gorm.io/gorm v1.25.12 ) require ( + dario.cat/mergo v1.0.1 // indirect github.com/Masterminds/goutils v1.1.1 // indirect - github.com/Masterminds/semver/v3 v3.2.0 // indirect + github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/bytedance/sonic v1.11.6 // indirect github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cloudwego/base64x v0.1.4 // indirect @@ -31,29 +34,32 @@ require ( github.com/go-playground/universal-translator v0.18.1 // 
indirect github.com/go-playground/validator/v10 v10.20.0 // indirect github.com/goccy/go-json v0.10.2 // indirect - github.com/huandu/xstrings v1.3.3 // indirect + github.com/huandu/xstrings v1.5.0 // indirect github.com/imdario/mergo v0.3.13 // indirect + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/jupiterrider/ffi v0.2.0 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mitchellh/copystructure v1.0.0 // indirect - github.com/mitchellh/reflectwalk v1.0.0 // indirect + github.com/mattn/go-sqlite3 v1.14.24 // indirect + github.com/mitchellh/copystructure v1.2.0 // indirect + github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pkoukk/tiktoken-go v0.1.6 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/shopspring/decimal v1.2.0 // indirect - github.com/spf13/cast v1.3.1 // indirect + github.com/shopspring/decimal v1.4.0 // indirect + github.com/spf13/cast v1.7.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.12 // indirect golang.org/x/arch v0.8.0 // indirect - golang.org/x/crypto v0.23.0 // indirect + golang.org/x/crypto v0.26.0 // indirect golang.org/x/net v0.25.0 // indirect - golang.org/x/sys v0.20.0 // indirect - golang.org/x/text v0.15.0 // indirect + golang.org/x/sys v0.23.0 // indirect + golang.org/x/text v0.20.0 // indirect google.golang.org/protobuf v1.34.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 76d584c..945e689 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,15 @@ +dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= 
+dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= +github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= +github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= @@ -45,9 +51,15 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= +github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= +github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= github.com/imdario/mergo v0.3.13/go.mod 
h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jupiterrider/ffi v0.2.0 h1:tMM70PexgYNmV+WyaYhJgCvQAvtTCs3wXeILPutihnA= @@ -60,10 +72,16 @@ github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM= +github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/modern-go/concurrent 
v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -77,10 +95,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= +github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= +github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -95,6 +117,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE= github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= @@ -110,6 +134,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -119,8 +145,10 @@ golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync 
v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -132,6 +160,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= @@ -139,8 +169,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= @@ -157,6 +187,10 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/sqlite v1.5.6 h1:fO/X46qn5NUEEOZtnjJRWRzZMe8nqJiQ9E+0hi+hKQE= +gorm.io/driver/sqlite v1.5.6/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4= +gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8= +gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= diff --git a/jobs.go b/jobs.go index bc58b82..7b21876 100644 --- a/jobs.go +++ b/jobs.go @@ -2,10 +2,8 @@ package main import ( "context" - "fmt" "os" "sort" - "strings" "sync" "time" @@ -125,38 +123,13 @@ func processJob(app *App, job *Job) { ctx := context.Background() - // Download images of the document - imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, job.DocumentID) + fullOcrText, err := app.ProcessDocumentOCR(ctx, job.DocumentID) if err != nil { - logger.Infof("Error downloading document images for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error downloading document images: %v", err)) + logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err) + jobStore.updateJobStatus(job.ID, "failed", err.Error()) return } - var ocrTexts []string - for i, imagePath := range imagePaths { - imageContent, err 
:= os.ReadFile(imagePath) - if err != nil { - logger.Errorf("Error reading image file for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error reading image file: %v", err)) - return - } - - ocrText, err := app.doOCRViaLLM(ctx, imageContent) - if err != nil { - logger.Errorf("Error performing OCR for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", fmt.Sprintf("Error performing OCR: %v", err)) - return - } - - ocrTexts = append(ocrTexts, ocrText) - jobStore.updatePagesDone(job.ID, i+1) // Update PagesDone after each page is processed - } - - // Combine the OCR texts - fullOcrText := strings.Join(ocrTexts, "\n\n") - - // Update job status and result jobStore.updateJobStatus(job.ID, "completed", fullOcrText) logger.Infof("Job completed: %s", job.ID) } diff --git a/local_db.go b/local_db.go new file mode 100644 index 0000000..931d7fb --- /dev/null +++ b/local_db.go @@ -0,0 +1,79 @@ +package main + +import ( + "os" + "path/filepath" + "time" + + "gorm.io/driver/sqlite" + "gorm.io/gorm" +) + +// ModificationHistory is the GORM model for one recorded document change (GORM's default naming maps it to the modification_histories table) +type ModificationHistory struct { + ID uint `gorm:"primaryKey"` // Auto-incrementing primary key + DocumentID uint `gorm:"not null"` // ID of the Paperless document that was modified + DateChanged string `gorm:"not null"` // Date and time of modification (RFC3339, set by InsertModification) + ModField string `gorm:"size:255;not null"` // Field being modified ("title", "tags" or "content") + PreviousValue string `gorm:"size:1048576"` // Previous value of the field (tags are stored as a JSON array) + NewValue string `gorm:"size:1048576"` // New value of the field + Undone bool `gorm:"not null;default:false"` // Whether the modification has been undone + UndoneDate string `gorm:"default:null"` // Date and time of undoing the modification (RFC3339, set by SetModificationUndone) +} + +// InitializeDB opens db/modification_history.db (creating the db directory if needed), migrates the schema and aborts via log.Fatalf on any failure +func InitializeDB() *gorm.DB { + // Ensure db directory exists + dbDir := "db" + if err := os.MkdirAll(dbDir, os.ModePerm); err != nil 
{ + log.Fatalf("Failed to create db directory: %v", err) + } + + dbPath := filepath.Join(dbDir, "modification_history.db") + + // Connect to SQLite database + db, err := gorm.Open(sqlite.Open(dbPath), &gorm.Config{}) + if err != nil { + log.Fatalf("Failed to connect to database: %v", err) + } + + // Migrate the schema (create the table if it doesn't exist) + err = db.AutoMigrate(&ModificationHistory{}) + if err != nil { + log.Fatalf("Failed to migrate database schema: %v", err) + } + + return db +} + +// InsertModification stamps record.DateChanged with the current time and inserts the record into the database +func InsertModification(db *gorm.DB, record *ModificationHistory) error { + log.Debugf("Passed modification record: %+v", record) + record.DateChanged = time.Now().Format(time.RFC3339) // Set the DateChanged field to the current time + log.Debugf("Inserting modification record: %+v", record) + result := db.Create(record) // record is already a pointer, so it is handed to GORM's Create as-is + log.Debugf("Insertion result: %+v", result) + return result.Error +} + +// GetModification retrieves a modification record by its ID; check the returned error before using the record +func GetModification(db *gorm.DB, id uint) (*ModificationHistory, error) { + var record ModificationHistory + result := db.First(&record, id) // GORM's First method retrieves the first record matching the ID + return &record, result.Error +} + +// GetAllModifications retrieves all modification records from the database, ordered by DateChanged descending +func GetAllModifications(db *gorm.DB) ([]ModificationHistory, error) { + var records []ModificationHistory + result := db.Order("date_changed DESC").Find(&records) // GORM's Find method retrieves all records + return records, result.Error +} + +// SetModificationUndone marks a modification record as undone, sets the undo date and saves the record +func SetModificationUndone(db *gorm.DB, record *ModificationHistory) error { + record.Undone = true + record.UndoneDate = time.Now().Format(time.RFC3339) + result := db.Save(record) // record is already a pointer, so it is handed to GORM's Save as-is + return result.Error +} diff --git a/main.go b/main.go index 506ecdf..b290886 
100644 --- a/main.go +++ b/main.go @@ -17,6 +17,7 @@ import ( "github.com/tmc/langchaingo/llms" "github.com/tmc/langchaingo/llms/ollama" "github.com/tmc/langchaingo/llms/openai" + "gorm.io/gorm" ) // Global Variables and Constants @@ -26,17 +27,24 @@ var ( log = logrus.New() // Environment Variables - paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") - paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") - openaiAPIKey = os.Getenv("OPENAI_API_KEY") - manualTag = "paperless-gpt" - autoTag = "paperless-gpt-auto" - llmProvider = os.Getenv("LLM_PROVIDER") - llmModel = os.Getenv("LLM_MODEL") - visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") - visionLlmModel = os.Getenv("VISION_LLM_MODEL") - logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) - correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",") + correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",") + paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") + paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") + openaiAPIKey = os.Getenv("OPENAI_API_KEY") + manualTag = os.Getenv("MANUAL_TAG") + autoTag = os.Getenv("AUTO_TAG") + manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet + autoOcrTag = os.Getenv("AUTO_OCR_TAG") + llmProvider = os.Getenv("LLM_PROVIDER") + llmModel = os.Getenv("LLM_MODEL") + visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") + visionLlmModel = os.Getenv("VISION_LLM_MODEL") + logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) + listenInterface = os.Getenv("LISTEN_INTERFACE") + webuiPath = os.Getenv("WEBUI_PATH") + autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE") + autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS") + autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS") // Templates titleTemplate *template.Template @@ -68,7 +76,6 @@ Content: Please concisely select the {{.Language}} tags from the list above that best describe the document. 
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable. ` - defaultCorrespondentTemplate = `I will provide you with the content of a document. Your task is to suggest a correspondent that is most relevant to the document. Correspondents are the senders of documents that reach you. In the other direction, correspondents are the recipients of documents that you send. @@ -96,20 +103,20 @@ The content is likely in {{.Language}}. Document Content: {{.Content}} ` - - defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format.` + defaultOcrPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! 
Use markdown format but without a code block.` ) // App struct to hold dependencies type App struct { Client *PaperlessClient + Database *gorm.DB LLM llms.Model VisionLLM llms.Model } func main() { // Validate Environment Variables - validateEnvVars() + validateOrDefaultEnvVars() // Initialize logrus logger initLogger() @@ -117,6 +124,9 @@ func main() { // Initialize PaperlessClient client := NewPaperlessClient(paperlessBaseURL, paperlessAPIToken) + // Initialize Database + database := InitializeDB() + // Load Templates loadTemplates() @@ -135,6 +145,7 @@ func main() { // Initialize App with dependencies app := &App{ Client: client, + Database: database, LLM: llm, VisionLLM: visionLlm, } @@ -147,7 +158,23 @@ func main() { backoffDuration := minBackoffDuration for { - processedCount, err := app.processAutoTagDocuments() + processedCount, err := func() (int, error) { + count := 0 + if isOcrEnabled() { + ocrCount, err := app.processAutoOcrTagDocuments() + if err != nil { + return 0, fmt.Errorf("error in processAutoOcrTagDocuments: %w", err) + } + count += ocrCount + } + autoCount, err := app.processAutoTagDocuments() + if err != nil { + return 0, fmt.Errorf("error in processAutoTagDocuments: %w", err) + } + count += autoCount + return count, nil + }() + if err != nil { log.Errorf("Error in processAutoTagDocuments: %v", err) time.Sleep(backoffDuration) @@ -195,23 +222,43 @@ func main() { enabled := isOcrEnabled() c.JSON(http.StatusOK, gin.H{"enabled": enabled}) }) + + // Local db actions + api.GET("/modifications", app.getModificationHistoryHandler) + api.POST("/undo-modification/:id", app.undoModificationHandler) + + // Get public Paperless environment (as set in environment variables) + api.GET("/paperless-url", func(c *gin.Context) { + baseUrl := os.Getenv("PAPERLESS_PUBLIC_URL") + if baseUrl == "" { + baseUrl = os.Getenv("PAPERLESS_BASE_URL") + } + baseUrl = strings.TrimRight(baseUrl, "/") + c.JSON(http.StatusOK, gin.H{"url": baseUrl}) + }) } + if webuiPath == "" { 
+ webuiPath = "./web-app/dist" + } // Serve static files for the frontend under /assets - router.StaticFS("/assets", gin.Dir("./web-app/dist/assets", true)) - router.StaticFile("/vite.svg", "./web-app/dist/vite.svg") + router.StaticFS("/assets", gin.Dir(webuiPath+"/assets", true)) + router.StaticFile("/vite.svg", webuiPath+"/vite.svg") // Catch-all route for serving the frontend router.NoRoute(func(c *gin.Context) { - c.File("./web-app/dist/index.html") + c.File(webuiPath + "/index.html") }) // Start OCR worker pool numWorkers := 1 // Number of workers to start startWorkerPool(app, numWorkers) - log.Infoln("Server started on port :8080") - if err := router.Run(":8080"); err != nil { + if listenInterface == "" { + listenInterface = ":8080" + } + log.Infoln("Server started on interface", listenInterface) + if err := router.Run(listenInterface); err != nil { log.Fatalf("Failed to run server: %v", err) } } @@ -242,8 +289,32 @@ func isOcrEnabled() bool { return visionLlmModel != "" && visionLlmProvider != "" } -// validateEnvVars ensures all necessary environment variables are set -func validateEnvVars() { +// validateOrDefaultEnvVars ensures all necessary environment variables are set +func validateOrDefaultEnvVars() { + if manualTag == "" { + manualTag = "paperless-gpt" + } + fmt.Printf("Using %s as manual tag\n", manualTag) + + if autoTag == "" { + autoTag = "paperless-gpt-auto" + } + fmt.Printf("Using %s as auto tag\n", autoTag) + + if manualOcrTag == "" { + manualOcrTag = "paperless-gpt-ocr" + } + if isOcrEnabled() { + fmt.Printf("Using %s as manual OCR tag\n", manualOcrTag) + } + + if autoOcrTag == "" { + autoOcrTag = "paperless-gpt-ocr-auto" + } + if isOcrEnabled() { + fmt.Printf("Using %s as auto OCR tag\n", autoOcrTag) + } + if paperlessBaseURL == "" { log.Fatal("Please set the PAPERLESS_BASE_URL environment variable.") } @@ -287,9 +358,9 @@ func (app *App) processAutoTagDocuments() (int, error) { suggestionRequest := GenerateSuggestionsRequest{ Documents: 
documents, - GenerateTitles: true, - GenerateTags: true, - GenerateCorrespondents: true, + GenerateTitles: strings.ToLower(autoGenerateTitle) != "false", + GenerateTags: strings.ToLower(autoGenerateTags) != "false", + GenerateCorrespondents: strings.ToLower(autoGenerateCorrespondents) != "false", } suggestions, err := app.generateDocumentSuggestions(ctx, suggestionRequest) @@ -297,7 +368,7 @@ func (app *App) processAutoTagDocuments() (int, error) { return 0, fmt.Errorf("error generating suggestions: %w", err) } - err = app.Client.UpdateDocuments(ctx, suggestions) + err = app.Client.UpdateDocuments(ctx, suggestions, app.Database, false) if err != nil { return 0, fmt.Errorf("error updating documents: %w", err) } @@ -305,6 +376,44 @@ func (app *App) processAutoTagDocuments() (int, error) { return len(documents), nil } +// processAutoOcrTagDocuments handles the background auto-tagging of OCR documents +func (app *App) processAutoOcrTagDocuments() (int, error) { + ctx := context.Background() + + documents, err := app.Client.GetDocumentsByTags(ctx, []string{autoOcrTag}) + if err != nil { + return 0, fmt.Errorf("error fetching documents with autoOcrTag: %w", err) + } + + if len(documents) == 0 { + log.Debugf("No documents with tag %s found", autoOcrTag) + return 0, nil // No documents to process + } + + log.Debugf("Found at least %d remaining documents with tag %s", len(documents), autoOcrTag) + + documents = documents[:1] // Process only one document at a time + + ocrContent, err := app.ProcessDocumentOCR(ctx, documents[0].ID) + if err != nil { + return 0, fmt.Errorf("error processing document OCR: %w", err) + } + log.Debugf("OCR content for document %d: %s", documents[0].ID, ocrContent) + + err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{ + { + ID: documents[0].ID, + OriginalDocument: documents[0], + SuggestedContent: ocrContent, + }, + }, app.Database, false) + if err != nil { + return 0, fmt.Errorf("error updating documents: %w", err) + } + + return 1, nil 
// Processed one document +} + // removeTagFromList removes a specific tag from a list of tags func removeTagFromList(tags []string, tagToRemove string) []string { filteredTags := []string{} diff --git a/ocr.go b/ocr.go new file mode 100644 index 0000000..ca8ed28 --- /dev/null +++ b/ocr.go @@ -0,0 +1,39 @@ +package main + +import ( + "context" + "fmt" + "os" + "strings" +) + +// ProcessDocumentOCR processes a document through OCR and returns the combined text +func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) { + imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID) + defer func() { + for _, imagePath := range imagePaths { + os.Remove(imagePath) + } + }() + if err != nil { + return "", fmt.Errorf("error downloading document images: %w", err) + } + + var ocrTexts []string + for _, imagePath := range imagePaths { + imageContent, err := os.ReadFile(imagePath) + if err != nil { + return "", fmt.Errorf("error reading image file: %w", err) + } + + ocrText, err := app.doOCRViaLLM(ctx, imageContent) + if err != nil { + return "", fmt.Errorf("error performing OCR: %w", err) + } + log.Debugf("OCR text: %s", ocrText) + + ocrTexts = append(ocrTexts, ocrText) + } + + return strings.Join(ocrTexts, "\n\n"), nil +} diff --git a/paperless.go b/paperless.go index 2f26057..497620d 100644 --- a/paperless.go +++ b/paperless.go @@ -11,11 +11,13 @@ import ( "os" "path/filepath" "slices" + "sort" "strings" "sync" "github.com/gen2brain/go-fitz" "golang.org/x/sync/errgroup" + "gorm.io/gorm" ) // PaperlessClient struct to interact with the Paperless-NGX API @@ -26,6 +28,32 @@ type PaperlessClient struct { CacheFolder string } +func hasSameTags(original, suggested []string) bool { + if len(original) != len(suggested) { + return false + } + + // Create copies to avoid modifying original slices + orig := make([]string, len(original)) + sugg := make([]string, len(suggested)) + + copy(orig, original) + copy(sugg, suggested) + + // Sort both 
slices + sort.Strings(orig) + sort.Strings(sugg) + + // Compare elements + for i := range orig { + if orig[i] != sugg[i] { + return false + } + } + + return true +} + // NewPaperlessClient creates a new instance of PaperlessClient with a default HTTP client func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient { cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR") @@ -108,10 +136,10 @@ func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string, pageSize int) ([]Document, error) { tagQueries := make([]string, len(tags)) for i, tag := range tags { - tagQueries[i] = fmt.Sprintf("tag:%s", tag) + tagQueries[i] = fmt.Sprintf("tags__name__iexact=%s", tag) } - searchQuery := strings.Join(tagQueries, " ") - path := fmt.Sprintf("api/documents/?query=%s&page_size=%d", urlEncode(searchQuery), pageSize) + searchQuery := strings.Join(tagQueries, "&") + path := fmt.Sprintf("api/documents/?%s&page_size=%d", urlEncode(searchQuery), pageSize) resp, err := client.Do(ctx, "GET", path, nil) if err != nil { @@ -218,7 +246,7 @@ func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int) } // UpdateDocuments updates the specified documents with suggested changes -func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error { +func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error { // Fetch all available tags availableTags, err := client.GetAllTags(ctx) if err != nil { @@ -247,21 +275,44 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents [] for _, document := range documents { documentID := document.ID + // Original fields will store any updated fields to store records for + originalFields := make(map[string]interface{}) updatedFields := make(map[string]interface{}) newTags := []int{} tags := 
document.SuggestedTags - if len(tags) == 0 { - tags = document.OriginalDocument.Tags + originalTags := document.OriginalDocument.Tags + + originalTagsJSON, err := json.Marshal(originalTags) + if err != nil { + log.Errorf("Error marshalling JSON for document %d: %v", documentID, err) + return err } + // remove autoTag to prevent infinite loop (even if it is in the original tags) - tags = removeTagFromList(tags, autoTag) + originalTags = removeTagFromList(originalTags, autoTag) + originalTags = removeTagFromList(originalTags, autoOcrTag) + + if len(tags) == 0 { + tags = originalTags + } else { + // We have suggested tags to change + originalFields["tags"] = originalTags + // remove autoTag to prevent infinite loop - this is required in case of undo + tags = removeTagFromList(tags, autoTag) + } + + updatedTagsJSON, err := json.Marshal(tags) + if err != nil { + log.Errorf("Error marshalling JSON for document %d: %v", documentID, err) + return err + } // Map suggested tag names to IDs for _, tagName := range tags { if tagID, exists := availableTags[tagName]; exists { // Skip the tag that we are filtering - if tagName == manualTag { + if !isUndo && tagName == manualTag { continue } newTags = append(newTags, tagID) @@ -292,6 +343,7 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents [] suggestedTitle = suggestedTitle[:128] } if suggestedTitle != "" { + originalFields["title"] = document.OriginalDocument.Title updatedFields["title"] = suggestedTitle } else { log.Warnf("No valid title found for document %d, skipping.", documentID) @@ -300,8 +352,11 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents [] // Suggested Content suggestedContent := document.SuggestedContent if suggestedContent != "" { + originalFields["content"] = document.OriginalDocument.Content updatedFields["content"] = suggestedContent } + log.Debugf("Document %d: Original fields: %v", documentID, originalFields) + log.Debugf("Document %d: Updated 
fields: %v Tags: %v", documentID, updatedFields, tags) // Marshal updated fields to JSON jsonData, err := json.Marshal(updatedFields) @@ -323,6 +378,43 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents [] bodyBytes, _ := io.ReadAll(resp.Body) log.Errorf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes)) return fmt.Errorf("error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes)) + } else { + for field, value := range originalFields { + log.Printf("Document %d: Updated %s from %v to %v", documentID, field, originalFields[field], value) + // Insert the modification record into the database + var modificationRecord ModificationHistory + if field == "tags" { + // Make sure we only store changes where tags are changed - not the same before and after + // And we have to use tags, not updatedFields as they are IDs not fields + if !hasSameTags(document.OriginalDocument.Tags, tags) { + modificationRecord = ModificationHistory{ + DocumentID: uint(documentID), + ModField: field, + PreviousValue: string(originalTagsJSON), + NewValue: string(updatedTagsJSON), + } + } + } else { + // Only store mod if field actually changed + if originalFields[field] != updatedFields[field] { + modificationRecord = ModificationHistory{ + DocumentID: uint(documentID), + ModField: field, + PreviousValue: fmt.Sprintf("%v", originalFields[field]), + NewValue: fmt.Sprintf("%v", updatedFields[field]), + } + } + } + + // Only store if we have a valid modification record + if (modificationRecord != ModificationHistory{}) { + err = InsertModification(db, &modificationRecord) + } + if err != nil { + log.Errorf("Error inserting modification record for document %d: %v", documentID, err) + return err + } + } } log.Printf("Document %d updated successfully.", documentID) diff --git a/paperless_test.go b/paperless_test.go index 959adaa..13cf103 100644 --- a/paperless_test.go +++ b/paperless_test.go @@ -13,6 +13,8 @@ 
import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gorm.io/driver/sqlite" + "gorm.io/gorm" ) // Helper struct to hold common test data and methods @@ -22,6 +24,7 @@ type testEnv struct { client *PaperlessClient requestCount int mockResponses map[string]http.HandlerFunc + db *gorm.DB } // newTestEnv initializes a new test environment @@ -31,6 +34,11 @@ func newTestEnv(t *testing.T) *testEnv { mockResponses: make(map[string]http.HandlerFunc), } + // Initialize test database + db, err := InitializeTestDB() + require.NoError(t, err) + env.db = db + // Create a mock server with a handler that dispatches based on URL path env.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { env.requestCount++ @@ -50,6 +58,22 @@ func newTestEnv(t *testing.T) *testEnv { return env } +func InitializeTestDB() (*gorm.DB, error) { + // Use in-memory SQLite for testing + db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{}) + if err != nil { + return nil, err + } + + // Migrate schema + err = db.AutoMigrate(&ModificationHistory{}) + if err != nil { + return nil, err + } + + return db, nil +} + // teardown closes the mock server func (env *testEnv) teardown() { env.server.Close() @@ -203,7 +227,7 @@ func TestGetDocumentsByTags(t *testing.T) { // Set mock responses env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) { // Verify query parameters - expectedQuery := "query=tag:tag1+tag:tag2&page_size=25" + expectedQuery := "tags__name__iexact=tag1&tags__name__iexact=tag2&page_size=25" assert.Equal(t, expectedQuery, r.URL.RawQuery) w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(documentsResponse) @@ -327,7 +351,7 @@ func TestUpdateDocuments(t *testing.T) { }) ctx := context.Background() - err := env.client.UpdateDocuments(ctx, documents) + err := env.client.UpdateDocuments(ctx, documents, env.db, false) require.NoError(t, err) } diff --git a/renovate.json 
b/renovate.json new file mode 100644 index 0000000..5db72dd --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended" + ] +} diff --git a/web-app/package-lock.json b/web-app/package-lock.json index 31ecd9b..fe204d6 100644 --- a/web-app/package-lock.json +++ b/web-app/package-lock.json @@ -10,17 +10,23 @@ "dependencies": { "@headlessui/react": "^2.1.8", "@heroicons/react": "^2.1.5", + "@mdi/js": "^7.4.47", + "@mdi/react": "^1.6.1", "axios": "^1.7.7", "classnames": "^2.5.1", + "date-fns": "^4.1.0", "prop-types": "^15.8.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.3.0", "react-router-dom": "^6.27.0", - "react-tag-autocomplete": "^7.3.0" + "react-tag-autocomplete": "^7.3.0", + "react-tooltip": "^5.28.0", + "winston": "^3.17.0" }, "devDependencies": { "@eslint/js": "^9.9.0", + "@types/node": "^22.10.1", "@types/react": "^18.3.3", "@types/react-dom": "^18.3.0", "@vitejs/plugin-react-swc": "^3.5.0", @@ -48,6 +54,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@colors/colors": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz", + "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==", + "engines": { + "node": ">=0.1.90" + } + }, + "node_modules/@dabh/diagnostics": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz", + "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==", + "dependencies": { + "colorspace": "1.1.x", + "enabled": "2.0.x", + "kuler": "^2.0.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", @@ -723,6 +747,19 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mdi/js": { + 
"version": "7.4.47", + "resolved": "https://registry.npmjs.org/@mdi/js/-/js-7.4.47.tgz", + "integrity": "sha512-KPnNOtm5i2pMabqZxpUz7iQf+mfrYZyKCZ8QNz85czgEt7cuHcGorWfdzUMWYA0SD+a6Hn4FmJ+YhzzzjkTZrQ==" + }, + "node_modules/@mdi/react": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@mdi/react/-/react-1.6.1.tgz", + "integrity": "sha512-4qZeDcluDFGFTWkHs86VOlHkm6gnKaMql13/gpIcUQ8kzxHgpj31NuCkD8abECVfbULJ3shc7Yt4HJ6Wu6SN4w==", + "dependencies": { + "prop-types": "^15.7.2" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -1313,6 +1350,16 @@ "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", "dev": true }, + "node_modules/@types/node": { + "version": "22.10.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.5.tgz", + "integrity": "sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.20.0" + } + }, "node_modules/@types/prop-types": { "version": "15.7.13", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.13.tgz", @@ -1338,6 +1385,11 @@ "@types/react": "*" } }, + "node_modules/@types/triple-beam": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz", + "integrity": "sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==" + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.6.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.6.0.tgz", @@ -1557,15 +1609,16 @@ } }, "node_modules/@vitejs/plugin-react-swc": { - "version": "3.7.0", - "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.0.tgz", - "integrity": 
"sha512-yrknSb3Dci6svCd/qhHqhFPDSw0QtjumcqdKMoNNzmOl5lMXTTiqzjWtG4Qask2HdvvzaNgSunbQGet8/GrKdA==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.2.tgz", + "integrity": "sha512-y0byko2b2tSVVf5Gpng1eEhX1OvPC7x8yns1Fx8jDzlJp4LS6CMkCPfLw47cjyoMrshQDoQw4qcgjsU9VvlCew==", "dev": true, + "license": "MIT", "dependencies": { - "@swc/core": "^1.5.7" + "@swc/core": "^1.7.26" }, "peerDependencies": { - "vite": "^4 || ^5" + "vite": "^4 || ^5 || ^6" } }, "node_modules/acorn": { @@ -1660,6 +1713,11 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", "dev": true }, + "node_modules/async": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", + "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -1887,6 +1945,15 @@ "node": ">=6" } }, + "node_modules/color": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz", + "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==", + "dependencies": { + "color-convert": "^1.9.3", + "color-string": "^1.6.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -1902,8 +1969,38 @@ "node_modules/color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/color-string": { + "version": "1.9.1", + "resolved": 
"https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", + "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", + "dependencies": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, + "node_modules/color/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" + }, + "node_modules/colorspace": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz", + "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==", + "dependencies": { + "color": "^3.1.3", + "text-hex": "1.0.x" + } }, "node_modules/combined-stream": { "version": "1.0.8", @@ -1963,6 +2060,15 @@ "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", "dev": true }, + "node_modules/date-fns": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-4.1.0.tgz", + "integrity": "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/kossnocorp" + } + }, "node_modules/debug": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", @@ -2024,6 +2130,11 @@ "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "dev": true }, + "node_modules/enabled": { 
+ "version": "2.0.0", + "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz", + "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==" + }, "node_modules/esbuild": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", @@ -2155,12 +2266,13 @@ } }, "node_modules/eslint-plugin-react-refresh": { - "version": "0.4.12", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.12.tgz", - "integrity": "sha512-9neVjoGv20FwYtCP6CB1dzR1vr57ZDNOXst21wd2xJ/cTlM2xLq0GWVlSNTdMn/4BtP6cHYBMCSp1wFBJ9jBsg==", + "version": "0.4.16", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.16.tgz", + "integrity": "sha512-slterMlxAhov/DZO8NScf6mEeMBBXodFUolijDvrtTxyezyLoTQaa73FyYus/VbTdftd8wBgBxPMRk3poleXNQ==", "dev": true, + "license": "MIT", "peerDependencies": { - "eslint": ">=7" + "eslint": ">=8.40" } }, "node_modules/eslint-scope": { @@ -2305,6 +2417,11 @@ "reusify": "^1.0.4" } }, + "node_modules/fecha": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", + "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw==" + }, "node_modules/file-entry-cache": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", @@ -2364,6 +2481,11 @@ "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", "dev": true }, + "node_modules/fn.name": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", + "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" + }, "node_modules/follow-redirects": { "version": "1.15.9", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", @@ 
-2505,10 +2627,11 @@ } }, "node_modules/globals": { - "version": "15.9.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-15.9.0.tgz", - "integrity": "sha512-SmSKyLLKFbSr6rptvP8izbyxJL4ILwqO9Jg23UA0sDlGlu58V59D1//I3vlc0KJphVdUR7vMjHIplYnzBxorQA==", + "version": "15.14.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz", + "integrity": "sha512-OkToC372DtlQeje9/zHIo5CT8lRP/FUgEOKBEhU4e0abL7J7CD24fD9ohiLN5hagG/kWCYj4K5oaxxtj2Z0Dig==", "dev": true, + "license": "MIT", "engines": { "node": ">=18" }, @@ -2577,6 +2700,16 @@ "node": ">=0.8.19" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/is-arrayish": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", + "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==" + }, "node_modules/is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -2652,6 +2785,17 @@ "node": ">=8" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -2726,6 +2870,11 @@ "json-buffer": "3.0.1" } }, + "node_modules/kuler": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", + "integrity": 
"sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==" + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -2775,6 +2924,22 @@ "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, + "node_modules/logform": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz", + "integrity": "sha512-TFYA4jnP7PVbmlBIfhlSe+WKxs9dklXMTEGcBCIvLhE/Tn3H6Gk1norupVW7m5Cnd4bLcr08AytbyV/xj7f/kQ==", + "dependencies": { + "@colors/colors": "1.6.0", + "@types/triple-beam": "^1.3.2", + "fecha": "^4.2.0", + "ms": "^2.1.1", + "safe-stable-stringify": "^2.3.1", + "triple-beam": "^1.3.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -2857,8 +3022,7 @@ "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, "node_modules/mz": { "version": "2.7.0", @@ -2936,6 +3100,14 @@ "node": ">= 6" } }, + "node_modules/one-time": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz", + "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==", + "dependencies": { + "fn.name": "1.x.x" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -3042,10 +3214,11 @@ } }, "node_modules/picocolors": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.0.tgz", - 
"integrity": "sha512-TQ92mBOW0l3LeMeyLV6mzy/kWr8lkd/hp3mTg7wYK7zJhuBStmGMBG0BdeDZS/dZx1IukaX6Bk11zcln25o1Aw==", - "dev": true + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" }, "node_modules/picomatch": { "version": "2.3.1", @@ -3078,9 +3251,9 @@ } }, "node_modules/postcss": { - "version": "8.4.47", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz", - "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==", + "version": "8.4.49", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz", + "integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==", "dev": true, "funding": [ { @@ -3096,9 +3269,10 @@ "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "dependencies": { "nanoid": "^3.3.7", - "picocolors": "^1.1.0", + "picocolors": "^1.1.1", "source-map-js": "^1.2.1" }, "engines": { @@ -3362,6 +3536,19 @@ "react": "^18.0.0" } }, + "node_modules/react-tooltip": { + "version": "5.28.0", + "resolved": "https://registry.npmjs.org/react-tooltip/-/react-tooltip-5.28.0.tgz", + "integrity": "sha512-R5cO3JPPXk6FRbBHMO0rI9nkUG/JKfalBSQfZedZYzmqaZQgq7GLzF8vcCWx6IhUCKg0yPqJhXIzmIO5ff15xg==", + "dependencies": { + "@floating-ui/dom": "^1.6.1", + "classnames": "^2.3.0" + }, + "peerDependencies": { + "react": ">=16.14.0", + "react-dom": ">=16.14.0" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", @@ -3371,6 +3558,19 @@ "pify": "^2.3.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": 
"sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -3477,6 +3677,33 @@ "queue-microtask": "^1.2.2" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==", + "engines": { + "node": ">=10" + } + }, "node_modules/scheduler": { "version": "0.23.2", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", @@ -3530,6 +3757,14 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/simple-swizzle": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", + "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", + "dependencies": { + "is-arrayish": "^0.3.1" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -3539,6 +3774,22 @@ "node": ">=0.10.0" } }, + "node_modules/stack-trace": { + "version": "0.0.10", + "resolved": 
"https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz", + "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==", + "engines": { + "node": "*" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -3729,6 +3980,11 @@ "node": ">=14.0.0" } }, + "node_modules/text-hex": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", + "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==" + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -3768,6 +4024,14 @@ "node": ">=8.0" } }, + "node_modules/triple-beam": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz", + "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==", + "engines": { + "node": ">= 14.0.0" + } + }, "node_modules/ts-api-utils": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz", @@ -3804,10 +4068,11 @@ } }, "node_modules/typescript": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", - "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", + "integrity": 
"sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", "dev": true, + "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -3839,6 +4104,12 @@ } } }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true + }, "node_modules/update-browserslist-db": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz", @@ -3881,8 +4152,7 @@ "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "dev": true + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, "node_modules/vite": { "version": "5.4.7", @@ -3958,6 +4228,40 @@ "node": ">= 8" } }, + "node_modules/winston": { + "version": "3.17.0", + "resolved": "https://registry.npmjs.org/winston/-/winston-3.17.0.tgz", + "integrity": "sha512-DLiFIXYC5fMPxaRg832S6F5mJYvePtmO5G9v9IgUFPhXm9/GkXarH/TUrBAVzhTCzAj9anE/+GjrgXp/54nOgw==", + "dependencies": { + "@colors/colors": "^1.6.0", + "@dabh/diagnostics": "^2.0.2", + "async": "^3.2.3", + "is-stream": "^2.0.0", + "logform": "^2.7.0", + "one-time": "^1.0.0", + "readable-stream": "^3.4.0", + "safe-stable-stringify": "^2.3.1", + "stack-trace": "0.0.x", + "triple-beam": "^1.3.0", + "winston-transport": "^4.9.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, + "node_modules/winston-transport": { + "version": "4.9.0", + "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.9.0.tgz", + "integrity": 
"sha512-8drMJ4rkgaPo1Me4zD/3WLfI/zPdA9o2IipKODunnGDcuqbHwjsbB79ylv04LCGGzU0xQ6vTznOMpQGaLhhm6A==", + "dependencies": { + "logform": "^2.7.0", + "readable-stream": "^3.6.2", + "triple-beam": "^1.3.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, "node_modules/word-wrap": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", diff --git a/web-app/package.json b/web-app/package.json index 09b6007..c8cd1ef 100644 --- a/web-app/package.json +++ b/web-app/package.json @@ -13,17 +13,23 @@ "dependencies": { "@headlessui/react": "^2.1.8", "@heroicons/react": "^2.1.5", + "@mdi/js": "^7.4.47", + "@mdi/react": "^1.6.1", "axios": "^1.7.7", "classnames": "^2.5.1", + "date-fns": "^4.1.0", "prop-types": "^15.8.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.3.0", "react-router-dom": "^6.27.0", - "react-tag-autocomplete": "^7.3.0" + "react-tag-autocomplete": "^7.3.0", + "react-tooltip": "^5.28.0", + "winston": "^3.17.0" }, "devDependencies": { "@eslint/js": "^9.9.0", + "@types/node": "^22.10.1", "@types/react": "^18.3.3", "@types/react-dom": "^18.3.0", "@vitejs/plugin-react-swc": "^3.5.0", diff --git a/web-app/src/App.tsx b/web-app/src/App.tsx index 4b89f18..d5a2052 100644 --- a/web-app/src/App.tsx +++ b/web-app/src/App.tsx @@ -1,16 +1,24 @@ // App.tsx or App.jsx import React from 'react'; import { Route, BrowserRouter as Router, Routes } from 'react-router-dom'; +import Sidebar from './components/Sidebar'; import DocumentProcessor from './DocumentProcessor'; import ExperimentalOCR from './ExperimentalOCR'; // New component +import History from './History'; const App: React.FC = () => { return ( - - } /> - } /> - +
+ console.log(page)} /> +
+ + } /> + } /> + } /> + +
+
); }; diff --git a/web-app/src/DocumentProcessor.tsx b/web-app/src/DocumentProcessor.tsx index d83709c..093a5e1 100644 --- a/web-app/src/DocumentProcessor.tsx +++ b/web-app/src/DocumentProcessor.tsx @@ -1,6 +1,5 @@ import axios from "axios"; import React, { useCallback, useEffect, useState } from "react"; -import { Link } from "react-router-dom"; import "react-tag-autocomplete/example/src/styles.css"; // Ensure styles are loaded import DocumentsToProcess from "./components/DocumentsToProcess"; import NoDocuments from "./components/NoDocuments"; @@ -46,22 +45,17 @@ const DocumentProcessor: React.FC = () => { const [generateTags, setGenerateTags] = useState(true); const [error, setError] = useState(null); - // Temporary feature flags - const [ocrEnabled, setOcrEnabled] = useState(false); - // Custom hook to fetch initial data const fetchInitialData = useCallback(async () => { try { - const [filterTagRes, documentsRes, tagsRes, ocrEnabledRes] = await Promise.all([ + const [filterTagRes, documentsRes, tagsRes] = await Promise.all([ axios.get<{ tag: string }>("/api/filter-tag"), axios.get("/api/documents"), axios.get>("/api/tags"), - axios.get<{enabled: boolean}>("/api/experimental/ocr"), ]); setFilterTag(filterTagRes.data.tag); setDocuments(documentsRes.data); - setOcrEnabled(ocrEnabledRes.data.enabled); const tags = Object.keys(tagsRes.data).map((tag) => ({ id: tag, name: tag, @@ -199,16 +193,6 @@ const DocumentProcessor: React.FC = () => {

Paperless GPT

- {ocrEnabled && ( -
- - OCR via LLMs (Experimental) - -
- )}
{error && ( diff --git a/web-app/src/History.tsx b/web-app/src/History.tsx new file mode 100644 index 0000000..e68fb57 --- /dev/null +++ b/web-app/src/History.tsx @@ -0,0 +1,126 @@ +import React, { useEffect, useState } from 'react'; +import UndoCard from './components/UndoCard'; + +interface ModificationHistory { + ID: number; + DocumentID: number; + DateChanged: string; + ModField: string; + PreviousValue: string; + NewValue: string; + Undone: boolean; + UndoneDate: string | null; +} + +const History: React.FC = () => { + const [modifications, setModifications] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [paperlessUrl, setPaperlessUrl] = useState(''); + + // Get Paperless URL + useEffect(() => { + const fetchUrl = async () => { + try { + const response = await fetch('/api/paperless-url'); + if (!response.ok) { + throw new Error('Failed to fetch public URL'); + } + const { url } = await response.json(); + setPaperlessUrl(url); + } catch (err) { + console.error('Error fetching Paperless URL:', err); + } + }; + + fetchUrl(); + }, []); + + // Get all modifications + useEffect(() => { + fetchModifications(); + }, []); + + const fetchModifications = async () => { + try { + const response = await fetch('/api/modifications'); + if (!response.ok) { + throw new Error('Failed to fetch modifications'); + } + const data = await response.json(); + setModifications(data); + } catch (err) { + setError(err instanceof Error ? err.message : 'Unknown error occurred'); + } finally { + setLoading(false); + } + }; + + const handleUndo = async (id: number) => { + try { + const response = await fetch(`/api/undo-modification/${id}`, { + method: 'POST', + }); + + if (!response.ok) { + throw new Error('Failed to undo modification'); + } + + // Use ISO 8601 format for consistency + const now = new Date().toISOString(); + + setModifications(mods => mods.map(mod => + mod.ID === id + ? 
{ ...mod, Undone: true, UndoneDate: now } + : mod + )); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to undo modification'); + } + }; + + if (loading) { + return ( +
+
+
+ ); + } + + if (error) { + return ( +
+ Error: {error} +
+ ); + } + + return ( +
+

+ Modification History +

+
+ Note: when undoing tag changes, this will not re-add 'paperless-gpt-auto' +
+ {modifications.length === 0 ? ( +

+ No modifications found +

+ ) : ( +
+ {modifications.map((modification) => ( + + ))} +
+ )} +
+ ); +}; + +export default History; \ No newline at end of file diff --git a/web-app/src/assets/logo.svg b/web-app/src/assets/logo.svg new file mode 100644 index 0000000..347b1e7 --- /dev/null +++ b/web-app/src/assets/logo.svg @@ -0,0 +1,12 @@ + + + + + + diff --git a/web-app/src/components/Sidebar.css b/web-app/src/components/Sidebar.css new file mode 100644 index 0000000..2459a05 --- /dev/null +++ b/web-app/src/components/Sidebar.css @@ -0,0 +1,75 @@ +.sidebar { + width: 250px; + background-color: #2c3e50; + color: #ecf0f1; + display: flex; + flex-direction: column; + transition: width 0.3s; + } + + .sidebar.collapsed { + width: 60px; + + } + + .sidebar-header { + display: flex; + align-items: center; + padding: 10px; + background-color: #34495e; + justify-content: space-between; + } + + .sidebar-header.collapsed { + justify-content: center; + } + + .logo { + height: 40px; + margin-right: 10px; + } + + .menu-items { + list-style: none; + padding: 0; + margin: 0; + } + + .menu-items li { + padding: 15px 20px; + cursor: pointer; + } + + .menu-items li.active { + background-color: darkslategray; + padding: 15px 20px; + cursor: pointer; + } + + .menu-items li:hover { + background-color: #1abc9c; + } + + .menu-items li a { + text-decoration: none; + color: inherit; + font-size: 18px; + } + + .toggle-btn { + background: none; + border: none; + color: white; + font-size: 24px; + cursor: pointer; + } + + .sidebar.collapsed .menu-items li a { + display: none; + } + + .sidebar.collapsed .logo { + height: 40px; + margin: auto; + } + \ No newline at end of file diff --git a/web-app/src/components/Sidebar.tsx b/web-app/src/components/Sidebar.tsx new file mode 100644 index 0000000..ae17d27 --- /dev/null +++ b/web-app/src/components/Sidebar.tsx @@ -0,0 +1,81 @@ +import axios from "axios"; +import React, { useCallback, useEffect, useState } from 'react'; +import "./Sidebar.css"; +import { Link, useLocation } from 'react-router-dom'; +import { Icon } from '@mdi/react'; +import { 
mdiHomeOutline, mdiTextBoxSearchOutline, mdiHistory } from '@mdi/js'; +import logo from "../assets/logo.svg"; + + +interface SidebarProps { + onSelectPage: (page: string) => void; +} + +const Sidebar: React.FC = ({ onSelectPage }) => { + const [collapsed, setCollapsed] = useState(false); + const location = useLocation(); + + const toggleSidebar = () => { + setCollapsed(!collapsed); + }; + + const handlePageClick = (page: string) => { + onSelectPage(page); + }; + + // Get whether experimental OCR is enabled + const [ocrEnabled, setOcrEnabled] = useState(false); + const fetchOcrEnabled = useCallback(async () => { + try { + const res = await axios.get<{ enabled: boolean }>("/api/experimental/ocr"); + setOcrEnabled(res.data.enabled); + } catch (err) { + console.error(err); + } + }, []); + + useEffect(() => { + fetchOcrEnabled(); + }, [fetchOcrEnabled]); + + const menuItems = [ + { name: 'home', path: '/', icon: mdiHomeOutline, title: 'Home' }, + { name: 'history', path: '/history', icon: mdiHistory, title: 'History' }, + ]; + + // If OCR is enabled, add the OCR menu item + if (ocrEnabled) { + menuItems.push({ name: 'ocr', path: '/experimental-ocr', icon: mdiTextBoxSearchOutline, title: 'OCR' }); + } + + return ( +
+
+ {!collapsed && Logo} + +
+
    + {menuItems.map((item) => ( +
  • + handlePageClick(item.name)} + style={{ display: 'flex', alignItems: 'center' }} + > + {/* + {!collapsed &&   {item.title}} */} +
    + +
    + {!collapsed && {item.title}} + +
  • + ))} +
+
+ ); +}; + +export default Sidebar; diff --git a/web-app/src/components/UndoCard.tsx b/web-app/src/components/UndoCard.tsx new file mode 100644 index 0000000..d3c2254 --- /dev/null +++ b/web-app/src/components/UndoCard.tsx @@ -0,0 +1,193 @@ +// UndoCard.tsx +import React from 'react'; +import { Tooltip } from 'react-tooltip' + +interface ModificationProps { + ID: number; + DocumentID: number; + DateChanged: string; + ModField: string; + PreviousValue: string; + NewValue: string; + Undone: boolean; + UndoneDate: string | null; + onUndo: (id: number) => void; + paperlessUrl: string; +} + +const formatDate = (dateString: string | null): string => { + if (!dateString) return ''; + + try { + const date = new Date(dateString); + // Check if date is valid + if (isNaN(date.getTime())) { + return 'Invalid date'; + } + return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}`; + } catch { + return 'Invalid date'; + } +}; + +const buildPaperlessUrl = (paperlessUrl: string, documentId: number): string => { + return `${paperlessUrl}/documents/${documentId}/details`; +}; + +const UndoCard: React.FC = ({ + ID, + DocumentID, + DateChanged, + ModField, + PreviousValue, + NewValue, + Undone, + UndoneDate, + onUndo, + paperlessUrl, +}) => { + const formatValue = (value: string, field: string) => { + if (field === 'tags') { + try { + const tags = JSON.parse(value) as string[]; + return ( +
+ {tags.map((tag) => ( + + {tag} + + ))} +
+ ); + } catch { + return value; + } + } else if (field.toLowerCase().includes('date')) { + return formatDate(value); + } + return value; + }; + + return ( +
+
+
{/* Left content */} +
+
+
+ Date Modified +
+
+ {DateChanged && formatDate(DateChanged)} +
+
+ + +
+
+ Modified Field +
+
+ {ModField} +
+
+
+
+
+
+ Previous:   + 100 ? { + 'data-tooltip-id': `tooltip-${ID}-prev` + } : {})} + > + {formatValue(PreviousValue, ModField)} + +
+
+ New:   + 100 ? { + 'data-tooltip-id': `tooltip-${ID}-new` + } : {})} + > + {formatValue(NewValue, ModField)} + +
+
+ + {PreviousValue} + + + {NewValue} + +
+
+
{/* Button content */} + +
+
+
+ ); +}; + +export default UndoCard; \ No newline at end of file