Publish paperless-gpt

This commit is contained in:
Dominik Schröter 2024-09-23 14:59:50 +02:00
commit 833bd9b1b7
36 changed files with 5632 additions and 0 deletions

2
.dockerignore Normal file
View file

@ -0,0 +1,2 @@
.env
Dockerfile

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
.env
.DS_Store

16
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Package",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${fileDirname}",
"envFile": "${workspaceFolder}/.env"
}
]
}

60
Dockerfile Normal file
View file

@ -0,0 +1,60 @@
# Stage 1: Build the Go binary
FROM golang:1.22 AS builder
# Set the working directory inside the container
WORKDIR /app
# Copy go.mod and go.sum files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy the rest of the application code
COPY . .
# Build the Go binary
RUN CGO_ENABLED=0 GOOS=linux go build -o paperless-gpt main.go
# Stage 2: Build Vite frontend
FROM node:20 AS frontend
# Set the working directory inside the container
WORKDIR /app
# Copy package.json and package-lock.json
COPY web-app/package.json web-app/package-lock.json ./
# Install dependencies
RUN npm install
# Copy the frontend code
COPY web-app /app/
# Build the frontend
RUN npm run build
# Stage 3: Create a lightweight image with the Go binary
FROM alpine:latest
# Install necessary CA certificates
RUN apk --no-cache add ca-certificates
# Set the working directory inside the container
WORKDIR /root/
# Copy the Go binary from the builder stage
COPY --from=builder /app/paperless-gpt .
# Copy the frontend build
COPY --from=frontend /app/dist /root/web-app/dist
# Expose the port the app runs on
EXPOSE 8080
# Validate that the binary is executable
RUN chmod +x paperless-gpt
# Command to run the binary
CMD ["./paperless-gpt"]

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Icereed
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

170
README.md Normal file
View file

@ -0,0 +1,170 @@
# paperless-gpt
[![License](https://img.shields.io/github/license/icereed/paperless-gpt)](LICENSE)
[![Docker Pulls](https://img.shields.io/docker/pulls/icereed/paperless-gpt)](https://hub.docker.com/r/icereed/paperless-gpt)
**paperless-gpt** is a tool designed to generate accurate and meaningful document titles for [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) using Large Language Models (LLMs). It supports multiple LLM providers, including **OpenAI** and **Ollama**. With paperless-gpt, you can streamline your document management by automatically suggesting appropriate titles based on the content of your scanned documents.
[![Demo](./demo.gif)](./demo.gif)
## Features
- **Multiple LLM Support**: Choose between OpenAI and Ollama for generating document titles.
- **Easy Integration**: Works seamlessly with your existing paperless-ngx setup.
- **User-Friendly Interface**: Intuitive web interface for reviewing and applying suggested titles.
- **Dockerized Deployment**: Simple setup using Docker and Docker Compose.
## Table of Contents
- [paperless-gpt](#paperless-gpt)
- [Features](#features)
- [Table of Contents](#table-of-contents)
- [Getting Started](#getting-started)
- [Prerequisites](#prerequisites)
- [Installation](#installation)
- [Docker Compose](#docker-compose)
- [Manual Setup](#manual-setup)
- [Configuration](#configuration)
- [Environment Variables](#environment-variables)
- [Usage](#usage)
- [Contributing](#contributing)
- [License](#license)
## Getting Started
### Prerequisites
- [Docker](https://www.docker.com/get-started) installed on your system.
- A running instance of [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx).
- Access to an LLM provider:
- **OpenAI**: An API key with access to models like `gpt-4o` or `gpt-3.5-turbo`.
- **Ollama**: A running Ollama server with models like `llama2` installed.
### Installation
#### Docker Compose
The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx.
```yaml
version: '3.7'
services:
paperless-ngx:
image: ghcr.io/paperless-ngx/paperless-ngx:latest
# ... (your existing paperless-ngx configuration)
paperless-gpt:
image: icereed/paperless-gpt:latest
environment:
PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
PAPERLESS_API_TOKEN: 'your_paperless_api_token'
LLM_PROVIDER: 'openai' # or 'ollama'
LLM_MODEL: 'gpt-4' # or 'llama2'
OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI
LLM_LANGUAGE: 'English' # Optional, default is 'English'
ports:
- '8080:8080'
depends_on:
- paperless-ngx
```
**Note:** Replace the placeholder values with your actual configuration.
#### Manual Setup
If you prefer to run the application manually:
1. **Clone the Repository:**
```bash
git clone https://github.com/yourusername/paperless-gpt.git
cd paperless-gpt
```
2. **Build the Docker Image:**
```bash
docker build -t paperless-gpt .
```
3. **Run the Container:**
```bash
docker run -d \
-e PAPERLESS_BASE_URL='http://your_paperless_ngx_url' \
-e PAPERLESS_API_TOKEN='your_paperless_api_token' \
-e LLM_PROVIDER='openai' \
-e LLM_MODEL='gpt-4' \
-e OPENAI_API_KEY='your_openai_api_key' \
-e LLM_LANGUAGE='English' \
-p 8080:8080 \
paperless-gpt
```
## Configuration
### Environment Variables
| Variable | Description | Required |
|-----------------------|-----------------------------------------------------------------------------------------------------|----------|
| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
| `LLM_MODEL` | The model name to use (e.g., `gpt-4`, `gpt-3.5-turbo`, `llama2`). | Yes |
| `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. |
| `LLM_LANGUAGE` | The likely language of your documents (e.g., `English`, `German`). Default is `English`. | No |
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
## Usage
1. **Tag Documents in paperless-ngx:**
- Add the tag `paperless-gpt` to documents you want to process. This tag is configurable via the `tagToFilter` variable in the code (default is `paperless-gpt`).
2. **Access the paperless-gpt Interface:**
- Open your browser and navigate to `http://localhost:8080`.
3. **Process Documents:**
- Click on **"Generate Suggestions"** to let the LLM generate title suggestions based on the document content.
4. **Review and Apply Titles:**
- Review the suggested titles. You can edit them if necessary.
- Click on **"Apply Suggestions"** to update the document titles in paperless-ngx.
## Contributing
Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTING.md) before submitting a pull request.
1. **Fork the Repository**
2. **Create a Feature Branch**
```bash
git checkout -b feature/my-new-feature
```
3. **Commit Your Changes**
```bash
git commit -am 'Add some feature'
```
4. **Push to the Branch**
```bash
git push origin feature/my-new-feature
```
5. **Create a Pull Request**
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
---
**Disclaimer:** This project is not affiliated with the official paperless-ngx project. Use at your own discretion.

BIN
demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

8
docker-build-and-push.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/sh -e
# Build the Docker image for amd64 and arm64
docker build --platform linux/arm64 -t icereed/paperless-gpt:latest .
docker build --platform linux/amd64 -t icereed/paperless-gpt:latest .
# Push the Docker image to Docker Hub
docker push icereed/paperless-gpt:latest

11
docker-compose.yml Normal file
View file

@ -0,0 +1,11 @@
version: '3.8'
services:
app:
build:
context: .
dockerfile: Dockerfile
ports:
- "8080:8080"
env_file:
- .env

42
go.mod Normal file
View file

@ -0,0 +1,42 @@
module paperless-gpt
go 1.22.0
toolchain go1.22.2
require (
github.com/gin-gonic/gin v1.10.0
github.com/tmc/langchaingo v0.1.12
)
require (
github.com/bytedance/sonic v1.11.6 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

99
go.sum Normal file
View file

@ -0,0 +1,99 @@
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAcUsw=
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE=
github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=

68
index.html Normal file
View file

@ -0,0 +1,68 @@
<!DOCTYPE html>
<html>
<head>
<title>Paperless-GPT Verarbeiter</title>
<style>
body {
font-family: Arial, sans-serif;
padding: 50px;
}
button {
background-color: #4CAF50;
color: white;
padding: 10px 20px;
font-size: 16px;
border: none;
cursor: pointer;
border-radius: 5px;
margin-top: 20px;
}
button:hover {
background-color: #45a049;
}
h1 {
text-align: center;
}
table {
width: 100%;
border-collapse: collapse;
margin-top: 20px;
}
table, th, td {
border: 1px solid #dddddd;
}
th, td {
padding: 12px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
</style>
</head>
<body>
<h1>Paperless-GPT Verarbeiter</h1>
<p>Die folgenden Dokumente werden verarbeitet:</p>
<table>
<tr>
<th>Dokument ID</th>
<th>Aktueller Titel</th>
</tr>
{{range .}}
<tr>
<td>{{.ID}}</td>
<td>{{.Title}}</td>
</tr>
{{else}}
<tr>
<td colspan="2">Keine Dokumente zu verarbeiten.</td>
</tr>
{{end}}
</table>
{{if .}}
<form action="/process" method="post">
<button type="submit">Dokumente verarbeiten</button>
</form>
{{end}}
</body>
</html>

422
main.go Normal file
View file

@ -0,0 +1,422 @@
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
"sync"
"time"
"github.com/gin-gonic/gin"
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/ollama"
"github.com/tmc/langchaingo/llms/openai"
)
type GetDocumentsApiResponse struct {
Count int `json:"count"`
Next interface{} `json:"next"`
Previous interface{} `json:"previous"`
All []int `json:"all"`
Results []struct {
ID int `json:"id"`
Correspondent interface{} `json:"correspondent"`
DocumentType interface{} `json:"document_type"`
StoragePath interface{} `json:"storage_path"`
Title string `json:"title"`
Content string `json:"content"`
Tags []int `json:"tags"`
Created time.Time `json:"created"`
CreatedDate string `json:"created_date"`
Modified time.Time `json:"modified"`
Added time.Time `json:"added"`
ArchiveSerialNumber interface{} `json:"archive_serial_number"`
OriginalFileName string `json:"original_file_name"`
ArchivedFileName string `json:"archived_file_name"`
Owner int `json:"owner"`
UserCanChange bool `json:"user_can_change"`
Notes []interface{} `json:"notes"`
SearchHit struct {
Score float64 `json:"score"`
Highlights string `json:"highlights"`
NoteHighlights string `json:"note_highlights"`
Rank int `json:"rank"`
} `json:"__search_hit__"`
} `json:"results"`
}
type Document struct {
ID int `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Tags []int `json:"tags"`
SuggestedTitle string `json:"suggested_title,omitempty"`
}
var (
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
openaiAPIKey = os.Getenv("OPENAI_API_KEY")
tagToFilter = "paperless-gpt"
llmProvider = os.Getenv("LLM_PROVIDER")
llmModel = os.Getenv("LLM_MODEL")
)
func main() {
if paperlessBaseURL == "" || paperlessAPIToken == "" {
log.Fatal("Please set the PAPERLESS_BASE_URL and PAPERLESS_API_TOKEN environment variables.")
}
if llmProvider == "" || llmModel == "" {
log.Fatal("Please set the LLM_PROVIDER and LLM_MODEL environment variables.")
}
if llmProvider == "openai" && openaiAPIKey == "" {
log.Fatal("Please set the OPENAI_API_KEY environment variable for OpenAI provider.")
}
// Create a Gin router with default middleware (logger and recovery)
router := gin.Default()
// API routes
api := router.Group("/api")
{
api.GET("/documents", documentsHandler)
api.POST("/generate-suggestions", generateSuggestionsHandler)
api.PATCH("/update-documents", updateDocumentsHandler)
api.GET("/filter-tag", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"tag": tagToFilter})
})
}
// Serve static files for the frontend under /static
router.StaticFS("/assets", gin.Dir("./web-app/dist/assets", true))
router.StaticFile("/vite.svg", "./web-app/dist/vite.svg")
// Catch-all route for serving the frontend
router.NoRoute(func(c *gin.Context) {
c.File("./web-app/dist/index.html")
})
log.Println("Server started on port :8080")
if err := router.Run(":8080"); err != nil {
log.Fatalf("Failed to run server: %v", err)
}
}
// createLLM creates the appropriate LLM client based on the provider
func createLLM() (llms.Model, error) {
switch strings.ToLower(llmProvider) {
case "openai":
if openaiAPIKey == "" {
return nil, fmt.Errorf("OpenAI API key is not set")
}
return openai.New(
openai.WithModel(llmModel),
openai.WithToken(openaiAPIKey),
)
case "ollama":
return ollama.New(
ollama.WithModel(llmModel),
)
default:
return nil, fmt.Errorf("unsupported LLM provider: %s", llmProvider)
}
}
// documentsHandler returns documents with the specific tag
func documentsHandler(c *gin.Context) {
ctx := c.Request.Context()
documents, err := getDocumentsByTags(ctx, paperlessBaseURL, paperlessAPIToken, []string{tagToFilter})
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching documents: %v", err)})
log.Printf("Error fetching documents: %v", err)
return
}
c.JSON(http.StatusOK, documents)
}
// generateSuggestionsHandler generates title suggestions for documents
func generateSuggestionsHandler(c *gin.Context) {
ctx := c.Request.Context()
var documents []Document
if err := c.ShouldBindJSON(&documents); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)})
log.Printf("Invalid request payload: %v", err)
return
}
results, err := processDocuments(ctx, documents)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)})
log.Printf("Error processing documents: %v", err)
return
}
c.JSON(http.StatusOK, results)
}
// updateDocumentsHandler updates documents with new titles
func updateDocumentsHandler(c *gin.Context) {
ctx := c.Request.Context()
tagIDMapping, err := getIDMappingForTags(ctx, paperlessBaseURL, paperlessAPIToken, []string{tagToFilter})
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching tag ID: %v", err)})
log.Printf("Error fetching tag ID: %v", err)
return
}
paperlessGptTagID := tagIDMapping[tagToFilter]
var documents []Document
if err := c.ShouldBindJSON(&documents); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)})
log.Printf("Invalid request payload: %v", err)
return
}
err = updateDocuments(ctx, paperlessBaseURL, paperlessAPIToken, documents, paperlessGptTagID)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error updating documents: %v", err)})
log.Printf("Error updating documents: %v", err)
return
}
c.Status(http.StatusOK)
}
func getIDMappingForTags(ctx context.Context, baseURL, apiToken string, tagsToFilter []string) (map[string]int, error) {
url := fmt.Sprintf("%s/api/tags/", baseURL)
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", fmt.Sprintf("Token %s", apiToken))
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("Error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
}
var tagsResponse struct {
Results []struct {
ID int `json:"id"`
Name string `json:"name"`
} `json:"results"`
}
err = json.NewDecoder(resp.Body).Decode(&tagsResponse)
if err != nil {
return nil, err
}
tagIDMapping := make(map[string]int)
for _, tag := range tagsResponse.Results {
for _, filterTag := range tagsToFilter {
if tag.Name == filterTag {
tagIDMapping[tag.Name] = tag.ID
}
}
}
return tagIDMapping, nil
}
func getDocumentsByTags(ctx context.Context, baseURL, apiToken string, tags []string) ([]Document, error) {
tagQueries := make([]string, len(tags))
for i, tag := range tags {
tagQueries[i] = fmt.Sprintf("tag:%s", tag)
}
searchQuery := strings.Join(tagQueries, " ")
url := fmt.Sprintf("%s/api/documents/?query=%s", baseURL, searchQuery)
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", fmt.Sprintf("Token %s", apiToken))
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("Error searching documents: %d, %s", resp.StatusCode, string(bodyBytes))
}
var documentsResponse GetDocumentsApiResponse
err = json.NewDecoder(resp.Body).Decode(&documentsResponse)
if err != nil {
return nil, err
}
documents := make([]Document, 0, len(documentsResponse.Results))
for _, result := range documentsResponse.Results {
documents = append(documents, Document{
ID: result.ID,
Title: result.Title,
Content: result.Content,
Tags: result.Tags,
})
}
return documents, nil
}
func processDocuments(ctx context.Context, documents []Document) ([]Document, error) {
llm, err := createLLM()
if err != nil {
return nil, fmt.Errorf("failed to create LLM client: %v", err)
}
var wg sync.WaitGroup
var mu sync.Mutex
errors := make([]error, 0)
for i := range documents {
wg.Add(1)
go func(doc *Document) {
defer wg.Done()
documentID := doc.ID
log.Printf("Processing Document %v...", documentID)
content := doc.Content
if len(content) > 5000 {
content = content[:5000]
}
suggestedTitle, err := getSuggestedTitle(ctx, llm, content)
if err != nil {
mu.Lock()
errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err))
mu.Unlock()
log.Printf("Error processing document %d: %v", documentID, err)
return
}
mu.Lock()
doc.SuggestedTitle = suggestedTitle
mu.Unlock()
log.Printf("Document %d processed successfully.", documentID)
}(&documents[i])
}
wg.Wait()
if len(errors) > 0 {
return nil, errors[0]
}
return documents, nil
}
func getSuggestedTitle(ctx context.Context, llm llms.Model, content string) (string, error) {
likelyLanguage, ok := os.LookupEnv("LLM_LANGUAGE")
if !ok {
likelyLanguage = "English"
} else {
likelyLanguage = strings.Title(strings.ToLower(likelyLanguage))
}
prompt := fmt.Sprintf(`I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
Respond only with the title, without any additional information. The content is likely in %s.
Content:
%s
`, likelyLanguage, content)
completion, err := llm.GenerateContent(ctx, []llms.MessageContent{
{
Parts: []llms.ContentPart{
llms.TextContent{
Text: prompt,
},
},
Role: llms.ChatMessageTypeHuman,
},
})
if err != nil {
return "", fmt.Errorf("Error getting response from LLM: %v", err)
}
return strings.TrimSpace(strings.Trim(completion.Choices[0].Content, "\"")), nil
}
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document, paperlessGptTagID int) error {
client := &http.Client{}
for _, document := range documents {
documentID := document.ID
updatedFields := make(map[string]interface{})
newTags := []int{}
for _, tagID := range document.Tags {
if tagID != paperlessGptTagID {
newTags = append(newTags, tagID)
}
}
updatedFields["tags"] = newTags
suggestedTitle := document.SuggestedTitle
if len(suggestedTitle) > 128 {
suggestedTitle = suggestedTitle[:128]
}
updatedFields["title"] = suggestedTitle
url := fmt.Sprintf("%s/api/documents/%d/", baseURL, documentID)
jsonData, err := json.Marshal(updatedFields)
if err != nil {
log.Printf("Error marshalling JSON for document %d: %v", documentID, err)
return err
}
req, err := http.NewRequestWithContext(ctx, "PATCH", url, bytes.NewBuffer(jsonData))
if err != nil {
log.Printf("Error creating request for document %d: %v", documentID, err)
return err
}
req.Header.Set("Authorization", fmt.Sprintf("Token %s", apiToken))
req.Header.Set("Content-Type", "application/json")
resp, err := client.Do(req)
if err != nil {
log.Printf("Error updating document %d: %v", documentID, err)
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
log.Printf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
return fmt.Errorf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
}
log.Printf("Document %d updated successfully.", documentID)
}
return nil
}

52
result.html Normal file
View file

@ -0,0 +1,52 @@
<!DOCTYPE html>
<html>
<head>
<title>Verarbeitung abgeschlossen</title>
<style>
body {
font-family: Arial, sans-serif;
padding: 50px;
}
h1 {
text-align: center;
}
table {
width: 100%;
border-collapse: collapse;
margin-top: 20px;
}
table, th, td {
border: 1px solid #dddddd;
}
th, td {
padding: 12px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
.success {
color: green;
}
</style>
</head>
<body>
<h1>Verarbeitung abgeschlossen</h1>
<p>Hier ist der Vorher-Nachher-Vergleich der Dokumenttitel:</p>
<table>
<tr>
<th>Dokument ID</th>
<th>Alter Titel</th>
<th>Neuer Titel</th>
</tr>
{{range .}}
<tr>
<td>{{.ID}}</td>
<td>{{.Title}}</td>
<td>{{.SuggestedTitle}}</td>
</tr>
{{end}}
</table>
<p><a href="/">Zurück zur Hauptseite</a></p>
</body>
</html>

24
web-app/.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

50
web-app/README.md Normal file
View file

@ -0,0 +1,50 @@
# React + TypeScript + Vite
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
Currently, two official plugins are available:
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
## Expanding the ESLint configuration
If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
- Configure the top-level `parserOptions` property like this:
```js
export default tseslint.config({
languageOptions: {
// other options...
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
},
})
```
- Replace `tseslint.configs.recommended` to `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked`
- Optionally add `...tseslint.configs.stylisticTypeChecked`
- Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config:
```js
// eslint.config.js
import react from 'eslint-plugin-react'
export default tseslint.config({
// Set the react version
settings: { react: { version: '18.3' } },
plugins: {
// Add the react plugin
react,
},
rules: {
// other rules...
// Enable its recommended rules
...react.configs.recommended.rules,
...react.configs['jsx-runtime'].rules,
},
})
```

28
web-app/eslint.config.js Normal file
View file

@ -0,0 +1,28 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
export default tseslint.config(
{ ignores: ['dist'] },
{
extends: [js.configs.recommended, ...tseslint.configs.recommended],
files: ['**/*.{ts,tsx}'],
languageOptions: {
ecmaVersion: 2020,
globals: globals.browser,
},
plugins: {
'react-hooks': reactHooks,
'react-refresh': reactRefresh,
},
rules: {
...reactHooks.configs.recommended.rules,
'react-refresh/only-export-components': [
'warn',
{ allowConstantExport: true },
],
},
},
)

13
web-app/index.html Normal file
View file

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Paperless GPT</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

4008
web-app/package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

35
web-app/package.json Normal file
View file

@ -0,0 +1,35 @@
{
"name": "paperless-gpt",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@headlessui/react": "^2.1.8",
"@heroicons/react": "^2.1.5",
"axios": "^1.7.7",
"react": "^18.3.1",
"react-dom": "^18.3.1"
},
"devDependencies": {
"@eslint/js": "^9.9.0",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@vitejs/plugin-react-swc": "^3.5.0",
"autoprefixer": "^10.4.20",
"eslint": "^9.9.0",
"eslint-plugin-react-hooks": "^5.1.0-rc.0",
"eslint-plugin-react-refresh": "^0.4.9",
"globals": "^15.9.0",
"postcss": "^8.4.47",
"tailwindcss": "^3.4.12",
"typescript": "^5.5.3",
"typescript-eslint": "^8.0.1",
"vite": "^5.4.1"
}
}

View file

@ -0,0 +1,6 @@
export default {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
}

1
web-app/public/vite.svg Normal file
View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

42
web-app/src/App.css Normal file
View file

@ -0,0 +1,42 @@
#root {
max-width: 1280px;
margin: 0 auto;
padding: 2rem;
text-align: center;
}
.logo {
height: 6em;
padding: 1.5em;
will-change: filter;
transition: filter 300ms;
}
.logo:hover {
filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.react:hover {
filter: drop-shadow(0 0 2em #61dafbaa);
}
@keyframes logo-spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
@media (prefers-reduced-motion: no-preference) {
a:nth-of-type(2) .logo {
animation: logo-spin infinite 20s linear;
}
}
.card {
padding: 2em;
}
.read-the-docs {
color: #888;
}

13
web-app/src/App.tsx Normal file
View file

@ -0,0 +1,13 @@
import React from 'react';
import DocumentProcessor from './components/DocumentProcessor';
import './index.css';
const App: React.FC = () => {
return (
<div className="App">
<DocumentProcessor />
</div>
);
};
export default App;

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>

After

Width:  |  Height:  |  Size: 4 KiB

View file

@ -0,0 +1,346 @@
import {
Dialog,
DialogTitle,
Transition,
TransitionChild,
} from "@headlessui/react";
import { ArrowPathIcon, CheckCircleIcon } from "@heroicons/react/24/outline";
import axios from "axios";
import React, { Fragment, useEffect, useState } from "react";
interface Document {
id: number;
title: string;
content: string;
suggested_title?: string;
}
const DocumentProcessor: React.FC = () => {
const [documents, setDocuments] = useState<Document[]>([]);
const [loading, setLoading] = useState<boolean>(true);
const [processing, setProcessing] = useState<boolean>(false);
const [updating, setUpdating] = useState<boolean>(false);
const [successModalOpen, setSuccessModalOpen] = useState<boolean>(false);
const [filterTag, setFilterTag] = useState<string | undefined>(undefined);
const fetchFilterTag = async () => {
try {
const response = await axios.get("/api/filter-tag"); // API endpoint to fetch filter tag
setFilterTag(response.data?.tag);
} catch (error) {
console.error("Error fetching filter tag:", error);
}
}
const fetchDocuments = async () => {
try {
const response = await axios.get("/api/documents"); // API endpoint to fetch documents
setDocuments(response.data);
} catch (error) {
console.error("Error fetching documents:", error);
} finally {
setLoading(false);
}
};
useEffect(() => {
fetchFilterTag();
fetchDocuments();
}, []);
const handleProcessDocuments = async () => {
setProcessing(true);
try {
const response = await axios.post("/api/generate-suggestions", documents);
setDocuments(response.data);
} catch (error) {
console.error("Error generating suggestions:", error);
} finally {
setProcessing(false);
}
};
const handleUpdateDocuments = async () => {
setUpdating(true);
try {
await axios.patch("/api/update-documents", documents);
setSuccessModalOpen(true);
} catch (error) {
console.error("Error updating documents:", error);
} finally {
setUpdating(false);
}
};
const resetSuggestions = () => {
const resetDocs = documents.map((doc) => ({
...doc,
suggested_title: undefined,
}));
setDocuments(resetDocs);
};
// while no documents are found we keep refreshing in the background
useEffect(() => {
if (documents.length === 0) {
const interval = setInterval(() => {
fetchDocuments();
}, 500);
return () => clearInterval(interval);
}
}, [documents]);
if (loading) {
return (
<div className="flex items-center justify-center h-screen">
<div className="text-xl font-semibold">Loading documents...</div>
</div>
);
}
return (
<div className="max-w-5xl mx-auto p-6">
<h1 className="text-4xl font-bold mb-8 text-center text-gray-800">
Paperless GPT
</h1>
{/* Handle empty documents with reload button */}
{documents.length === 0 && (
<div className="flex items-center justify-center h-screen">
<div className="text-xl font-semibold">
No documents found with filter tag{" "}
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">{filterTag}</span>
{" "}found. Try{" "}
<button
onClick={() => {
setDocuments([]);
setLoading(true);
fetchDocuments();
}}
className="text-blue-600 hover:underline focus:outline-none"
>
reloading <ArrowPathIcon className="h-5 w-5 inline" />
</button>
.
</div>
</div>
)}
{/* Step 1: Document Preview */}
{!documents.some((doc) => doc.suggested_title) && (
<div className="space-y-6">
<div className="flex justify-between items-center">
<h2 className="text-2xl font-semibold text-gray-700">
Documents to Process
</h2>
<button
onClick={() => {
setDocuments([]);
setLoading(true);
fetchDocuments();
}}
disabled={processing}
className="bg-blue-600 text-white px-4 py-2 rounded hover:bg-blue-700 focus:outline-none"
>
<ArrowPathIcon className="h-5 w-5 inline" />
</button>
<button
onClick={handleProcessDocuments}
disabled={processing}
className="bg-blue-600 text-white px-4 py-2 rounded hover:bg-blue-700 focus:outline-none"
>
{processing ? "Processing..." : "Generate Suggestions"}
</button>
</div>
<div className="bg-white shadow rounded-md overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="px-6 py-3 text-left text-sm font-medium text-gray-500">
ID
</th>
<th className="px-6 py-3 text-left text-sm font-medium text-gray-500">
Title
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{documents.map((doc) => (
<tr key={doc.id}>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
{doc.id}
</td>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
{doc.title}
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
{/* Step 2: Review Suggestions */}
{documents.some((doc) => doc.suggested_title) && (
<div className="space-y-6">
<h2 className="text-2xl font-semibold text-gray-700">
Review and Edit Suggested Titles
</h2>
<div className="bg-white shadow rounded-md overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
ID
</th>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Original Title
</th>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Suggested Title
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{documents.map(
(doc) =>
doc.suggested_title && (
<tr key={doc.id}>
<td className="px-4 py-3 text-sm text-gray-500">
{doc.id}
</td>
<td className="px-4 py-3 text-sm text-gray-900">
{doc.title}
</td>
<td className="px-4 py-3 text-sm text-gray-900">
<input
type="text"
value={doc.suggested_title}
onChange={(e) => {
const updatedDocuments = documents.map((d) =>
d.id === doc.id
? { ...d, suggested_title: e.target.value }
: d
);
setDocuments(updatedDocuments);
}}
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
/>
</td>
</tr>
)
)}
</tbody>
</table>
</div>
<div className="flex justify-end space-x-4">
<button
onClick={resetSuggestions}
className="bg-gray-200 text-gray-700 px-4 py-2 rounded hover:bg-gray-300 focus:outline-none"
>
Back
</button>
<button
onClick={handleUpdateDocuments}
disabled={updating}
className={`${
updating ? "bg-green-400" : "bg-green-600 hover:bg-green-700"
} text-white px-4 py-2 rounded focus:outline-none`}
>
{updating ? "Updating..." : "Apply Suggestions"}
</button>
</div>
</div>
)}
{/* Success Modal */}
<Transition show={successModalOpen} as={Fragment}>
<Dialog
as="div"
static
className="fixed z-10 inset-0 overflow-y-auto"
open={successModalOpen}
onClose={setSuccessModalOpen}
>
<div className="flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0">
{/* Background overlay */}
<TransitionChild
as="div"
enter="ease-out duration-300"
enterFrom="opacity-0"
enterTo="opacity-100"
leave="ease-in duration-200"
leaveFrom="opacity-100"
leaveTo="opacity-0"
>
<div className="fixed inset-0 bg-gray-500 bg-opacity-75 transition-opacity" />
</TransitionChild>
{/* Centering trick */}
<span
className="hidden sm:inline-block sm:align-middle sm:h-screen"
aria-hidden="true"
>
&#8203;
</span>
{/* Modal content */}
<TransitionChild
as={Fragment}
enter="ease-out duration-300"
enterFrom="opacity-0 translate-y-4 sm:translate-y-0 sm:scale-95"
enterTo="opacity-100 translate-y-0 sm:scale-100"
leave="ease-in duration-200"
leaveFrom="opacity-100 translate-y-0 sm:scale-100"
leaveTo="opacity-0 translate-y-4 sm:translate-y-0 sm:scale-95"
>
<div className="inline-block align-bottom bg-white rounded-lg px-6 pt-5 pb-4 text-left overflow-hidden shadow-xl transform transition-all sm:align-middle sm:max-w-lg sm:w-full sm:p-6">
<div className="sm:flex sm:items-start">
<div className="mx-auto flex-shrink-0 flex items-center justify-center h-12 w-12 rounded-full bg-green-100 sm:mx-0 sm:h-12 sm:w-12">
<CheckCircleIcon
className="h-6 w-6 text-green-600"
aria-hidden="true"
/>
</div>
<div className="mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left">
<DialogTitle
as="h3"
className="text-lg leading-6 font-medium text-gray-900"
>
Documents Updated
</DialogTitle>
<div className="mt-2">
<p className="text-sm text-gray-500">
The documents have been successfully updated with the
new titles.
</p>
</div>
</div>
</div>
<div className="mt-5 sm:mt-4 sm:flex sm:flex-row-reverse">
<button
onClick={() => {
setSuccessModalOpen(false);
// Optionally reset or fetch documents again
setDocuments([]);
setLoading(true);
axios.get("/api/documents").then((response) => {
setDocuments(response.data);
setLoading(false);
});
}}
className="w-full inline-flex justify-center rounded-md border border-transparent shadow-sm px-4 py-2 bg-green-600 text-base font-medium text-white hover:bg-green-700 focus:outline-none sm:ml-3 sm:w-auto sm:text-sm"
>
OK
</button>
</div>
</div>
</TransitionChild>
</div>
</Dialog>
</Transition>
</div>
);
};
export default DocumentProcessor;

3
web-app/src/index.css Normal file
View file

@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;

10
web-app/src/main.tsx Normal file
View file

@ -0,0 +1,10 @@
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import App from './App.tsx'
import './index.css'
createRoot(document.getElementById('root')!).render(
<StrictMode>
<App />
</StrictMode>,
)

1
web-app/src/vite-env.d.ts vendored Normal file
View file

@ -0,0 +1 @@
/// <reference types="vite/client" />

View file

@ -0,0 +1,8 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
content: ['./index.html', './src/**/*.{js,ts,jsx,tsx}'],
theme: {
extend: {},
},
plugins: [],
};

24
web-app/tsconfig.app.json Normal file
View file

@ -0,0 +1,24 @@
{
"compilerOptions": {
"target": "ES2020",
"useDefineForClassFields": true,
"lib": ["ES2020", "DOM", "DOM.Iterable"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true
},
"include": ["src"]
}

View file

@ -0,0 +1 @@
{"root":["./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/documentprocessor.tsx"],"version":"5.6.2"}

7
web-app/tsconfig.json Normal file
View file

@ -0,0 +1,7 @@
{
"files": [],
"references": [
{ "path": "./tsconfig.app.json" },
{ "path": "./tsconfig.node.json" }
]
}

View file

@ -0,0 +1,22 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": ["ES2023"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true
},
"include": ["vite.config.ts"]
}

View file

@ -0,0 +1 @@
{"root":["./vite.config.ts"],"version":"5.6.2"}

15
web-app/vite.config.ts Normal file
View file

@ -0,0 +1,15 @@
import react from '@vitejs/plugin-react-swc';
import { defineConfig } from 'vite';
export default defineConfig({
plugins: [react()],
server: {
proxy: {
'/api': {
target: 'http://localhost:8080', // Ihr Go-Webservice
changeOrigin: true,
// rewrite: (path) => path.replace(/^\/api/, ''), // Entfernen Sie '/api' aus dem Pfad
},
},
},
});