Compare commits

..

No commits in common. "main" and "v0.3.0" have entirely different histories.
main ... v0.3.0

72 changed files with 1495 additions and 10689 deletions

View file

@ -1,7 +1,2 @@
.env
Dockerfile
web-app/e2e
web-app/node_modules
web-app/playwright_report
web-app/test-results
.github
Dockerfile

1
.github/FUNDING.yml vendored
View file

@ -1 +0,0 @@
github: icereed

View file

@ -1,9 +1,5 @@
name: Build and Push Docker Images
permissions:
pull-requests: write
contents: read
on:
push:
branches:
@ -19,242 +15,77 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version: 1.22
- name: Install dependencies
run: go mod download
- name: Run Go tests
run: go test ./...
- name: Set up Node.js
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
node-version: 20
- name: Cache npm dependencies
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- name: Install frontend dependencies
run: npm install
working-directory: web-app
- name: Build frontend
run: npm run build && cp -r dist/ ../dist/
working-directory: web-app
- name: Run frontend tests
run: npm test
working-directory: web-app
- name: Install mupdf
run: sudo apt-get install -y mupdf
- name: Set library path
run: echo "/usr/lib" | sudo tee -a /etc/ld.so.conf.d/mupdf.conf && sudo ldconfig
- name: Install dependencies
run: go mod download
- name: Run Go tests
run: go test ./...
build-amd64:
build-and-push:
runs-on: ubuntu-latest
needs: test
outputs:
digest: ${{ steps.build_amd64.outputs.digest }}
image_tag: ${{ steps.set_image_tag.outputs.image_tag }}
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Log in to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set Docker tags
id: set_tags
run: |
if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
echo "TAGS=icereed/paperless-gpt:pr-${GITHUB_SHA}-amd64" >> $GITHUB_ENV
echo "TAGS=icereed/paperless-gpt:unreleased" >> $GITHUB_ENV
if [[ "${GITHUB_REF}" == "refs/heads/main" ]]; then
echo "TAGS=icereed/paperless-gpt:unreleased" >> $GITHUB_ENV
elif [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
VERSION=${GITHUB_REF#refs/tags/}
echo "TAGS=icereed/paperless-gpt:${VERSION}-amd64" >> $GITHUB_ENV
else
echo "TAGS=icereed/paperless-gpt:unreleased-amd64" >> $GITHUB_ENV
echo "TAGS=icereed/paperless-gpt:latest,icereed/paperless-gpt:${VERSION}" >> $GITHUB_ENV
fi
- name: Build and push AMD64 image
id: build_amd64
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ env.TAGS }}
build-args: |
VERSION=${{ github.ref_type == 'tag' && github.ref_name || github.sha }}
COMMIT=${{ github.sha }}
BUILD_DATE=${{ github.event.repository.pushed_at }}
- name: Set image tag output
id: set_image_tag
run: echo "image_tag=${TAGS}" >> $GITHUB_OUTPUT
- name: Export digest for amd64
run: |
mkdir -p ${{ runner.temp }}/digests
echo "${{ steps.build_amd64.outputs.digest }}" | sed 's/^sha256://g' > ${{ runner.temp }}/digests/digest-amd64.txt
- name: Upload amd64 digest
uses: actions/upload-artifact@v4
with:
name: digest-amd64
path: ${{ runner.temp }}/digests/digest-amd64.txt
build-arm64:
runs-on: ubuntu-24.04-arm
needs: test
outputs:
digest: ${{ steps.build_arm64.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Docker Hub
if: ${{ github.event_name != 'pull_request' }}
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Set Docker tags
id: set_tags
run: |
if [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
VERSION=${GITHUB_REF#refs/tags/}
echo "TAGS=icereed/paperless-gpt:${VERSION}-arm64" >> $GITHUB_ENV
else
echo "TAGS=icereed/paperless-gpt:unreleased-arm64" >> $GITHUB_ENV
fi
- name: Build and push ARM64 image
id: build_arm64
uses: docker/build-push-action@v6
- name: Build and push Docker images
id: docker_build
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/arm64
platforms: linux/amd64,linux/arm64
push: ${{ github.event_name != 'pull_request' }}
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ env.TAGS }}
build-args: |
VERSION=${{ github.ref_type == 'tag' && github.ref_name || github.sha }}
COMMIT=${{ github.sha }}
BUILD_DATE=${{ github.event.repository.pushed_at }}
- name: Export digest for arm64
run: |
mkdir -p ${{ runner.temp }}/digests
echo "${{ steps.build_arm64.outputs.digest }}" | sed 's/^sha256://g' > ${{ runner.temp }}/digests/digest-arm64.txt
- name: Upload arm64 digest
uses: actions/upload-artifact@v4
with:
name: digest-arm64
path: ${{ runner.temp }}/digests/digest-arm64.txt
merge-manifests:
needs: [build-amd64, build-arm64]
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
env:
DOCKERHUB_REPO: icereed/paperless-gpt
steps:
- name: Download amd64 digest
uses: actions/download-artifact@v4
with:
name: digest-amd64
path: ${{ runner.temp }}/digests
- name: Download arm64 digest
uses: actions/download-artifact@v4
with:
name: digest-arm64
path: ${{ runner.temp }}/digests
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Determine version/tag
id: get_version
run: |
if [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
VERSION=${GITHUB_REF#refs/tags/}
echo "VERSION=${VERSION}" >> $GITHUB_ENV
else
echo "VERSION=unreleased" >> $GITHUB_ENV
fi
- name: Create and push manifest list
run: |
AMD64_DIGEST=$(cat ${{ runner.temp }}/digests/digest-amd64.txt)
ARM64_DIGEST=$(cat ${{ runner.temp }}/digests/digest-arm64.txt)
# Create manifest with the single-arch image digests
docker buildx imagetools create -t ${DOCKERHUB_REPO}:${VERSION} \
${DOCKERHUB_REPO}@sha256:${AMD64_DIGEST} ${DOCKERHUB_REPO}@sha256:${ARM64_DIGEST}
# Also push "latest" tag when on a tag
if [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
docker buildx imagetools create -t ${DOCKERHUB_REPO}:latest \
${DOCKERHUB_REPO}@sha256:${AMD64_DIGEST} ${DOCKERHUB_REPO}@sha256:${ARM64_DIGEST}
fi
- name: Inspect manifest
run: |
docker buildx imagetools inspect ${DOCKERHUB_REPO}:${VERSION}
if [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
docker buildx imagetools inspect ${DOCKERHUB_REPO}:latest
fi
e2e-tests:
name: E2E Tests
needs: build-amd64
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./web-app
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set PAPERLESS_GPT_IMAGE
run: |
if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]; then
IMAGE="icereed/paperless-gpt:pr-${GITHUB_SHA}-amd64"
elif [ "${GITHUB_REF_TYPE}" = "tag" ]; then
IMAGE="icereed/paperless-gpt:${GITHUB_REF_NAME}-amd64"
else
IMAGE="icereed/paperless-gpt:unreleased-amd64"
fi
echo "PAPERLESS_GPT_IMAGE=${IMAGE}" >> $GITHUB_ENV
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '22'
cache: 'npm'
cache-dependency-path: './web-app/package-lock.json'
- name: Install dependencies
run: npm ci
- name: Install Playwright browsers
run: npx playwright install chromium --with-deps
- name: Run Playwright tests
run: npm run test:e2e
env:
CI: true
DEBUG: testcontainers:containers
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PAPERLESS_GPT_IMAGE: ${{ env.PAPERLESS_GPT_IMAGE }}
- name: Upload Playwright Report
if: always()
uses: actions/upload-artifact@v4
with:
name: playwright-report
path: web-app/playwright-report/
retention-days: 30
- name: Upload test screenshots
if: always()
uses: actions/upload-artifact@v4
with:
name: test-results
path: web-app/test-results/
retention-days: 30
tags: ${{ env.TAGS }}

9
.gitignore vendored
View file

@ -1,10 +1,3 @@
.env
.DS_Store
prompts/
tests/tmp
tmp/
db/
web-app/playwright-report/
web-app/test-results/.last-run.json
web-app/test-results
dist/
prompts/

View file

@ -1,132 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official email address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at <github@icereed.net>.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations

View file

@ -1,206 +0,0 @@
# Contributing to paperless-gpt
Thank you for considering contributing to **paperless-gpt**! We welcome contributions of all kinds, including bug reports, feature requests, documentation improvements, and code contributions. By participating in this project, you agree to abide by our [Code of Conduct](#code-of-conduct).
## Table of Contents
- [Contributing to paperless-gpt](#contributing-to-paperless-gpt)
- [Table of Contents](#table-of-contents)
- [Code of Conduct](#code-of-conduct)
- [How Can I Contribute?](#how-can-i-contribute)
- [Reporting Bugs](#reporting-bugs)
- [Suggesting Enhancements](#suggesting-enhancements)
- [Submitting Pull Requests](#submitting-pull-requests)
- [Development Setup](#development-setup)
- [Prerequisites](#prerequisites)
- [Backend Setup](#backend-setup)
- [Frontend Setup](#frontend-setup)
- [Coding Guidelines](#coding-guidelines)
- [Style Guidelines](#style-guidelines)
- [Testing](#testing)
- [Documentation](#documentation)
- [Communication](#communication)
- [License](#license)
---
## Code of Conduct
This project and everyone participating in it is governed by the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to the project maintainer.
## How Can I Contribute?
### Reporting Bugs
If you find a bug, please open an issue on GitHub. Before doing so, please check if the issue has already been reported.
- **Use a clear and descriptive title** for the issue.
- **Describe the steps to reproduce the bug**.
- **Include any relevant logs, screenshots, or code snippets**.
- **Provide information about your environment** (OS, Docker version, LLM provider, etc.).
### Suggesting Enhancements
We appreciate new ideas and enhancements.
- **Search existing issues** to see if your idea has already been discussed.
- **Open a new issue** with a descriptive title.
- **Provide a detailed description** of the enhancement and its benefits.
### Submitting Pull Requests
We welcome pull requests (PRs). Please follow these guidelines:
1. **Fork the repository** and create your branch from `main`.
2. **Ensure your code follows** the [Coding Guidelines](#coding-guidelines).
3. **Write clear commit messages** following the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification.
4. **Test your changes** thoroughly.
5. **Update documentation** if necessary.
6. **Submit a pull request** and provide a clear description of your changes.
7. **Link related issues** in your PR description.
## Development Setup
### Prerequisites
- **Go** (version 1.20 or later)
- **Node.js** (version 18 or later)
- **npm** (comes with Node.js)
- **Docker** and **Docker Compose**
### Backend Setup
1. **Clone the repository**:
```bash
git clone https://github.com/icereed/paperless-gpt.git
cd paperless-gpt
```
2. **Set environment variables**:
- Create a `.env` file in the project root.
- Set the required environment variables as per the [README](README.md).
3. **Install Go dependencies**:
```bash
go mod download
```
4. **Run the backend server**:
```bash
mkdir dist
touch dist/index.html
go build
./paperless-gpt
```
5. **Run the backend server with frontend built in**:
```bash
cd web-app && npm install && npm run build && cp -r dist ..
go build
./paperless-gpt
```
### Frontend Setup
1. **Navigate to the frontend directory**:
```bash
cd web-app
```
2. **Install Node.js dependencies**:
```bash
npm install
```
3. **Start the frontend development server**:
```bash
npm run dev
```
The application should now be accessible at `http://localhost:8080`.
## Coding Guidelines
- **Languages**: Go for the backend, TypeScript with React for the frontend.
- **Formatting**:
- Use `gofmt` or `goimports` for Go code.
- Use Prettier and ESLint for frontend code (`npm run lint`).
- **Code Structure**:
- Keep code modular and reusable.
- Write clear and concise code with comments where necessary.
- **Dependencies**:
- Manage Go dependencies with `go mod`.
- Manage frontend dependencies with `npm`.
## Style Guidelines
- **Commit Messages**:
- Follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) format.
- Examples:
- `feat: add support for custom server-side prompts`
- `fix: resolve API pagination issue for tags`
- Use the imperative mood in the subject line.
- **Branch Naming**:
- Use descriptive names:
- `feat/your-feature-name`
- `fix/issue-number-description`
- `docs/update-readme`
- **Pull Requests**:
- Keep PRs focused; avoid unrelated changes.
- Provide a detailed description of your changes.
- Reference any related issues (`Closes #123`).
## Testing
- **Backend Tests**:
- Write unit tests using Go's `testing` and `github.com/stretchr/testify/assert` packages.
- Run tests with `go test ./...`.
- **Frontend Tests**:
- Use testing libraries like Jest and React Testing Library.
- Run tests with `npm run test`.
- **Continuous Integration**:
- Ensure all tests pass before submitting a PR.
## Documentation
- **Update Documentation**:
- Update the [README](README.md) and other relevant docs for any user-facing changes.
- Include usage examples and configuration instructions.
- **Comment Your Code**:
- Use clear and descriptive comments for complex logic.
- Document exported functions and methods in Go.
## Communication
- **GitHub Issues**: Use for bug reports, feature requests, and questions.
- **Discussions**: Engage in discussions for broader topics.
- **Contact Maintainer**: For sensitive matters, contact the maintainer via email.
## License
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
---
Thank you for your interest in contributing to paperless-gpt! We value your input and look forward to your contributions.

View file

@ -1,18 +1,29 @@
# Define top-level build arguments
ARG VERSION=docker-dev
ARG COMMIT=unknown
ARG BUILD_DATE=unknown
# Stage 1: Build Vite frontend
FROM node:22-alpine AS frontend
# Stage 1: Build the Go binary
FROM golang:1.22 AS builder
# Set the working directory inside the container
WORKDIR /app
# Install necessary packages
RUN apk add --no-cache git
# Copy go.mod and go.sum files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy the rest of the application code
COPY . .
# Build the Go binary
RUN CGO_ENABLED=0 GOOS=linux go build -o paperless-gpt .
# Stage 2: Build Vite frontend
FROM node:20 AS frontend
# Set the working directory inside the container
WORKDIR /app
# Copy package.json and package-lock.json
COPY web-app/package.json web-app/package-lock.json ./
# Install dependencies
@ -24,80 +35,23 @@ COPY web-app /app/
# Build the frontend
RUN npm run build
# Stage 2: Build the Go binary
FROM golang:1.24.1-alpine3.21 AS builder
# Stage 3: Create a lightweight image with the Go binary
FROM alpine:latest
# Install necessary CA certificates
RUN apk --no-cache add ca-certificates
# Set the working directory inside the container
WORKDIR /app
# Package versions for Renovate
# renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
ENV GCC_VERSION="14.2.0-r4"
# renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
ENV MUSL_DEV_VERSION="1.2.5-r9"
# renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
ENV MUPDF_VERSION="1.24.10-r0"
# renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
ENV MUPDF_DEV_VERSION="1.24.10-r0"
# renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
ENV SED_VERSION="4.9-r2"
# Install necessary packages with pinned versions
RUN apk add --no-cache \
"gcc=${GCC_VERSION}" \
"musl-dev=${MUSL_DEV_VERSION}" \
"mupdf=${MUPDF_VERSION}" \
"mupdf-dev=${MUPDF_DEV_VERSION}" \
"sed=${SED_VERSION}"
# Copy go.mod and go.sum files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Pre-compile go-sqlite3 to avoid doing this every time
RUN CGO_ENABLED=1 go build -tags musl -o /dev/null github.com/mattn/go-sqlite3
# Copy the frontend build
COPY --from=frontend /app/dist /app/web-app/dist
# Copy the Go source files
COPY *.go .
COPY ocr ./ocr
# Import ARGs from top level
ARG VERSION
ARG COMMIT
ARG BUILD_DATE
# Update version information
RUN sed -i \
-e "s/devVersion/${VERSION}/" \
-e "s/devBuildDate/${BUILD_DATE}/" \
-e "s/devCommit/${COMMIT}/" \
version.go
# Build the binary using caching for both go modules and build cache
RUN CGO_ENABLED=1 GOMAXPROCS=$(nproc) go build -tags musl -o paperless-gpt .
# Stage 3: Create a lightweight image with just the binary
FROM alpine:3.21.3
ENV GIN_MODE=release
# Install necessary runtime dependencies
RUN apk add --no-cache \
ca-certificates
# Set the working directory inside the container
WORKDIR /app/
WORKDIR /root/
# Copy the Go binary from the builder stage
COPY --from=builder /app/paperless-gpt .
# Copy the frontend build
COPY --from=frontend /app/dist /root/web-app/dist
# Expose the port the app runs on
EXPOSE 8080
# Command to run the binary
CMD ["/app/paperless-gpt"]
CMD ["./paperless-gpt"]

696
README.md
View file

@ -1,173 +1,111 @@
# paperless-gpt
[![License](https://img.shields.io/github/license/icereed/paperless-gpt)](LICENSE)
[![Discord Banner](https://img.shields.io/badge/Join%20us%20on-Discord-blue?logo=discord)](https://discord.gg/fJQppDH2J7)
[![Docker Pulls](https://img.shields.io/docker/pulls/icereed/paperless-gpt)](https://hub.docker.com/r/icereed/paperless-gpt)
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md)
![Screenshot](./paperless-gpt-screenshot.png)
**paperless-gpt** seamlessly pairs with [paperless-ngx][paperless-ngx] to generate **AI-powered document titles** and **tags**, saving you hours of manual sorting. While other tools may offer AI chat features, **paperless-gpt** stands out by **supercharging OCR with LLMs**-ensuring high accuracy, even with tricky scans. If youre craving next-level text extraction and effortless document organization, this is your solution.
**paperless-gpt** is a tool designed to generate accurate and meaningful document titles and tags for [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) using Large Language Models (LLMs). It supports multiple LLM providers, including **OpenAI** and **Ollama**. With paperless-gpt, you can streamline your document management by automatically suggesting appropriate titles and tags based on the content of your scanned documents.
https://github.com/user-attachments/assets/bd5d38b9-9309-40b9-93ca-918dfa4f3fd4
[![Demo](./demo.gif)](./demo.gif)
---
## Features
## Key Highlights
1. **LLM-Enhanced OCR**
Harness Large Language Models (OpenAI or Ollama) for **better-than-traditional** OCR—turn messy or low-quality scans into context-aware, high-fidelity text.
2. **Use specialized AI OCR services**
- **LLM OCR**: Use OpenAI or Ollama to extract text from images.
- **Google Document AI**: Leverage Google's powerful Document AI for OCR tasks.
- **Azure Document Intelligence**: Use Microsoft's enterprise OCR solution.
3. **Automatic Title & Tag Generation**
No more guesswork. Let the AI do the naming and categorizing. You can easily review suggestions and refine them if needed.
4. **Supports DeepSeek reasoning models in Ollama**
Greatly enhance accuracy by using a reasoning model like `deepseek-r1:8b`. The perfect tradeoff between privacy and performance! Of course, if you got enough GPUs or NPUs, a bigger model will enhance the experience.
5. **Automatic Correspondent Generation**
Automatically identify and generate correspondents from your documents, making it easier to track and organize your communications.
6. **Extensive Customization**
- **Prompt Templates**: Tweak your AI prompts to reflect your domain, style, or preference.
- **Tagging**: Decide how documents get tagged—manually, automatically, or via OCR-based flows.
7. **Simple Docker Deployment**
A few environment variables, and you're off! Compose it alongside paperless-ngx with minimal fuss.
8. **Unified Web UI**
- **Manual Review**: Approve or tweak AI's suggestions.
- **Auto Processing**: Focus only on edge cases while the rest is sorted for you.
---
- **Multiple LLM Support**: Choose between OpenAI and Ollama for generating document titles and tags.
- **Customizable Prompts**: Modify the prompt templates to suit your specific needs.
- **Easy Integration**: Works seamlessly with your existing paperless-ngx setup.
- **User-Friendly Interface**: Intuitive web interface for reviewing and applying suggested titles and tags.
- **Dockerized Deployment**: Simple setup using Docker and Docker Compose.
## Table of Contents
- [Key Highlights](#key-highlights)
- [Getting Started](#getting-started)
- [Prerequisites](#prerequisites)
- [Installation](#installation)
- [Docker Compose](#docker-compose)
- [Manual Setup](#manual-setup)
- [OCR Providers](#ocr-providers)
- [LLM-based OCR](#1-llm-based-ocr-default)
- [Azure Document Intelligence](#2-azure-document-intelligence)
- [Google Document AI](#3-google-document-ai)
- [Comparing OCR Providers](#comparing-ocr-providers)
- [Choosing the Right Provider](#choosing-the-right-provider)
- [Configuration](#configuration)
- [Environment Variables](#environment-variables)
- [Custom Prompt Templates](#custom-prompt-templates)
- [Prompt Templates Directory](#prompt-templates-directory)
- [Mounting the Prompts Directory](#mounting-the-prompts-directory)
- [Editing the Prompt Templates](#editing-the-prompt-templates)
- [Template Syntax and Variables](#template-syntax-and-variables)
- [OCR using AI](#llm-based-ocr-compare-for-yourself)
- [Usage](#usage)
- [Contributing](#contributing)
- [License](#license)
- [Star History](#star-history)
- [Disclaimer](#disclaimer)
---
- [paperless-gpt](#paperless-gpt)
- [Features](#features)
- [Table of Contents](#table-of-contents)
- [Getting Started](#getting-started)
- [Prerequisites](#prerequisites)
- [Installation](#installation)
- [Docker Compose](#docker-compose)
- [Manual Setup](#manual-setup)
- [Configuration](#configuration)
- [Environment Variables](#environment-variables)
- [Custom Prompt Templates](#custom-prompt-templates)
- [Prompt Templates Directory](#prompt-templates-directory)
- [Mounting the Prompts Directory](#mounting-the-prompts-directory)
- [Editing the Prompt Templates](#editing-the-prompt-templates)
- [Template Syntax and Variables](#template-syntax-and-variables)
- [Usage](#usage)
- [Contributing](#contributing)
- [License](#license)
## Getting Started
### Prerequisites
- [Docker][docker-install] installed.
- A running instance of [paperless-ngx][paperless-ngx].
- [Docker](https://www.docker.com/get-started) installed on your system.
- A running instance of [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx).
- Access to an LLM provider:
- **OpenAI**: An API key with models like `gpt-4o` or `gpt-3.5-turbo`.
- **Ollama**: A running Ollama server with models like `deepseek-r1:8b`.
- **OpenAI**: An API key with access to models like `gpt-4o` or `gpt-3.5-turbo`.
- **Ollama**: A running Ollama server with models like `llama2` installed.
### Installation
#### Docker Compose
Here's an example `docker-compose.yml` to spin up **paperless-gpt** alongside paperless-ngx:
The easiest way to get started is by using Docker Compose. Below is an example `docker-compose.yml` file to set up paperless-gpt alongside paperless-ngx.
```yaml
version: '3.7'
services:
paperless-ngx:
image: ghcr.io/paperless-ngx/paperless-ngx:latest
# ... (your existing paperless-ngx config)
# ... (your existing paperless-ngx configuration)
paperless-gpt:
image: icereed/paperless-gpt:latest
environment:
PAPERLESS_BASE_URL: "http://paperless-ngx:8000"
PAPERLESS_API_TOKEN: "your_paperless_api_token"
PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com" # Optional
MANUAL_TAG: "paperless-gpt" # Optional, default: paperless-gpt
AUTO_TAG: "paperless-gpt-auto" # Optional, default: paperless-gpt-auto
LLM_PROVIDER: "openai" # or 'ollama'
LLM_MODEL: "gpt-4o" # or 'deepseek-r1:8b'
# Optional, but recommended for Ollama
TOKEN_LIMIT: 1000
OPENAI_API_KEY: "your_openai_api_key"
# Optional - OPENAI_BASE_URL: 'https://litellm.yourinstallationof.it.com/v1'
LLM_LANGUAGE: "English" # Optional, default: English
# OCR Configuration - Choose one:
# Option 1: LLM-based OCR
OCR_PROVIDER: "llm" # Default OCR provider
VISION_LLM_PROVIDER: "ollama" # openai or ollama
VISION_LLM_MODEL: "minicpm-v" # minicpm-v (ollama) or gpt-4o (openai)
OLLAMA_HOST: "http://host.docker.internal:11434" # If using Ollama
# Option 2: Google Document AI
# OCR_PROVIDER: 'google_docai' # Use Google Document AI
# GOOGLE_PROJECT_ID: 'your-project' # Your GCP project ID
# GOOGLE_LOCATION: 'us' # Document AI region
# GOOGLE_PROCESSOR_ID: 'processor-id' # Your processor ID
# GOOGLE_APPLICATION_CREDENTIALS: '/app/credentials.json' # Path to service account key
# Option 3: Azure Document Intelligence
# OCR_PROVIDER: 'azure' # Use Azure Document Intelligence
# AZURE_DOCAI_ENDPOINT: 'your-endpoint' # Your Azure endpoint URL
# AZURE_DOCAI_KEY: 'your-key' # Your Azure API key
# AZURE_DOCAI_MODEL_ID: 'prebuilt-read' # Optional, defaults to prebuilt-read
# AZURE_DOCAI_TIMEOUT_SECONDS: '120' # Optional, defaults to 120 seconds
AUTO_OCR_TAG: "paperless-gpt-ocr-auto" # Optional, default: paperless-gpt-ocr-auto
OCR_LIMIT_PAGES: "5" # Optional, default: 5. Set to 0 for no limit.
LOG_LEVEL: "info" # Optional: debug, warn, error
PAPERLESS_BASE_URL: 'http://paperless-ngx:8000'
PAPERLESS_API_TOKEN: 'your_paperless_api_token'
LLM_PROVIDER: 'openai' # or 'ollama'
LLM_MODEL: 'gpt-4o' # or 'llama2'
OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI
LLM_LANGUAGE: 'English' # Optional, default is 'English'
OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
volumes:
- ./prompts:/app/prompts # Mount the prompts directory
# For Google Document AI:
- ${HOME}/.config/gcloud/application_default_credentials.json:/app/credentials.json
ports:
- "8080:8080"
- '8080:8080'
depends_on:
- paperless-ngx
```
**Pro Tip**: Replace placeholders with real values and read the logs if something looks off.
**Note:** Replace the placeholder values with your actual configuration.
#### Manual Setup
1. **Clone the Repository**
If you prefer to run the application manually:
1. **Clone the Repository:**
```bash
git clone https://github.com/icereed/paperless-gpt.git
cd paperless-gpt
```
2. **Create a `prompts` Directory**
2. **Create a `prompts` Directory:**
```bash
mkdir prompts
```
3. **Build the Docker Image**
3. **Build the Docker Image:**
```bash
docker build -t paperless-gpt .
```
4. **Run the Container**
4. **Run the Container:**
```bash
docker run -d \
-e PAPERLESS_BASE_URL='http://your_paperless_ngx_url' \
@ -176,450 +114,176 @@ services:
-e LLM_MODEL='gpt-4o' \
-e OPENAI_API_KEY='your_openai_api_key' \
-e LLM_LANGUAGE='English' \
-e VISION_LLM_PROVIDER='ollama' \
-e VISION_LLM_MODEL='minicpm-v' \
-e LOG_LEVEL='info' \
-v $(pwd)/prompts:/app/prompts \
-v $(pwd)/prompts:/app/prompts \ # Mount the prompts directory
-p 8080:8080 \
paperless-gpt
```
---
## OCR Providers
paperless-gpt supports three different OCR providers, each with unique strengths and capabilities:
### 1. LLM-based OCR (Default)
- **Key Features**:
- Uses vision-capable LLMs like gpt-4o or MiniCPM-V
- High accuracy with complex layouts and difficult scans
- Context-aware text recognition
- Self-correcting capabilities for OCR errors
- **Best For**:
- Complex or unusual document layouts
- Poor quality scans
- Documents with mixed languages
- **Configuration**:
```yaml
OCR_PROVIDER: "llm"
VISION_LLM_PROVIDER: "openai" # or "ollama"
VISION_LLM_MODEL: "gpt-4o" # or "minicpm-v"
```
### 2. Azure Document Intelligence
- **Key Features**:
- Enterprise-grade OCR solution
- Prebuilt models for common document types
- Layout preservation and table detection
- Fast processing speeds
- **Best For**:
- Business documents and forms
- High-volume processing
- Documents requiring layout analysis
- **Configuration**:
```yaml
OCR_PROVIDER: "azure"
AZURE_DOCAI_ENDPOINT: "https://your-endpoint.cognitiveservices.azure.com/"
AZURE_DOCAI_KEY: "your-key"
AZURE_DOCAI_MODEL_ID: "prebuilt-read" # optional
AZURE_DOCAI_TIMEOUT_SECONDS: "120" # optional
```
### 3. Google Document AI
- **Key Features**:
- Specialized document processors
- Strong form field detection
- Multi-language support
- High accuracy on structured documents
- **Best For**:
- Forms and structured documents
- Documents with tables
- Multi-language documents
- **Configuration**:
```yaml
OCR_PROVIDER: "google_docai"
GOOGLE_PROJECT_ID: "your-project"
GOOGLE_LOCATION: "us"
GOOGLE_PROCESSOR_ID: "processor-id"
```
## Configuration
### Environment Variables
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
| Variable | Description | Required |
|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
| `PAPERLESS_BASE_URL` | The base URL of your paperless-ngx instance (e.g., `http://paperless-ngx:8000`). | Yes |
| `PAPERLESS_API_TOKEN` | API token for accessing paperless-ngx. You can generate one in the paperless-ngx admin interface. | Yes |
| `LLM_PROVIDER` | The LLM provider to use (`openai` or `ollama`). | Yes |
| `LLM_MODEL` | The model name to use (e.g., `gpt-4o`, `gpt-3.5-turbo`, `llama2`). | Yes |
| `OPENAI_API_KEY` | Your OpenAI API key. Required if using OpenAI as the LLM provider. | Cond. |
| `LLM_LANGUAGE` | The likely language of your documents (e.g., `English`, `German`). Default is `English`. | No |
| `OLLAMA_HOST` | The URL of the Ollama server (e.g., `http://host.docker.internal:11434`). Useful if using Ollama. Default is `http://127.0.0.1:11434`. | No |
| Variable | Description | Required | Default |
| -------------------------------- | ---------------------------------------------------------------------------------------------------------------- | -------- | ---------------------- |
| `PAPERLESS_BASE_URL` | URL of your paperless-ngx instance (e.g. `http://paperless-ngx:8000`). | Yes | |
| `PAPERLESS_API_TOKEN` | API token for paperless-ngx. Generate one in paperless-ngx admin. | Yes | |
| `PAPERLESS_PUBLIC_URL` | Public URL for Paperless (if different from `PAPERLESS_BASE_URL`). | No | |
| `MANUAL_TAG` | Tag for manual processing. | No | paperless-gpt |
| `AUTO_TAG` | Tag for auto processing. | No | paperless-gpt-auto |
| `LLM_PROVIDER` | AI backend (`openai` or `ollama`). | Yes | |
| `LLM_MODEL` | AI model name, e.g. `gpt-4o`, `gpt-3.5-turbo`, `deepseek-r1:8b`. | Yes | |
| `OPENAI_API_KEY` | OpenAI API key (required if using OpenAI). | Cond. | |
| `OPENAI_BASE_URL` | OpenAI base URL (optional, if using a custom OpenAI compatible service like LiteLLM). | No | |
| `LLM_LANGUAGE` | Likely language for documents (e.g. `English`). | No | English |
| `OLLAMA_HOST` | Ollama server URL (e.g. `http://host.docker.internal:11434`). | No | |
| `OCR_PROVIDER` | OCR provider to use (`llm`, `azure`, or `google_docai`). | No | llm |
| `VISION_LLM_PROVIDER` | AI backend for LLM OCR (`openai` or `ollama`). Required if OCR_PROVIDER is `llm`. | Cond. | |
| `VISION_LLM_MODEL` | Model name for LLM OCR (e.g. `minicpm-v`). Required if OCR_PROVIDER is `llm`. | Cond. | |
| `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | |
| `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | |
| `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional if using `azure` provider. | No | prebuilt-read |
| `AZURE_DOCAI_TIMEOUT_SECONDS` | Azure Document Intelligence timeout in seconds. | No | 120 |
| `GOOGLE_PROJECT_ID` | Google Cloud project ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_LOCATION` | Google Cloud region (e.g. `us`, `eu`). Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_PROCESSOR_ID` | Document AI processor ID. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `GOOGLE_APPLICATION_CREDENTIALS` | Path to the mounted Google service account key. Required if OCR_PROVIDER is `google_docai`. | Cond. | |
| `AUTO_OCR_TAG` | Tag for automatically processing docs with OCR. | No | paperless-gpt-ocr-auto |
| `LOG_LEVEL` | Application log level (`info`, `debug`, `warn`, `error`). | No | info |
| `LISTEN_INTERFACE` | Network interface to listen on. | No | 8080 |
| `AUTO_GENERATE_TITLE` | Generate titles automatically if `paperless-gpt-auto` is used. | No | true |
| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. | No | true |
| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. | No | true |
| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. | No | 5 |
| `TOKEN_LIMIT` | Maximum tokens allowed for prompts/content. Set to `0` to disable limit. Useful for smaller LLMs. | No | |
| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`. | No | |
**Note:** When using Ollama, ensure that the Ollama server is running and accessible from the paperless-gpt container.
### Custom Prompt Templates
paperless-gpt's flexible **prompt templates** let you shape how AI responds:
You can customize the prompt templates used by paperless-gpt to generate titles and tags. By default, the application uses built-in templates, but you can modify them by editing the template files.
1. **`title_prompt.tmpl`**: For document titles.
2. **`tag_prompt.tmpl`**: For tagging logic.
3. **`ocr_prompt.tmpl`**: For LLM OCR.
4. **`correspondent_prompt.tmpl`**: For correspondent identification.
#### Prompt Templates Directory
Mount them into your container via:
The prompt templates are stored in the `prompts` directory inside the application. The two main template files are:
- `title_prompt.tmpl`: Template used for generating document titles.
- `tag_prompt.tmpl`: Template used for generating document tags.
#### Mounting the Prompts Directory
To modify the prompt templates, you need to mount a local `prompts` directory into the container.
**Docker Compose Example:**
```yaml
volumes:
- ./prompts:/app/prompts
services:
paperless-gpt:
image: icereed/paperless-gpt:latest
# ... (other configurations)
volumes:
- ./prompts:/app/prompts # Mount the prompts directory
```
Then tweak at will—**paperless-gpt** reloads them automatically on startup!
**Docker Run Command Example:**
#### Template Variables
```bash
docker run -d \
# ... (other configurations)
-v $(pwd)/prompts:/app/prompts \
paperless-gpt
```
Each template has access to specific variables:
#### Editing the Prompt Templates
**title_prompt.tmpl**:
- `{{.Language}}` - Target language (e.g., "English")
- `{{.Content}}` - Document content text
- `{{.Title}}` - Original document title
1. **Start the Container:**
**tag_prompt.tmpl**:
- `{{.Language}}` - Target language
- `{{.AvailableTags}}` - List of existing tags in paperless-ngx
- `{{.OriginalTags}}` - Document's current tags
- `{{.Title}}` - Document title
- `{{.Content}}` - Document content text
When you first start the container with the `prompts` directory mounted, it will automatically create the default template files in your local `prompts` directory if they do not exist.
**ocr_prompt.tmpl**:
- `{{.Language}}` - Target language
2. **Edit the Template Files:**
**correspondent_prompt.tmpl**:
- `{{.Language}}` - Target language
- `{{.AvailableCorrespondents}}` - List of existing correspondents
- `{{.BlackList}}` - List of blacklisted correspondent names
- `{{.Title}}` - Document title
- `{{.Content}}` - Document content text
- Open `prompts/title_prompt.tmpl` and `prompts/tag_prompt.tmpl` with your favorite text editor.
- Modify the templates using Go's `text/template` syntax.
- Save the changes.
The templates use Go's text/template syntax. paperless-gpt automatically reloads template changes on startup.
3. **Restart the Container (if necessary):**
---
The application automatically reloads the templates when it starts. If the container is already running, you may need to restart it to apply the changes.
#### Template Syntax and Variables
The templates use Go's `text/template` syntax and have access to the following variables:
- **For `title_prompt.tmpl`:**
- `{{.Language}}`: The language specified in `LLM_LANGUAGE` (default is `English`).
- `{{.Content}}`: The content of the document.
- **For `tag_prompt.tmpl`:**
- `{{.Language}}`: The language specified in `LLM_LANGUAGE`.
- `{{.AvailableTags}}`: A list (array) of available tags from paperless-ngx.
- `{{.Title}}`: The suggested title for the document.
- `{{.Content}}`: The content of the document.
**Example `title_prompt.tmpl`:**
```text
I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
Respond only with the title, without any additional information. The content is likely in {{.Language}}.
Be sure to add one fitting emoji at the beginning of the title to make it more visually appealing.
Content:
{{.Content}}
```
**Example `tag_prompt.tmpl`:**
```text
I will provide you with the content and the title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in {{.Language}}.
Available Tags:
{{.AvailableTags | join ","}}
Title:
{{.Title}}
Content:
{{.Content}}
Please concisely select the {{.Language}} tags from the list above that best describe the document.
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
```
**Note:** Advanced users can utilize additional functions from the [Sprig](http://masterminds.github.io/sprig/) template library, as it is included in the application.
## Usage
1. **Tag Documents**
1. **Tag Documents in paperless-ngx:**
- Add `paperless-gpt` tag to documents for manual processing
- Add `paperless-gpt-auto` for automatic processing
- Add `paperless-gpt-ocr-auto` for automatic OCR processing
- Add the tag `paperless-gpt` to documents you want to process. This tag is configurable via the `tagToFilter` variable in the code (default is `paperless-gpt`).
2. **Visit Web UI**
2. **Access the paperless-gpt Interface:**
- Go to `http://localhost:8080` (or your host) in your browser
- Review documents tagged for processing
- Open your browser and navigate to `http://localhost:8080`.
3. **Generate & Apply Suggestions**
3. **Process Documents:**
- Click "Generate Suggestions" to see AI-proposed titles/tags/correspondents
- Review and approve or edit suggestions
- Click "Apply" to save changes to paperless-ngx
- Click on **"Generate Suggestions"** to let the LLM generate title suggestions based on the document content.
4. **OCR Processing**
- Tag documents with appropriate OCR tag to process them
- Monitor progress in the Web UI
- Review results and apply changes
---
4. **Review and Apply Titles and Tags:**
## LLM-Based OCR: Compare for Yourself
<details>
<summary>Click to expand the vanilla OCR vs. AI-powered OCR comparison</summary>
### Example 1
**Image**:
![Image](demo/ocr-example1.jpg)
**Vanilla Paperless-ngx OCR**:
```
La Grande Recre
Gentre Gommercial 1'Esplanade
1349 LOLNAIN LA NEWWE
TA BERBOGAAL Tel =. 010 45,96 12
Ticket 1440112 03/11/2006 a 13597:
4007176614518. DINOS. TYRAMNESA
TOTAET.T.LES
ReslE par Lask-Euron
Rencu en Cash Euro
V.14.6 -Hotgese = VALERTE
TICKET A-GONGERVER PORR TONT. EEHANGE
HERET ET A BIENTOT
```
**LLM-Powered OCR (OpenAI gpt-4o)**:
```
La Grande Récré
Centre Commercial l'Esplanade
1348 LOUVAIN LA NEUVE
TVA 860826401 Tel : 010 45 95 12
Ticket 14421 le 03/11/2006 à 15:27:18
4007176614518 DINOS TYRANNOSA 14.90
TOTAL T.T.C. 14.90
Réglé par Cash Euro 50.00
Rendu en Cash Euro 35.10
V.14.6 Hôtesse : VALERIE
TICKET A CONSERVER POUR TOUT ECHANGE
MERCI ET A BIENTOT
```
---
### Example 2
**Image**:
![Image](demo/ocr-example2.jpg)
**Vanilla Paperless-ngx OCR**:
```
Invoice Number: 1-996-84199
Fed: Invoica Date: Sep01, 2014
Accaunt Number: 1334-8037-4
Page: 1012
Fod£x Tax ID 71.0427007
IRISINC
SHARON ANDERSON
4731 W ATLANTIC AVE STE BI
DELRAY BEACH FL 33445-3897 a
Invoice Questions?
Bing, Account Shipping Address: Contact FedEx Reı
ISINC
4731 W ATLANTIC AVE Phone: (800) 622-1147 M-F 7-6 (CST)
DELRAY BEACH FL 33445-3897 US Fax: (800) 548-3020
Internet: www.fedex.com
Invoice Summary Sep 01, 2014
FodEx Ground Services
Other Charges 11.00
Total Charges 11.00 Da £
>
polo) Fz// /G
TOTAL THIS INVOICE .... usps 11.00 P 2/1 f
The only charges accrued for this period is the Weekly Service Charge.
The Fedix Ground aceounts teferencedin his involce have been transteired and assigned 10, are owned by,andare payable to FedEx Express:
To onsurs propor credit, plasa raturn this portion wirh your payment 10 FodEx
Please do not staple or fold. Ploase make your chack payablı to FedEx.
[TI For change ol address, hc har and camphat lrm or never ide
Remittance Advice
Your payment is due by Sep 16, 2004
Number Number Dus
1334803719968 41993200000110071
AT 01 0391292 468448196 A**aDGT
IRISINC Illallun elalalssollallansdHilalellund
SHARON ANDERSON
4731 W ATLANTIC AVE STEBI FedEx
DELRAY BEACH FL 334453897 PO. Box 94516
PALATINE IL 60094-4515
```
**LLM-Powered OCR (OpenAI gpt-4o)**:
```
FedEx. Invoice Number: 1-996-84199
Invoice Date: Sep 01, 2014
Account Number: 1334-8037-4
Page: 1 of 2
FedEx Tax ID: 71-0427007
I R I S INC
SHARON ANDERSON
4731 W ATLANTIC AVE STE B1
DELRAY BEACH FL 33445-3897
Invoice Questions?
Billing Account Shipping Address: Contact FedEx Revenue Services
I R I S INC Phone: (800) 622-1147 M-F 7-6 (CST)
4731 W ATLANTIC AVE Fax: (800) 548-3020
DELRAY BEACH FL 33445-3897 US Internet: www.fedex.com
Invoice Summary Sep 01, 2014
FedEx Ground Services
Other Charges 11.00
Total Charges .......................................................... USD $ 11.00
TOTAL THIS INVOICE .............................................. USD $ 11.00
The only charges accrued for this period is the Weekly Service Charge.
RECEIVED
SEP _ 8 REC'D
BY: _
posted 9/21/14
The FedEx Ground accounts referenced in this invoice have been transferred and assigned to, are owned by, and are payable to FedEx Express.
To ensure proper credit, please return this portion with your payment to FedEx.
Please do not staple or fold. Please make your check payable to FedEx.
❑ For change of address, check here and complete form on reverse side.
Remittance Advice
Your payment is due by Sep 16, 2004
Invoice
Number
1-996-84199
Account
Number
1334-8037-4
Amount
Due
USD $ 11.00
133480371996841993200000110071
AT 01 031292 468448196 A**3DGT
I R I S INC
SHARON ANDERSON
4731 W ATLANTIC AVE STE B1
DELRAY BEACH FL 33445-3897
FedEx
P.O. Box 94515
```
---
</details>
**Why Does It Matter?**
- Traditional OCR often jumbles text from complex or low-quality scans.
- Large Language Models interpret context and correct likely errors, producing results that are more precise and readable.
- You can integrate these cleaned-up texts into your **paperless-ngx** pipeline for better tagging, searching, and archiving.
### How It Works
- **Vanilla OCR** typically uses classical methods or Tesseract-like engines to extract text, which can result in garbled outputs for complex fonts or poor-quality scans.
- **LLM-Powered OCR** uses your chosen AI backend—OpenAI or Ollama—to interpret the image's text in a more context-aware manner. This leads to fewer errors and more coherent text.
---
## Troubleshooting
### Working with Local LLMs
When using local LLMs (like those through Ollama), you might need to adjust certain settings to optimize performance:
#### Token Management
- Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM
- Smaller models might truncate content unexpectedly if given too much text
- Start with a conservative limit (e.g., 1000 tokens) and adjust based on your model's capabilities
- Set to `0` to disable the limit (use with caution)
Example configuration for smaller models:
```yaml
environment:
TOKEN_LIMIT: "2000" # Adjust based on your model's context window
LLM_PROVIDER: "ollama"
LLM_MODEL: "deepseek-r1:8b" # Or other local model
```
Common issues and solutions:
- If you see truncated or incomplete responses, try lowering the `TOKEN_LIMIT`
- If processing is too limited, gradually increase the limit while monitoring performance
- For models with larger context windows, you can increase the limit or disable it entirely
- Review the suggested titles. You can edit them if necessary.
- Click on **"Apply Suggestions"** to update the document titles in paperless-ngx.
## Contributing
**Pull requests** and **issues** are welcome!
Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTING.md) before submitting a pull request.
1. Fork the repo
2. Create a branch (`feature/my-awesome-update`)
3. Commit changes (`git commit -m "Improve X"`)
4. Open a PR
1. **Fork the Repository**
Check out our [contributing guidelines](CONTRIBUTING.md) for details.
2. **Create a Feature Branch**
---
```bash
git checkout -b feature/my-new-feature
```
3. **Commit Your Changes**
```bash
git commit -am 'Add some feature'
```
4. **Push to the Branch**
```bash
git push origin feature/my-new-feature
```
5. **Create a Pull Request**
## License
paperless-gpt is licensed under the [MIT License](LICENSE). Feel free to adapt and share!
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
---
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=icereed/paperless-gpt&type=Date)](https://star-history.com/#icereed/paperless-gpt&Date)
---
## Disclaimer
This project is **not** officially affiliated with [paperless-ngx][paperless-ngx]. Use at your own risk.
---
**paperless-gpt**: The **LLM-based** companion your doc management has been waiting for. Enjoy effortless, intelligent document titles, tags, and next-level OCR.
[paperless-ngx]: https://github.com/paperless-ngx/paperless-ngx
[docker-install]: https://docs.docker.com/get-docker/
**Disclaimer:** This project is not affiliated with the official paperless-ngx project. Use at your own discretion.

View file

@ -1,352 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"os"
"strconv"
"text/template"
"time"
"github.com/gin-gonic/gin"
)
// getPromptsHandler handles the GET /api/prompts endpoint.
// It returns the current title and tag prompt templates, preferring the
// on-disk files and falling back to the compiled-in defaults.
func getPromptsHandler(c *gin.Context) {
	templateMutex.RLock()
	defer templateMutex.RUnlock()

	// Read a template file, falling back to the built-in default when
	// the file is absent or unreadable.
	readOrDefault := func(path, fallback string) string {
		data, err := os.ReadFile(path)
		if err != nil {
			return fallback
		}
		return string(data)
	}

	c.JSON(http.StatusOK, gin.H{
		"title_template": readOrDefault("prompts/title_prompt.tmpl", defaultTitleTemplate),
		"tag_template":   readOrDefault("prompts/tag_prompt.tmpl", defaultTagTemplate),
	})
}
// updatePromptsHandler handles the POST /api/prompts endpoint.
//
// The JSON payload may carry "title_template" and/or "tag_template";
// empty fields are skipped. Each provided template is parsed first, so an
// invalid template yields 400 and leaves the current template untouched.
// A valid template is swapped in under the write lock and then persisted
// to the prompts directory; a persistence failure is only logged (the
// in-memory template is already active), and the handler still returns 200.
func updatePromptsHandler(c *gin.Context) {
	var req struct {
		TitleTemplate string `json:"title_template"`
		TagTemplate   string `json:"tag_template"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request payload"})
		return
	}
	templateMutex.Lock()
	defer templateMutex.Unlock()
	// Update title template
	if req.TitleTemplate != "" {
		// Validate before swapping so a bad template never becomes active.
		t, err := template.New("title").Parse(req.TitleTemplate)
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid title template: %v", err)})
			return
		}
		titleTemplate = t
		// Best-effort persistence: failure is logged, not fatal.
		err = os.WriteFile("prompts/title_prompt.tmpl", []byte(req.TitleTemplate), 0644)
		if err != nil {
			log.Errorf("Failed to write title_prompt.tmpl: %v", err)
		}
	}
	// Update tag template
	if req.TagTemplate != "" {
		// Same validate-then-swap-then-persist sequence as the title template.
		t, err := template.New("tag").Parse(req.TagTemplate)
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid tag template: %v", err)})
			return
		}
		tagTemplate = t
		err = os.WriteFile("prompts/tag_prompt.tmpl", []byte(req.TagTemplate), 0644)
		if err != nil {
			log.Errorf("Failed to write tag_prompt.tmpl: %v", err)
		}
	}
	c.Status(http.StatusOK)
}
// getAllTagsHandler handles the GET /api/tags endpoint.
// It proxies the complete tag list from paperless-ngx to the caller.
func (app *App) getAllTagsHandler(c *gin.Context) {
	tags, err := app.Client.GetAllTags(c.Request.Context())
	if err != nil {
		log.Errorf("Error fetching tags: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching tags: %v", err)})
		return
	}
	c.JSON(http.StatusOK, tags)
}
// documentsHandler handles the GET /api/documents endpoint.
// It returns up to 25 documents carrying the manual-processing tag.
func (app *App) documentsHandler(c *gin.Context) {
	docs, err := app.Client.GetDocumentsByTags(c.Request.Context(), []string{manualTag}, 25)
	if err != nil {
		log.Errorf("Error fetching documents: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error fetching documents: %v", err)})
		return
	}
	c.JSON(http.StatusOK, docs)
}
// generateSuggestionsHandler handles the POST /api/generate-suggestions endpoint.
// It decodes the request payload, runs the suggestion pipeline, and returns
// the generated suggestions as JSON.
func (app *App) generateSuggestionsHandler(c *gin.Context) {
	ctx := c.Request.Context()

	var req GenerateSuggestionsRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		log.Errorf("Invalid request payload: %v", err)
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)})
		return
	}

	results, err := app.generateDocumentSuggestions(ctx, req, log.WithContext(ctx))
	if err != nil {
		log.Errorf("Error processing documents: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)})
		return
	}
	c.JSON(http.StatusOK, results)
}
// updateDocumentsHandler handles the PATCH /api/update-documents endpoint.
// It applies a batch of accepted suggestions to paperless-ngx.
func (app *App) updateDocumentsHandler(c *gin.Context) {
	var docs []DocumentSuggestion
	if err := c.ShouldBindJSON(&docs); err != nil {
		log.Errorf("Invalid request payload: %v", err)
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)})
		return
	}
	if err := app.Client.UpdateDocuments(c.Request.Context(), docs, app.Database, false); err != nil {
		log.Errorf("Error updating documents: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error updating documents: %v", err)})
		return
	}
	c.Status(http.StatusOK)
}
// submitOCRJobHandler starts an asynchronous OCR job for one document and
// replies 202 Accepted with the generated job ID for later status polling.
func (app *App) submitOCRJobHandler(c *gin.Context) {
	docID, err := strconv.Atoi(c.Param("id"))
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid document ID"})
		return
	}

	// Register the job in the store before queueing it so status
	// lookups can find it immediately.
	job := &Job{
		ID:         generateJobID(), // Implement a function to generate unique job IDs
		DocumentID: docID,
		Status:     "pending",
		CreatedAt:  time.Now(),
		UpdatedAt:  time.Now(),
	}
	jobStore.addJob(job)
	jobQueue <- job

	c.JSON(http.StatusAccepted, gin.H{"job_id": job.ID})
}
// getJobStatusHandler reports the current state of a single OCR job by ID.
// Responds 404 when the job is unknown.
func (app *App) getJobStatusHandler(c *gin.Context) {
	job, ok := jobStore.getJob(c.Param("job_id"))
	if !ok {
		c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"})
		return
	}

	resp := gin.H{
		"job_id":     job.ID,
		"status":     job.Status,
		"created_at": job.CreatedAt,
		"updated_at": job.UpdatedAt,
		"pages_done": job.PagesDone,
	}
	// For terminal jobs, Result doubles as the success payload or the
	// failure message depending on the status.
	switch job.Status {
	case "completed":
		resp["result"] = job.Result
	case "failed":
		resp["error"] = job.Result
	}
	c.JSON(http.StatusOK, resp)
}
// getAllJobsHandler lists every known OCR job together with its state.
func (app *App) getAllJobsHandler(c *gin.Context) {
	jobs := jobStore.GetAllJobs()

	out := make([]gin.H, 0, len(jobs))
	for _, job := range jobs {
		entry := gin.H{
			"job_id":     job.ID,
			"status":     job.Status,
			"created_at": job.CreatedAt,
			"updated_at": job.UpdatedAt,
			"pages_done": job.PagesDone,
		}
		// Terminal jobs expose Result as either the success payload
		// ("result") or the failure message ("error").
		switch job.Status {
		case "completed":
			entry["result"] = job.Result
		case "failed":
			entry["error"] = job.Result
		}
		out = append(out, entry)
	}
	c.JSON(http.StatusOK, out)
}
// getDocumentHandler returns a gin handler that fetches one paperless-ngx
// document by the numeric :id path parameter.
func (app *App) getDocumentHandler() gin.HandlerFunc {
	return func(c *gin.Context) {
		docID, err := strconv.Atoi(c.Param("id"))
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid document ID"})
			return
		}
		doc, err := app.Client.GetDocument(c, docID)
		if err != nil {
			log.Errorf("Error fetching document: %v", err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		c.JSON(http.StatusOK, doc)
	}
}
// Section for local-db actions
// getModificationHistoryHandler serves a paginated view of the local
// modification history. Query parameters: page (default 1), pageSize
// (default 20, capped at 100).
func (app *App) getModificationHistoryHandler(c *gin.Context) {
	// Sanitize pagination inputs; invalid or out-of-range values fall
	// back to the defaults.
	page := 1
	if v, err := strconv.Atoi(c.DefaultQuery("page", "1")); err == nil && v > 0 {
		page = v
	}
	pageSize := 20
	if v, err := strconv.Atoi(c.DefaultQuery("pageSize", "20")); err == nil && v > 0 && v <= 100 {
		pageSize = v
	}

	mods, total, err := GetPaginatedModifications(app.Database, page, pageSize)
	if err != nil {
		log.Errorf("Failed to retrieve modification history: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification history"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"items":       mods,
		"totalItems":  total,
		"totalPages":  (int(total) + pageSize - 1) / pageSize, // ceiling division
		"currentPage": page,
		"pageSize":    pageSize,
	})
}
// undoModificationHandler reverts a single logged modification.
//
// It loads the modification record by :id, restores the stored previous
// value of the affected field (title, tags, or content) on the
// paperless-ngx document via UpdateDocuments, and only then marks the
// record as undone. A record that is already undone is rejected with 400.
func (app *App) undoModificationHandler(c *gin.Context) {
	id := c.Param("id")
	modID, err := strconv.Atoi(id)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification ID"})
		log.Errorf("Invalid modification ID: %v", err)
		return
	}
	modification, err := GetModification(app.Database, uint(modID))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve modification"})
		log.Errorf("Failed to retrieve modification: %v", err)
		return
	}
	// Reverting twice would overwrite newer document state with stale data.
	if modification.Undone {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Modification has already been undone"})
		log.Errorf("Modification has already been undone: %v", id)
		return
	}
	// Ok, we're actually doing the update:
	ctx := c.Request.Context()
	// Make the document suggestions for UpdateDocuments
	var suggestion DocumentSuggestion
	suggestion.ID = int(modification.DocumentID)
	suggestion.OriginalDocument, err = app.Client.GetDocument(ctx, int(modification.DocumentID))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to retrieve original document"})
		log.Errorf("Failed to retrieve original document: %v", err)
		return
	}
	// Route the stored previous value into the matching suggestion field.
	switch modification.ModField {
	case "title":
		suggestion.SuggestedTitle = modification.PreviousValue
	case "tags":
		// Tags are persisted as a JSON-encoded string slice.
		var tags []string
		err := json.Unmarshal([]byte(modification.PreviousValue), &tags)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to unmarshal previous tags"})
			log.Errorf("Failed to unmarshal previous tags: %v", err)
			return
		}
		suggestion.SuggestedTags = tags
	case "content":
		suggestion.SuggestedContent = modification.PreviousValue
	default:
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid modification field"})
		log.Errorf("Invalid modification field: %v", modification.ModField)
		return
	}
	// Update the document
	err = app.Client.UpdateDocuments(ctx, []DocumentSuggestion{suggestion}, app.Database, true)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update document"})
		log.Errorf("Failed to update document: %v", err)
		return
	}
	// Successful, so set modification as undone
	err = SetModificationUndone(app.Database, modification)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to mark modification as undone"})
		return
	}
	// Else all was ok
	c.Status(http.StatusOK)
}

View file

@ -1,423 +0,0 @@
package main
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"image"
"slices"
"strings"
"sync"
_ "image/jpeg"
"github.com/sirupsen/logrus"
"github.com/tmc/langchaingo/llms"
)
// getSuggestedCorrespondent generates a suggested correspondent for a
// document using the LLM.
//
// The correspondent prompt template is rendered with the document's
// (token-truncated) content, the suggested title, the correspondents
// already known to paperless-ngx, and a blacklist of names that should
// not be suggested. The LLM's first choice — post-processed by
// stripReasoning and trimmed — is returned.
func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, suggestedTitle string, availableCorrespondents []string, correspondentBlackList []string) (string, error) {
	likelyLanguage := getLikelyLanguage()

	templateMutex.RLock()
	defer templateMutex.RUnlock()

	// Budget the prompt: measure the tokens consumed by the static template
	// parts so the content can be truncated to fit the remaining window.
	templateData := map[string]interface{}{
		"Language":                likelyLanguage,
		"AvailableCorrespondents": availableCorrespondents,
		"BlackList":               correspondentBlackList,
		"Title":                   suggestedTitle,
	}
	availableTokens, err := getAvailableTokensForContent(correspondentTemplate, templateData)
	if err != nil {
		return "", fmt.Errorf("error calculating available tokens: %v", err)
	}

	// Truncate content if needed
	truncatedContent, err := truncateContentByTokens(content, availableTokens)
	if err != nil {
		return "", fmt.Errorf("error truncating content: %v", err)
	}

	// Execute template with truncated content
	var promptBuffer bytes.Buffer
	templateData["Content"] = truncatedContent
	if err := correspondentTemplate.Execute(&promptBuffer, templateData); err != nil {
		return "", fmt.Errorf("error executing correspondent template: %v", err)
	}

	prompt := promptBuffer.String()
	log.Debugf("Correspondent suggestion prompt: %s", prompt)

	completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: []llms.ContentPart{
				llms.TextContent{
					Text: prompt,
				},
			},
			Role: llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		return "", fmt.Errorf("error getting response from LLM: %v", err)
	}
	// Guard against an empty choice list: indexing Choices[0] blindly
	// would panic on providers that can return zero choices.
	if len(completion.Choices) == 0 {
		return "", fmt.Errorf("no response from LLM")
	}

	response := stripReasoning(strings.TrimSpace(completion.Choices[0].Content))
	return response, nil
}
// getSuggestedTags generates suggested tags for a document using the LLM.
//
// It renders tagTemplate with the document language, the curated list of
// available tags (paperless-gpt control tags removed), the document's original
// tags, and the suggested title. Content is truncated so the final prompt
// stays within the configured token limit. The LLM reply is parsed as a
// comma-separated list; original tags are re-added, duplicates removed, and
// anything not present in availableTags is dropped (case-insensitive match,
// canonical casing taken from availableTags).
func (app *App) getSuggestedTags(
	ctx context.Context,
	content string,
	suggestedTitle string,
	availableTags []string,
	originalTags []string,
	logger *logrus.Entry) ([]string, error) {
	likelyLanguage := getLikelyLanguage()

	// Templates can be replaced at runtime; hold the read lock while rendering.
	templateMutex.RLock()
	defer templateMutex.RUnlock()

	// Remove all paperless-gpt related tags from available tags
	availableTags = removeTagFromList(availableTags, manualTag)
	availableTags = removeTagFromList(availableTags, autoTag)
	availableTags = removeTagFromList(availableTags, autoOcrTag)

	// Get available tokens for content
	templateData := map[string]interface{}{
		"Language":      likelyLanguage,
		"AvailableTags": availableTags,
		"OriginalTags":  originalTags,
		"Title":         suggestedTitle,
	}
	availableTokens, err := getAvailableTokensForContent(tagTemplate, templateData)
	if err != nil {
		logger.Errorf("Error calculating available tokens: %v", err)
		return nil, fmt.Errorf("error calculating available tokens: %v", err)
	}

	// Truncate content if needed
	truncatedContent, err := truncateContentByTokens(content, availableTokens)
	if err != nil {
		logger.Errorf("Error truncating content: %v", err)
		return nil, fmt.Errorf("error truncating content: %v", err)
	}

	// Execute template with truncated content
	var promptBuffer bytes.Buffer
	templateData["Content"] = truncatedContent
	err = tagTemplate.Execute(&promptBuffer, templateData)
	if err != nil {
		logger.Errorf("Error executing tag template: %v", err)
		return nil, fmt.Errorf("error executing tag template: %v", err)
	}

	prompt := promptBuffer.String()
	logger.Debugf("Tag suggestion prompt: %s", prompt)

	completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: []llms.ContentPart{
				llms.TextContent{
					Text: prompt,
				},
			},
			Role: llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		logger.Errorf("Error getting response from LLM: %v", err)
		return nil, fmt.Errorf("error getting response from LLM: %v", err)
	}

	// The model may prepend <think>...</think> reasoning; strip it first.
	response := stripReasoning(completion.Choices[0].Content)
	suggestedTags := strings.Split(response, ",")
	for i, tag := range suggestedTags {
		suggestedTags[i] = strings.TrimSpace(tag)
	}

	// append the original tags to the suggested tags
	suggestedTags = append(suggestedTags, originalTags...)
	// Remove duplicates (Compact requires a sorted slice)
	slices.Sort(suggestedTags)
	suggestedTags = slices.Compact(suggestedTags)

	// Filter out tags that are not in the available tags list; the matched
	// availableTag (not the LLM's spelling) is kept so casing stays canonical.
	filteredTags := []string{}
	for _, tag := range suggestedTags {
		for _, availableTag := range availableTags {
			if strings.EqualFold(tag, availableTag) {
				filteredTags = append(filteredTags, availableTag)
				break
			}
		}
	}

	return filteredTags, nil
}
// doOCRViaLLM extracts the text of a single page image by sending it to the
// configured vision LLM together with the rendered OCR prompt template.
//
// OpenAI receives the image as a base64 data URL (per the OpenAI vision
// guide); every other provider receives the raw JPEG bytes as a binary part.
// Returns the raw text produced by the model.
func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte, logger *logrus.Entry) (string, error) {
	// Templates can be replaced at runtime; hold the read lock while rendering.
	templateMutex.RLock()
	defer templateMutex.RUnlock()

	likelyLanguage := getLikelyLanguage()

	var promptBuffer bytes.Buffer
	err := ocrTemplate.Execute(&promptBuffer, map[string]interface{}{
		"Language": likelyLanguage,
	})
	if err != nil {
		// Fixed: this message previously said "tag template" (copy-paste bug).
		return "", fmt.Errorf("error executing OCR template: %v", err)
	}
	prompt := promptBuffer.String()

	// Log the image dimensions (also validates the JPEG decodes at all).
	img, _, err := image.Decode(bytes.NewReader(jpegBytes))
	if err != nil {
		return "", fmt.Errorf("error decoding image: %v", err)
	}
	bounds := img.Bounds()
	logger.Debugf("Image dimensions: %dx%d", bounds.Dx(), bounds.Dy())

	// If not OpenAI then use a binary part for the image; otherwise use the
	// ImageURL part with base64 encoding from
	// https://platform.openai.com/docs/guides/vision
	var parts []llms.ContentPart
	if strings.ToLower(visionLlmProvider) != "openai" {
		// Log image size in kilobytes
		logger.Debugf("Image size: %d KB", len(jpegBytes)/1024)
		parts = []llms.ContentPart{
			llms.BinaryPart("image/jpeg", jpegBytes),
			llms.TextPart(prompt),
		}
	} else {
		base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
		// Log image size in kilobytes (of the encoded payload actually sent)
		logger.Debugf("Image size: %d KB", len(base64Image)/1024)
		parts = []llms.ContentPart{
			llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
			llms.TextPart(prompt),
		}
	}

	// Convert the image to text
	completion, err := app.VisionLLM.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: parts,
			Role:  llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		return "", fmt.Errorf("error getting response from LLM: %v", err)
	}

	result := completion.Choices[0].Content
	// Fixed: was fmt.Println(result) — a debug leftover writing to stdout.
	logger.Debugf("OCR result: %s", result)
	return result, nil
}
// getSuggestedTitle generates a suggested title for a document using the LLM.
//
// It renders titleTemplate with the document language, (truncated) content and
// the original title, sends the prompt to the text LLM, strips any
// <think>...</think> reasoning from the reply, and returns the result with
// surrounding whitespace and quotation marks removed.
func (app *App) getSuggestedTitle(ctx context.Context, content string, originalTitle string, logger *logrus.Entry) (string, error) {
	likelyLanguage := getLikelyLanguage()

	// Templates can be replaced at runtime; hold the read lock while rendering.
	templateMutex.RLock()
	defer templateMutex.RUnlock()

	// Get available tokens for content
	templateData := map[string]interface{}{
		"Language": likelyLanguage,
		"Content":  content,
		"Title":    originalTitle,
	}
	availableTokens, err := getAvailableTokensForContent(titleTemplate, templateData)
	if err != nil {
		logger.Errorf("Error calculating available tokens: %v", err)
		return "", fmt.Errorf("error calculating available tokens: %v", err)
	}

	// Truncate content if needed
	truncatedContent, err := truncateContentByTokens(content, availableTokens)
	if err != nil {
		logger.Errorf("Error truncating content: %v", err)
		return "", fmt.Errorf("error truncating content: %v", err)
	}

	// Execute template with truncated content
	var promptBuffer bytes.Buffer
	templateData["Content"] = truncatedContent
	err = titleTemplate.Execute(&promptBuffer, templateData)
	if err != nil {
		// Log before returning, consistent with the other error paths here
		// and with getSuggestedTags.
		logger.Errorf("Error executing title template: %v", err)
		return "", fmt.Errorf("error executing title template: %v", err)
	}

	prompt := promptBuffer.String()
	logger.Debugf("Title suggestion prompt: %s", prompt)

	completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: []llms.ContentPart{
				llms.TextContent{
					Text: prompt,
				},
			},
			Role: llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		return "", fmt.Errorf("error getting response from LLM: %v", err)
	}

	result := stripReasoning(completion.Choices[0].Content)
	// Models often wrap titles in quotes; remove them along with whitespace.
	return strings.TrimSpace(strings.Trim(result, "\"")), nil
}
// generateDocumentSuggestions generates suggestions for a set of documents.
//
// It fetches the available tags and correspondents from paperless-ngx once,
// then processes every requested document concurrently (one goroutine per
// document), generating a title, tags and/or a correspondent according to the
// flags in suggestionRequest. Results are collected under a mutex; if any
// document fails, the first recorded error is returned and no suggestions are
// emitted.
func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest, logger *logrus.Entry) ([]DocumentSuggestion, error) {
	// Fetch all available tags from paperless-ngx
	availableTagsMap, err := app.Client.GetAllTags(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch available tags: %v", err)
	}

	// Prepare a list of tag names (the manual trigger tag is never suggested)
	availableTagNames := make([]string, 0, len(availableTagsMap))
	for tagName := range availableTagsMap {
		if tagName == manualTag {
			continue
		}
		availableTagNames = append(availableTagNames, tagName)
	}

	// Prepare a list of document correspondents
	availableCorrespondentsMap, err := app.Client.GetAllCorrespondents(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch available correspondents: %v", err)
	}

	// Prepare a list of correspondent names
	availableCorrespondentNames := make([]string, 0, len(availableCorrespondentsMap))
	for correspondentName := range availableCorrespondentsMap {
		availableCorrespondentNames = append(availableCorrespondentNames, correspondentName)
	}

	documents := suggestionRequest.Documents
	documentSuggestions := []DocumentSuggestion{}

	var wg sync.WaitGroup
	var mu sync.Mutex // guards documentSuggestions and errorsList
	errorsList := make([]error, 0)

	for i := range documents {
		wg.Add(1)
		go func(doc Document) {
			defer wg.Done()
			documentID := doc.ID
			docLogger := documentLogger(documentID)
			docLogger.Printf("Processing Document ID %d...", documentID)

			content := doc.Content
			suggestedTitle := doc.Title
			var suggestedTags []string
			var suggestedCorrespondent string
			// Fixed: err must be local to the goroutine. The previous code
			// assigned to the enclosing function's err, a data race when
			// several documents are processed concurrently.
			var err error

			if suggestionRequest.GenerateTitles {
				suggestedTitle, err = app.getSuggestedTitle(ctx, content, suggestedTitle, docLogger)
				if err != nil {
					mu.Lock()
					errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
					mu.Unlock()
					docLogger.Errorf("Error processing document %d: %v", documentID, err)
					return
				}
			}

			if suggestionRequest.GenerateTags {
				suggestedTags, err = app.getSuggestedTags(ctx, content, suggestedTitle, availableTagNames, doc.Tags, docLogger)
				if err != nil {
					mu.Lock()
					errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
					mu.Unlock()
					// Fixed: was logger.Errorf — use the per-document logger
					// consistently.
					docLogger.Errorf("Error generating tags for document %d: %v", documentID, err)
					return
				}
			}

			if suggestionRequest.GenerateCorrespondents {
				suggestedCorrespondent, err = app.getSuggestedCorrespondent(ctx, content, suggestedTitle, availableCorrespondentNames, correspondentBlackList)
				if err != nil {
					mu.Lock()
					errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
					mu.Unlock()
					// Fixed: was the package-level log — use the per-document
					// logger consistently.
					docLogger.Errorf("Error generating correspondents for document %d: %v", documentID, err)
					return
				}
			}

			mu.Lock()
			suggestion := DocumentSuggestion{
				ID:               documentID,
				OriginalDocument: doc,
			}
			// Titles
			if suggestionRequest.GenerateTitles {
				docLogger.Printf("Suggested title for document %d: %s", documentID, suggestedTitle)
				suggestion.SuggestedTitle = suggestedTitle
			} else {
				suggestion.SuggestedTitle = doc.Title
			}
			// Tags
			if suggestionRequest.GenerateTags {
				docLogger.Printf("Suggested tags for document %d: %v", documentID, suggestedTags)
				suggestion.SuggestedTags = suggestedTags
			} else {
				suggestion.SuggestedTags = doc.Tags
			}
			// Correspondents
			if suggestionRequest.GenerateCorrespondents {
				// Fixed: was log.Printf — use the per-document logger.
				docLogger.Printf("Suggested correspondent for document %d: %s", documentID, suggestedCorrespondent)
				suggestion.SuggestedCorrespondent = suggestedCorrespondent
			} else {
				suggestion.SuggestedCorrespondent = ""
			}
			// Remove the paperless-gpt trigger tags from processed documents
			suggestion.RemoveTags = []string{manualTag, autoTag}
			documentSuggestions = append(documentSuggestions, suggestion)
			mu.Unlock()
			docLogger.Printf("Document %d processed successfully.", documentID)
		}(documents[i])
	}

	wg.Wait()

	if len(errorsList) > 0 {
		return nil, errorsList[0] // Return the first error encountered
	}

	return documentSuggestions, nil
}
// stripReasoning removes the reasoning section delimited by the first
// "<think>" and "</think>" tags from an LLM reply, then trims surrounding
// whitespace. If either tag is missing, the content is only trimmed.
func stripReasoning(content string) string {
	const openTag, closeTag = "<think>", "</think>"
	if start := strings.Index(content, openTag); start != -1 {
		if end := strings.Index(content, closeTag); end != -1 {
			content = content[:start] + content[end+len(closeTag):]
		}
	}
	return strings.TrimSpace(content)
}

View file

@ -1,293 +0,0 @@
package main
import (
"context"
"fmt"
"os"
"testing"
"text/template"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/textsplitter"
)
// Mock LLM for testing. It records the last prompt it was given so tests can
// inspect what was actually sent to the model, and always answers with the
// fixed string "test response".
type mockLLM struct {
	lastPrompt string
}

// CreateEmbedding satisfies the embedder interface; embeddings are not used
// in these tests, so it returns nothing.
func (m *mockLLM) CreateEmbedding(_ context.Context, texts []string) ([][]float32, error) {
	return nil, nil
}

// Call records the prompt and returns the canned response.
func (m *mockLLM) Call(_ context.Context, prompt string, _ ...llms.CallOption) (string, error) {
	m.lastPrompt = prompt
	return "test response", nil
}

// GenerateContent records the text of the first part of the first message
// (the prompt built by the code under test) and returns a single-choice
// canned response.
func (m *mockLLM) GenerateContent(ctx context.Context, messages []llms.MessageContent, opts ...llms.CallOption) (*llms.ContentResponse, error) {
	m.lastPrompt = messages[0].Parts[0].(llms.TextContent).Text
	return &llms.ContentResponse{
		Choices: []*llms.ContentChoice{
			{
				Content: "test response",
			},
		},
	}, nil
}
// Mock templates for testing
const (
testTitleTemplate = `
Language: {{.Language}}
Title: {{.Title}}
Content: {{.Content}}
`
testTagTemplate = `
Language: {{.Language}}
Tags: {{.AvailableTags}}
Content: {{.Content}}
`
testCorrespondentTemplate = `
Language: {{.Language}}
Content: {{.Content}}
`
)
// TestPromptTokenLimits verifies the end-to-end truncation pipeline: with a
// TOKEN_LIMIT set, the prompt that finally reaches the LLM must fit within
// the limit; with no limit, content must pass through untouched.
// NOTE: it assigns the package-level template globals, so it must not run in
// parallel with other tests that read them.
func TestPromptTokenLimits(t *testing.T) {
	testLogger := logrus.WithField("test", "test")

	// Initialize test templates (package-level globals used by the app code)
	var err error
	titleTemplate, err = template.New("title").Parse(testTitleTemplate)
	require.NoError(t, err)
	tagTemplate, err = template.New("tag").Parse(testTagTemplate)
	require.NoError(t, err)
	correspondentTemplate, err = template.New("correspondent").Parse(testCorrespondentTemplate)
	require.NoError(t, err)

	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)

	// Create a test app with mock LLM
	mockLLM := &mockLLM{}
	app := &App{
		LLM: mockLLM,
	}

	// Set up test template used for token budgeting in each case
	testTemplate := template.Must(template.New("test").Parse(`
Language: {{.Language}}
Content: {{.Content}}
`))

	tests := []struct {
		name       string
		tokenLimit int // 0 means "no limit"
		content    string
	}{
		{
			name:       "no limit",
			tokenLimit: 0,
			content:    "This is the original content that should not be truncated.",
		},
		{
			name:       "content within limit",
			tokenLimit: 100,
			content:    "Short content",
		},
		{
			name:       "content exceeds limit",
			tokenLimit: 50,
			content:    "This is a much longer content that should definitely be truncated to fit within token limits",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Set token limit for this test and re-read it from the env
			os.Setenv("TOKEN_LIMIT", fmt.Sprintf("%d", tc.tokenLimit))
			resetTokenLimit()

			// Prepare test data
			data := map[string]interface{}{
				"Language": "English",
			}

			// Calculate available tokens
			availableTokens, err := getAvailableTokensForContent(testTemplate, data)
			require.NoError(t, err)

			// Truncate content if needed
			truncatedContent, err := truncateContentByTokens(tc.content, availableTokens)
			require.NoError(t, err)

			// Test with the app's LLM (the mock records the final prompt)
			ctx := context.Background()
			_, err = app.getSuggestedTitle(ctx, truncatedContent, "Test Title", testLogger)
			require.NoError(t, err)

			// Verify truncation
			if tc.tokenLimit > 0 {
				// Count tokens in final prompt received by LLM
				splitter := textsplitter.NewTokenSplitter()
				tokens, err := splitter.SplitText(mockLLM.lastPrompt)
				require.NoError(t, err)

				// Verify prompt is within limits
				assert.LessOrEqual(t, len(tokens), tc.tokenLimit,
					"Final prompt should be within token limit")

				if len(tc.content) > len(truncatedContent) {
					// Content was truncated
					t.Logf("Content truncated from %d to %d characters",
						len(tc.content), len(truncatedContent))
				}
			} else {
				// No limit set, content should be unchanged
				assert.Contains(t, mockLLM.lastPrompt, tc.content,
					"Original content should be in prompt when no limit is set")
			}
		})
	}
}
// TestTokenLimitInCorrespondentGeneration verifies that getSuggestedCorrespondent
// truncates long document content so the final prompt (captured by the mock
// LLM) fits within a small TOKEN_LIMIT.
func TestTokenLimitInCorrespondentGeneration(t *testing.T) {
	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)

	// Create a test app with mock LLM
	mockLLM := &mockLLM{}
	app := &App{
		LLM: mockLLM,
	}

	// Test content that would exceed reasonable token limits
	longContent := "This is a very long content that would normally exceed token limits. " +
		"It contains multiple sentences and should be truncated appropriately " +
		"based on the token limit that we set."

	// Set a small token limit and re-read it from the env
	os.Setenv("TOKEN_LIMIT", "50")
	resetTokenLimit()

	// Call getSuggestedCorrespondent
	ctx := context.Background()
	availableCorrespondents := []string{"Test Corp", "Example Inc"}
	correspondentBlackList := []string{"Blocked Corp"}
	_, err := app.getSuggestedCorrespondent(ctx, longContent, "Test Title", availableCorrespondents, correspondentBlackList)
	require.NoError(t, err)

	// Verify the final prompt size as seen by the (mock) LLM
	splitter := textsplitter.NewTokenSplitter()
	tokens, err := splitter.SplitText(mockLLM.lastPrompt)
	require.NoError(t, err)

	// Final prompt should be within token limit
	assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
}
// TestTokenLimitInTagGeneration verifies that getSuggestedTags truncates long
// document content so the final prompt (captured by the mock LLM) fits within
// a small TOKEN_LIMIT.
func TestTokenLimitInTagGeneration(t *testing.T) {
	testLogger := logrus.WithField("test", "test")

	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)

	// Create a test app with mock LLM
	mockLLM := &mockLLM{}
	app := &App{
		LLM: mockLLM,
	}

	// Test content that would exceed reasonable token limits
	longContent := "This is a very long content that would normally exceed token limits. " +
		"It contains multiple sentences and should be truncated appropriately."

	// Set a small token limit and re-read it from the env
	os.Setenv("TOKEN_LIMIT", "50")
	resetTokenLimit()

	// Call getSuggestedTags
	ctx := context.Background()
	availableTags := []string{"test", "example"}
	originalTags := []string{"original"}
	_, err := app.getSuggestedTags(ctx, longContent, "Test Title", availableTags, originalTags, testLogger)
	require.NoError(t, err)

	// Verify the final prompt size as seen by the (mock) LLM
	splitter := textsplitter.NewTokenSplitter()
	tokens, err := splitter.SplitText(mockLLM.lastPrompt)
	require.NoError(t, err)

	// Final prompt should be within token limit
	assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
}
// TestTokenLimitInTitleGeneration verifies that getSuggestedTitle truncates
// long document content so the final prompt (captured by the mock LLM) fits
// within a small TOKEN_LIMIT.
func TestTokenLimitInTitleGeneration(t *testing.T) {
	testLogger := logrus.WithField("test", "test")

	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)

	// Create a test app with mock LLM
	mockLLM := &mockLLM{}
	app := &App{
		LLM: mockLLM,
	}

	// Test content that would exceed reasonable token limits
	longContent := "This is a very long content that would normally exceed token limits. " +
		"It contains multiple sentences and should be truncated appropriately."

	// Set a small token limit and re-read it from the env
	os.Setenv("TOKEN_LIMIT", "50")
	resetTokenLimit()

	// Call getSuggestedTitle
	ctx := context.Background()
	_, err := app.getSuggestedTitle(ctx, longContent, "Original Title", testLogger)
	require.NoError(t, err)

	// Verify the final prompt size as seen by the (mock) LLM
	splitter := textsplitter.NewTokenSplitter()
	tokens, err := splitter.SplitText(mockLLM.lastPrompt)
	require.NoError(t, err)

	// Final prompt should be within token limit
	assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
}
// TestStripReasoning covers the reasoning-stripping behavior: paired
// <think>...</think> tags are removed wherever they appear, an unclosed tag
// leaves the content untouched, and surrounding whitespace is always trimmed.
func TestStripReasoning(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "No reasoning tags",
			input:    "This is a test content without reasoning tags.",
			expected: "This is a test content without reasoning tags.",
		},
		{
			name:     "Reasoning tags at the start",
			input:    "<think>Start reasoning</think>\n\nContent \n\n",
			expected: "Content",
		},
		{
			name:     "Reasoning tags in the middle",
			input:    "Before <think>hidden reasoning</think> after",
			expected: "Before  after",
		},
		{
			name:     "Unclosed reasoning tag is left untouched",
			input:    "<think>never closed, content follows",
			expected: "<think>never closed, content follows",
		},
		{
			name:     "Whitespace-only trimming",
			input:    "   Content   ",
			expected: "Content",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			result := stripReasoning(tc.input)
			assert.Equal(t, tc.expected, result)
		})
	}
}

View file

@ -1,7 +0,0 @@
#!/usr/bin/env bash
# Build and run paperless-gpt locally, loading configuration from .env.
set -o allexport  # auto-export every variable defined while sourcing below
source .env       # load PAPERLESS_* / LLM_* settings into the environment
set +o allexport  # stop auto-exporting
go build
./paperless-gpt

View file

@ -1,49 +0,0 @@
# Active Context
## Current Task
Creating and initializing the memory bank documentation system for the paperless-gpt project.
## Recent Changes
1. Created memory bank directory structure
2. Initialized core documentation files:
- productContext.md: Project purpose and functionality
- systemPatterns.md: Architecture and design patterns
- techContext.md: Technical stack and setup
- activeContext.md: This file (current state)
- progress.md: To be created next
## Current State
- Initial documentation setup phase
- Core project understanding established
- Key systems and patterns documented
- Technical requirements captured
## Next Steps
### Immediate Tasks
1. Create progress.md to track project status
2. Verify all memory bank files are complete
3. Review documentation for any gaps
4. Ensure all critical information is captured
### Future Considerations
1. Keep documentation updated with:
- New feature implementations
- Architecture changes
- Configuration updates
- Bug fixes and improvements
2. Documentation maintenance:
- Regular reviews for accuracy
- Updates for new developments
- Removal of obsolete information
- Addition of new patterns/technologies
## Active Questions/Issues
None currently - initial setup phase
## Recent Decisions
1. Created comprehensive documentation structure
2. Organized information into logical sections
3. Prioritized key system components
4. Established documentation patterns

View file

@ -1,57 +0,0 @@
# Product Context
## Project Purpose
paperless-gpt is an AI-powered companion application designed to enhance the document management capabilities of paperless-ngx by automating document organization tasks through advanced AI technologies.
## Problems Solved
1. Manual Document Organization
- Eliminates time-consuming manual tagging and title creation
- Reduces human error in document categorization
- Streamlines document processing workflow
2. OCR Quality
- Improves text extraction from poor quality scans
- Provides context-aware OCR capabilities
- Handles complex document layouts better than traditional OCR
3. Document Categorization
- Automates correspondent identification
- Provides intelligent tag suggestions
- Generates meaningful document titles
## Core Functionality
### 1. LLM-Enhanced OCR
- Uses Large Language Models for better text extraction
- Handles messy or low-quality scans effectively
- Provides context-aware text interpretation
### 2. Automatic Document Processing
- Title Generation: Creates descriptive titles based on content
- Tag Generation: Suggests relevant tags from existing tag set
- Correspondent Identification: Automatically detects document senders/recipients
### 3. Integration Features
- Seamless paperless-ngx integration
- Docker-based deployment
- Customizable prompt templates
- Support for multiple LLM providers (OpenAI, Ollama)
### 4. User Interface
- Web-based management interface
- Manual review capabilities
- Batch processing support
- Auto-processing workflow option
## Usage Flow
1. Documents are tagged with specific markers (e.g., 'paperless-gpt')
2. System processes documents using AI/LLM capabilities
3. Results can be automatically applied or manually reviewed
4. Processed documents are updated in paperless-ngx
## Configuration Options
- Manual vs. automatic processing
- LLM provider selection
- Language preferences
- Processing limits and constraints
- Custom prompt templates

View file

@ -1,96 +0,0 @@
# Progress Tracking
## Implemented Features
### Core Functionality
✅ LLM Integration
- OpenAI support
- Ollama support
- Vision model integration for OCR
- Template-based prompts
✅ Document Processing
- Title generation
- Tag suggestion
- Correspondent identification
- LLM-enhanced OCR
✅ Frontend Interface
- Document review UI
- Suggestion management
- Batch processing
- Success feedback
✅ System Integration
- paperless-ngx API integration
- Docker deployment
- Environment configuration
- Custom prompt templates
## Working Components
### Backend Systems
- Go API server
- LLM provider abstraction
- Template engine
- Concurrent document processing
- Error handling
- Logging system
### Frontend Features
- React/TypeScript application
- Document processing interface
- Review system
- Component architecture
- Tailwind styling
### Infrastructure
- Docker containerization
- Docker Compose setup
- Documentation
- Testing framework
## Remaining Tasks
### Features to Implement
None identified - core functionality complete
### Known Issues
- None currently documented
### Potential Improvements
1. Performance Optimizations
- Token usage optimization
- Processing speed improvements
- Caching strategies
2. Feature Enhancements
- Additional LLM providers
- Extended template capabilities
- Enhanced error recovery
- Advanced OCR options
3. User Experience
- Advanced configuration UI
- Batch processing improvements
- Progress indicators
- Enhanced error messaging
4. Documentation
- Additional usage examples
- Troubleshooting guides
- Performance tuning guide
- Development guidelines
## Project Status
- 🟢 Core Features: Complete
- 🟢 Documentation: Initialized
- 🟢 Testing: Implemented
- 🟢 Deployment: Ready
- 🟡 Optimization: Ongoing
## Next Development Priorities
1. Monitor for user feedback
2. Address any discovered issues
3. Implement performance improvements
4. Enhance documentation based on user needs

View file

@ -1,179 +0,0 @@
# System Patterns
## Architecture Overview
### 1. Microservices Architecture
- **paperless-gpt**: AI processing service (Go)
- **paperless-ngx**: Document management system (external)
- Communication via REST API
- Docker-based deployment
### 2. Backend Architecture (Go)
#### Core Components
- **API Server**: HTTP handlers for document processing
- **LLM Integration**: Abstraction for multiple AI providers
- **Template Engine**: Dynamic prompt generation
- **Document Processor**: Handles OCR and metadata generation
#### Key Patterns
- **Template-Based Prompts**: Customizable templates for different AI tasks
- **Content Truncation**: Smart content limiting based on token counts
- **Concurrent Processing**: Goroutines for parallel document processing
- **Mutex-Protected Resources**: Thread-safe template access
- **Error Propagation**: Structured error handling across layers
### 3. Frontend Architecture (React/TypeScript)
#### Components
- Document Processor
- Suggestion Review
- Document Cards
- Sidebar Navigation
- Success Modal
#### State Management
- Local component state
- Props for component communication
- API integration for data fetching
### 4. Integration Patterns
#### API Communication
- RESTful endpoints
- JSON payload structure
- Token-based authentication
- Error response handling
#### LLM Provider Integration
- Provider abstraction layer
- Support for multiple providers (OpenAI, Ollama)
- Configurable models and parameters
- Vision model support for OCR
### 5. Data Flow
#### Document Processing Flow (Manual)
1. Document tagged in paperless-ngx
2. paperless-gpt detects tagged documents
3. AI processing (title/tags/correspondent generation)
4. Manual review or auto-apply
5. Update back to paperless-ngx
#### Document Processing Flow (Auto)
1. Document tagged in paperless-ngx with the configured auto-processing tag (env: AUTO_TAG)
2. paperless-gpt automatically processes documents
3. AI processing (title/tags/correspondent generation)
4. Auto-apply results back to paperless-ngx
#### OCR Processing Flow
1. Image/PDF input
2. Vision model processing
3. Text extraction and cleanup
4. Integration with document processing
### 6. Security Patterns
- API token authentication
- Environment-based configuration
- Docker container isolation
- Rate limiting and token management
### 7. Development Patterns
- Clear separation of concerns
- Dependency injection
- Interface-based design
- Concurrent processing with safety
- Comprehensive error handling
- Template-based customization
### 8. Testing Patterns
- Unit tests for core logic
- Integration tests for API
- E2E tests for web interface
- Test fixtures and mocks
- Playwright for frontend testing
## OCR System Patterns
### OCR Provider Architecture
#### 1. Provider Interface
- Common interface for all OCR implementations
- Methods for image processing
- Configuration through standardized Config struct
- Resource management patterns
#### 2. LLM Provider Implementation
- Supports OpenAI and Ollama vision models
- Base64 encoding for OpenAI requests
- Binary format for Ollama requests
- Template-based OCR prompts
#### 3. Google Document AI Provider
- Enterprise-grade OCR processing
- MIME type validation
- Processor configuration via environment
- Regional endpoint support
### Logging Patterns
#### 1. Provider Initialization
```
[INFO] Initializing OCR provider: llm
[INFO] Using LLM OCR provider (provider=ollama, model=minicpm-v)
```
#### 2. Processing Logs
```
[DEBUG] Starting OCR processing
[DEBUG] Image dimensions (width=800, height=1200)
[DEBUG] Using binary image format for non-OpenAI provider
[DEBUG] Sending request to vision model
[INFO] Successfully processed image (content_length=1536)
```
#### 3. Error Logging
```
[ERROR] Failed to decode image: invalid format
[ERROR] Unsupported file type: image/webp
[ERROR] Failed to get response from vision model
```
### Error Handling Patterns
#### 1. Configuration Validation
- Required parameter checks
- Environment variable validation
- Provider-specific configuration
- Connection testing
#### 2. Processing Errors
- Image format validation
- MIME type checking
- Content processing errors
- Provider-specific error handling
#### 3. Error Propagation
- Detailed error contexts
- Original error wrapping
- Logging with error context
- Recovery mechanisms
### Processing Flow
#### 1. Document Processing
```
Document Tagged → OCR Provider Selected → Image Processing → Text Extraction → Content Update
```
#### 2. Provider Selection
```
Config Check → Provider Initialization → Resource Setup → Provider Ready
```
#### 3. Error Recovery
```
Error Detection → Logging → Cleanup → Error Propagation
```
These patterns ensure consistent behavior across OCR providers while maintaining proper logging and error handling throughout the system.

View file

@ -1,136 +0,0 @@
# Technical Context
## Technology Stack
### Backend (Go)
- **Runtime**: Go
- **Key Libraries**:
- langchaingo: LLM integration
- logrus: Structured logging
- net/http: API server
### Frontend (React/TypeScript)
- **Framework**: React with TypeScript
- **Build Tool**: Vite
- **Testing**: Playwright
- **Styling**: Tailwind CSS
- **Package Manager**: npm
### Infrastructure
- **Containerization**: Docker
- **Deployment**: Docker Compose
- **CI/CD**: GitHub Actions
## Development Setup
### Prerequisites
1. Docker and Docker Compose
2. Go development environment
3. Node.js and npm
4. Access to LLM provider (OpenAI or Ollama)
### Local Development Steps
1. Clone repository
2. Configure environment variables
3. Start paperless-ngx instance
4. Build and run paperless-gpt
5. Access web interface
### Testing Steps (Required Before Commits)
1. **Unit Tests**:
```bash
go test .
```
2. **E2E Tests**:
```bash
docker build . -t icereed/paperless-gpt:e2e
cd web-app && npm run test:e2e
```
These tests MUST be run and pass before considering any task complete.
## Configuration
### Environment Variables
#### Required Variables
```
PAPERLESS_BASE_URL=http://paperless-ngx:8000
PAPERLESS_API_TOKEN=your_paperless_api_token
LLM_PROVIDER=openai|ollama
LLM_MODEL=model_name
```
#### Optional Variables
```
PAPERLESS_PUBLIC_URL=public_url
MANUAL_TAG=paperless-gpt
AUTO_TAG=paperless-gpt-auto
OPENAI_API_KEY=key (if using OpenAI)
OPENAI_BASE_URL=custom_url
LLM_LANGUAGE=English
OLLAMA_HOST=host_url
VISION_LLM_PROVIDER=provider
VISION_LLM_MODEL=model
AUTO_OCR_TAG=tag
OCR_LIMIT_PAGES=5
LOG_LEVEL=info
```
### Docker Configuration
- Network configuration for service communication
- Volume mounts for prompts and persistence
- Resource limits and scaling options
- Port mappings for web interface
### LLM Provider Setup
#### OpenAI Configuration
- API key management
- Model selection
- Base URL configuration (for custom endpoints)
- Vision API access for OCR
#### Ollama Configuration
- Server setup and hosting
- Model installation and management
- Network access configuration
- Resource allocation
### Custom Prompts
#### Template Files
- title_prompt.tmpl
- tag_prompt.tmpl
- ocr_prompt.tmpl
- correspondent_prompt.tmpl
#### Template Variables
- Language
- Content
- AvailableTags
- OriginalTags
- Title
- AvailableCorrespondents
- BlackList
## Technical Constraints
### Performance Considerations
- Token limits for LLM requests
- OCR page limits
- Concurrent processing limits
- Network bandwidth requirements
### Security Requirements
- API token security
- Environment variable management
- Network isolation
- Data privacy considerations
### Integration Requirements
- paperless-ngx compatibility
- LLM provider API compatibility
- Docker environment compatibility
- Web browser compatibility

BIN
demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

BIN
demo.mp4

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

View file

@ -1,3 +1,5 @@
version: '3.8'
services:
app:
build:

View file

@ -1,54 +0,0 @@
package main
import (
"embed"
"io"
"io/fs"
"net/http"
"path"
"strings"
"github.com/gin-gonic/gin"
)
//go:embed web-app/dist/*
var webappContent embed.FS
// createEmbeddedFileServer wraps the embedded web-app/dist directory as an
// http.FileSystem rooted at the dist contents, so the frontend build can be
// served directly from the binary. Panics if the embedded tree is missing
// (a build-time invariant).
func createEmbeddedFileServer() http.FileSystem {
	// Re-root the embedded FS so "web-app/dist" becomes "/"
	distFS, subErr := fs.Sub(webappContent, "web-app/dist")
	if subErr != nil {
		panic(subErr)
	}
	return http.FS(distFS)
}
// serveEmbeddedFile serves a file from the embedded web-app/dist filesystem.
// Empty paths and directory-style paths (trailing "/") fall back to
// index.html so client-side routing keeps working.
func serveEmbeddedFile(c *gin.Context, prefix string, filepath string) {
	// If the path is empty or ends with "/", serve index.html
	if filepath == "" || strings.HasSuffix(filepath, "/") {
		filepath = path.Join(filepath, "index.html")
	}

	// Try to open the file from our embedded filesystem
	fullPath := path.Join("web-app/dist", prefix, filepath)
	f, err := webappContent.Open(fullPath)
	if err != nil {
		// If file not found, serve 404. http.Error already writes the status
		// and body; the previous code additionally called c.Status, which
		// produced a superfluous WriteHeader call.
		log.Warnf("File not found: %s", fullPath)
		http.Error(c.Writer, http.StatusText(http.StatusNotFound), http.StatusNotFound)
		return
	}
	defer f.Close()

	stat, err := f.Stat()
	if err != nil {
		c.Status(http.StatusInternalServerError)
		return
	}

	// Serve the file with proper content-type and caching headers
	http.ServeContent(c.Writer, c.Request, stat.Name(), stat.ModTime(), f.(io.ReadSeeker))
}

83
go.mod
View file

@ -1,98 +1,51 @@
module paperless-gpt
go 1.23.0
go 1.22.0
toolchain go1.24.1
toolchain go1.22.2
require (
cloud.google.com/go/documentai v1.35.2
github.com/Masterminds/sprig/v3 v3.3.0
github.com/fatih/color v1.18.0
github.com/gabriel-vasile/mimetype v1.4.8
github.com/gen2brain/go-fitz v1.24.14
github.com/Masterminds/sprig/v3 v3.2.3
github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.10.0
github.com/tmc/langchaingo v0.1.13
golang.org/x/sync v0.12.0
google.golang.org/api v0.225.0
gorm.io/driver/sqlite v1.5.7
gorm.io/gorm v1.25.12
github.com/tmc/langchaingo v0.1.12
)
require (
cloud.google.com/go v0.118.1 // indirect
cloud.google.com/go/auth v0.15.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
cloud.google.com/go/compute/metadata v0.6.0 // indirect
cloud.google.com/go/longrunning v0.6.4 // indirect
dario.cat/mergo v1.0.1 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.3.0 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/bytedance/sonic v1.11.6 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/ebitengine/purego v0.8.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/google/s2a-go v0.1.9 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.5 // indirect
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/jupiterrider/ffi v0.2.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-sqlite3 v1.14.24 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/mitchellh/copystructure v1.0.0 // indirect
github.com/mitchellh/reflectwalk v1.0.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/spf13/cast v1.3.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 // indirect
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 // indirect
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
go.opentelemetry.io/otel v1.34.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect
go.opentelemetry.io/otel/trace v1.34.0 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.36.0 // indirect
golang.org/x/net v0.37.0 // indirect
golang.org/x/oauth2 v0.28.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.11.0 // indirect
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/grpc v1.71.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

245
go.sum
View file

@ -1,23 +1,9 @@
cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
@ -31,27 +17,12 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE=
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
github.com/gen2brain/go-fitz v1.24.14/go.mod h1:0KaZeQgASc20Yp5R/pFzyy7SmP01XcoHKNF842U2/S4=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
@ -62,58 +33,31 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/googleapis/enterprise-certificate-proxy v0.3.5 h1:VgzTY2jogw3xt39CusEnFJWm7rlsq5yL5q9XdLOuP5g=
github.com/googleapis/enterprise-certificate-proxy v0.3.5/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jupiterrider/ffi v0.2.0 h1:tMM70PexgYNmV+WyaYhJgCvQAvtTCs3wXeILPutihnA=
github.com/jupiterrider/ffi v0.2.0/go.mod h1:yqYqX5DdEccAsHeMn+6owkoI2llBLySVAF8dwCDZPVs=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@ -125,136 +69,83 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE=
github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 h1:K+bMSIx9A7mLES1rtG+qKduLIXq40DAzYHtb0XuCukA=
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181/go.mod h1:dzYhVIwWCtzPAa4QP98wfB9+mzt33MSmM8wsKiMi2ow=
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 h1:oYrL81N608MLZhma3ruL8qTM4xcpYECGut8KSxRY59g=
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82/go.mod h1:Gn+LZmCrhPECMD3SOKlE+BOHwhOYD9j7WT9NUtkCrC8=
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a h1:O85GKETcmnCNAfv4Aym9tepU8OE0NmcZNqPlXcsBKBs=
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a/go.mod h1:LaSIs30YPGs1H5jwGgPhLzc8vkNc/k0rDX/fEZqiU/M=
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 h1:qqjvoVXdWIcZCLPMlzgA7P9FZWdPGPvP/l3ef8GzV6o=
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84/go.mod h1:IJZ+fdMvbW2qW6htJx7sLJ04FEs4Ldl/MDsJtMKywfw=
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f h1:Wku8eEdeJqIOFHtrfkYUByc4bCaTeA6fL0UJgfEiFMI=
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU=
google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ=
google.golang.org/api v0.225.0 h1:+4/IVqBQm0MV5S+JW3kdEGC1WtOmM2mXN1LKH1LdNlw=
google.golang.org/api v0.225.0/go.mod h1:WP/0Xm4LVvMOCldfvOISnWquSRWbG2kArDZcg+W2DbY=
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e h1:YA5lmSs3zc/5w+xsRcHqpETkaYyK63ivEPzNTcUUlSA=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250227231956-55c901821b1e/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorm.io/driver/sqlite v1.5.7 h1:8NvsrhP0ifM7LX9G4zPB97NwovUakUxc+2V2uuf3Z1I=
gorm.io/driver/sqlite v1.5.7/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4=
gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8=
gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=

135
jobs.go
View file

@ -1,135 +0,0 @@
package main
import (
"context"
"os"
"sort"
"sync"
"time"
"github.com/google/uuid"
"github.com/sirupsen/logrus"
)
// Job represents an OCR job
type Job struct {
	ID         string    // unique job key, a UUID from generateJobID
	DocumentID int       // paperless document this job processes
	Status     string    // "pending", "in_progress", "completed", "failed"
	Result     string    // OCR result or error message
	CreatedAt  time.Time // when the job was created
	UpdatedAt  time.Time // last status/progress change
	PagesDone  int       // Number of pages processed
}

// JobStore manages jobs and their statuses
// (all access goes through the embedded RWMutex).
type JobStore struct {
	sync.RWMutex
	jobs map[string]*Job
}

var (
	// logger is the package-wide logrus logger for the job subsystem.
	logger = logrus.New()
	// jobStore holds all known OCR jobs, keyed by Job.ID.
	jobStore = &JobStore{
		jobs: make(map[string]*Job),
	}
	// jobQueue feeds pending jobs to the worker pool.
	jobQueue = make(chan *Job, 100) // Buffered channel with capacity of 100 jobs
)
// init configures the package-level job logger: stdout output, full
// timestamps, Info level.
func init() {
	logger.SetOutput(os.Stdout)
	logger.SetFormatter(&logrus.TextFormatter{
		FullTimestamp: true,
	})
	logger.SetLevel(logrus.InfoLevel)
	// Removed: the previous `logger.WithField("prefix", "OCR_JOB")` call
	// discarded the returned *logrus.Entry, so it had no effect on the
	// base logger. To carry a default field, call sites must use the
	// returned Entry (e.g. `entry := logger.WithField(...)`) instead.
}
// generateJobID returns a fresh random UUID string to use as a job key.
func generateJobID() string {
	return uuid.NewString()
}
// addJob registers a newly created job in the store under its ID.
// Progress always starts from zero regardless of any caller-set value.
func (s *JobStore) addJob(job *Job) {
	s.Lock()
	defer s.Unlock()
	job.PagesDone = 0 // reset progress for a fresh job
	s.jobs[job.ID] = job
	logger.Infof("Job added: %v", job)
}
// getJob looks up a job by ID, returning the job and whether it exists.
func (s *JobStore) getJob(jobID string) (*Job, bool) {
	s.RLock()
	defer s.RUnlock()
	j, ok := s.jobs[jobID]
	return j, ok
}
// GetAllJobs returns a snapshot of every known job, most recently
// created first. The returned slice is owned by the caller.
func (s *JobStore) GetAllJobs() []*Job {
	s.RLock()
	defer s.RUnlock()
	snapshot := make([]*Job, 0, len(s.jobs))
	for _, j := range s.jobs {
		snapshot = append(snapshot, j)
	}
	// Newest jobs first.
	sort.Slice(snapshot, func(a, b int) bool {
		return snapshot[a].CreatedAt.After(snapshot[b].CreatedAt)
	})
	return snapshot
}
// updateJobStatus sets the status of the job with the given ID and, when
// result is non-empty, its result text, stamping UpdatedAt.
// Unknown job IDs are silently ignored.
func (s *JobStore) updateJobStatus(jobID, status, result string) {
	s.Lock()
	defer s.Unlock()
	j, ok := s.jobs[jobID]
	if !ok {
		return
	}
	j.Status = status
	if result != "" {
		j.Result = result
	}
	j.UpdatedAt = time.Now()
	logger.Infof("Job status updated: %v", j)
}
// updatePagesDone records OCR page progress for the job with the given
// ID, stamping UpdatedAt. Unknown job IDs are silently ignored.
func (s *JobStore) updatePagesDone(jobID string, pagesDone int) {
	s.Lock()
	defer s.Unlock()
	j, ok := s.jobs[jobID]
	if !ok {
		return
	}
	j.PagesDone = pagesDone
	j.UpdatedAt = time.Now()
	logger.Infof("Job pages done updated: %v", j)
}
// startWorkerPool launches numWorkers goroutines that consume jobs from
// the shared jobQueue channel until the channel is closed.
func startWorkerPool(app *App, numWorkers int) {
	for workerID := 0; workerID < numWorkers; workerID++ {
		workerID := workerID // per-iteration copy (pre-Go 1.22 safety)
		go func() {
			logger.Infof("Worker %d started", workerID)
			for job := range jobQueue {
				logger.Infof("Worker %d processing job: %s", workerID, job.ID)
				processJob(app, job)
			}
		}()
	}
}
// processJob runs OCR for one queued job, moving it through
// in_progress -> completed (with the combined OCR text as result) or
// in_progress -> failed (with the error message as result).
func processJob(app *App, job *Job) {
	jobStore.updateJobStatus(job.ID, "in_progress", "")

	text, err := app.ProcessDocumentOCR(context.Background(), job.DocumentID)
	if err != nil {
		logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err)
		jobStore.updateJobStatus(job.ID, "failed", err.Error())
		return
	}

	jobStore.updateJobStatus(job.ID, "completed", text)
	logger.Infof("Job completed: %s", job.ID)
}

View file

@ -1,101 +0,0 @@
package main
import (
"os"
"path/filepath"
"time"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
// ModificationHistory represents the schema of the modification_history table
type ModificationHistory struct {
	ID            uint   `gorm:"primaryKey"`             // Auto-incrementing primary key
	DocumentID    uint   `gorm:"not null"`               // Foreign key to documents table (if applicable)
	DateChanged   string `gorm:"not null"`               // Date and time of modification (RFC3339, set by InsertModification)
	ModField      string `gorm:"size:255;not null"`      // Field being modified
	PreviousValue string `gorm:"size:1048576"`           // Previous value of the field
	NewValue      string `gorm:"size:1048576"`           // New value of the field
	Undone        bool   `gorm:"not null;default:false"` // Whether the modification has been undone
	UndoneDate    string `gorm:"default:null"`           // Date and time of undoing the modification (RFC3339)
}
// InitializeDB opens (creating if necessary) the SQLite database under
// ./db and auto-migrates the ModificationHistory schema. Any failure is
// fatal: the application cannot run without its history store.
func InitializeDB() *gorm.DB {
	const dbDir = "db"
	if err := os.MkdirAll(dbDir, os.ModePerm); err != nil {
		log.Fatalf("Failed to create db directory: %v", err)
	}

	dbPath := filepath.Join(dbDir, "modification_history.db")
	database, err := gorm.Open(sqlite.Open(dbPath), &gorm.Config{})
	if err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}

	// Create the table if it doesn't exist yet.
	if err := database.AutoMigrate(&ModificationHistory{}); err != nil {
		log.Fatalf("Failed to migrate database schema: %v", err)
	}
	return database
}
// InsertModification stamps the record with the current time (RFC3339)
// and persists it, returning any error reported by GORM.
func InsertModification(db *gorm.DB, record *ModificationHistory) error {
	log.Debugf("Passed modification record: %+v", record)
	// DateChanged is always set server-side at insert time.
	record.DateChanged = time.Now().Format(time.RFC3339)
	log.Debugf("Inserting modification record: %+v", record)
	result := db.Create(&record)
	log.Debugf("Insertion result: %+v", result)
	return result.Error
}
// GetModification fetches a single modification record by primary key.
// The record pointer is always non-nil; check the returned error for
// "record not found".
func GetModification(db *gorm.DB, id uint) (*ModificationHistory, error) {
	record := ModificationHistory{}
	err := db.First(&record, id).Error
	return &record, err
}
// GetAllModifications returns every modification record, newest first.
//
// Deprecated: prefer GetPaginatedModifications for large histories.
func GetAllModifications(db *gorm.DB) ([]ModificationHistory, error) {
	var history []ModificationHistory
	err := db.Order("date_changed DESC").Find(&history).Error
	return history, err
}
// GetPaginatedModifications retrieves one page of modification records,
// newest first, together with the total record count.
//
// page is 1-based. Defensive fix: page values below 1 are clamped to 1
// and a non-positive pageSize falls back to 10, so the computed OFFSET
// can never go negative (previously page=0 produced OFFSET -pageSize,
// which SQLite rejects).
func GetPaginatedModifications(db *gorm.DB, page int, pageSize int) ([]ModificationHistory, int64, error) {
	// Callers may pass raw, unvalidated query parameters.
	if page < 1 {
		page = 1
	}
	if pageSize < 1 {
		pageSize = 10
	}

	// Get total count
	var total int64
	if err := db.Model(&ModificationHistory{}).Count(&total).Error; err != nil {
		return nil, 0, err
	}

	// Get paginated records
	var records []ModificationHistory
	offset := (page - 1) * pageSize
	result := db.Order("date_changed DESC").
		Offset(offset).
		Limit(pageSize).
		Find(&records)
	return records, total, result.Error
}
// SetModificationUndone marks a modification record as undone, stamps
// the undo time (RFC3339) and saves it back to the database.
func SetModificationUndone(db *gorm.DB, record *ModificationHistory) error {
	record.Undone = true
	record.UndoneDate = time.Now().Format(time.RFC3339)
	return db.Save(&record).Error
}

1102
main.go

File diff suppressed because it is too large Load diff

View file

@ -1,199 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"slices"
"testing"
"text/template"
"github.com/Masterminds/sprig/v3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestProcessAutoTagDocuments verifies that processAutoTagDocuments
// only auto-tags eligible documents: anything carrying the OCR auto-tag
// (autoOcrTag) is skipped, and an empty document list processes zero
// documents. Paperless endpoints are mocked via the shared test env.
func TestProcessAutoTagDocuments(t *testing.T) {
	// Initialize required global variables
	autoTag = "paperless-gpt-auto"
	autoOcrTag = "paperless-gpt-ocr-auto"
	// Initialize templates (empty templates are enough — content is
	// produced by the mock LLM, not the templates).
	var err error
	titleTemplate, err = template.New("title").Funcs(sprig.FuncMap()).Parse("")
	require.NoError(t, err)
	tagTemplate, err = template.New("tag").Funcs(sprig.FuncMap()).Parse("")
	require.NoError(t, err)
	correspondentTemplate, err = template.New("correspondent").Funcs(sprig.FuncMap()).Parse("")
	require.NoError(t, err)
	// Create test environment
	env := newTestEnv(t)
	defer env.teardown()
	// Set up test cases
	testCases := []struct {
		name           string
		documents      []Document
		expectedCount  int
		expectedError  string
		updateResponse int // HTTP status code for update response
	}{
		{
			// Docs 1 and 3 carry autoOcrTag and must be skipped;
			// only doc 2 is eligible, hence expectedCount 1.
			name: "Skip document with autoOcrTag",
			documents: []Document{
				{
					ID:    1,
					Title: "Doc with OCR tag",
					Tags:  []string{autoTag, autoOcrTag},
				},
				{
					ID:    2,
					Title: "Doc without OCR tag",
					Tags:  []string{autoTag},
				},
				{
					ID:    3,
					Title: "Doc with OCR tag",
					Tags:  []string{autoTag, autoOcrTag},
				},
			},
			expectedCount:  1,
			updateResponse: http.StatusOK,
		},
		{
			name:           "No documents to process",
			documents:      []Document{},
			expectedCount:  0,
			updateResponse: http.StatusOK,
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Mock the GetAllTags response; tag IDs 1/2 map to the two
			// auto-tags below.
			env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
				response := map[string]interface{}{
					"results": []map[string]interface{}{
						{"id": 1, "name": autoTag},
						{"id": 2, "name": autoOcrTag},
						{"id": 3, "name": "other-tag"},
					},
				}
				w.WriteHeader(http.StatusOK)
				json.NewEncoder(w).Encode(response)
			})
			// Mock the GetDocumentsByTags response, translating each
			// document's tag names back into the mock tag IDs above.
			env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) {
				response := GetDocumentsApiResponse{
					Results: make([]GetDocumentApiResponseResult, len(tc.documents)),
				}
				for i, doc := range tc.documents {
					tagIds := make([]int, len(doc.Tags))
					for j, tagName := range doc.Tags {
						switch tagName {
						case autoTag:
							tagIds[j] = 1
						case autoOcrTag:
							tagIds[j] = 2
						default:
							tagIds[j] = 3
						}
					}
					response.Results[i] = GetDocumentApiResponseResult{
						ID:      doc.ID,
						Title:   doc.Title,
						Tags:    tagIds,
						Content: "Test content",
					}
				}
				w.WriteHeader(http.StatusOK)
				json.NewEncoder(w).Encode(response)
			})
			// Mock the correspondent creation endpoint
			env.setMockResponse("/api/correspondents/", func(w http.ResponseWriter, r *http.Request) {
				if r.Method == "POST" {
					// Mock successful correspondent creation
					w.WriteHeader(http.StatusCreated)
					json.NewEncoder(w).Encode(map[string]interface{}{
						"id":   3,
						"name": "test response",
					})
				} else {
					// Mock GET response for existing correspondents
					w.WriteHeader(http.StatusOK)
					json.NewEncoder(w).Encode(map[string]interface{}{
						"results": []map[string]interface{}{
							{"id": 1, "name": "Alpha"},
							{"id": 2, "name": "Beta"},
						},
					})
				}
			})
			// Create test app
			app := &App{
				Client:   env.client,
				Database: env.db,
				LLM:      &mockLLM{}, // Use mock LLM from app_llm_test.go
			}
			// Set auto-generate flags
			autoGenerateTitle = "true"
			autoGenerateTags = "true"
			autoGenerateCorrespondents = "true"
			// Mock the document update responses for eligible docs only.
			// NOTE(review): the handler closures capture the loop variable
			// `doc`; this relies on Go 1.22+ per-iteration loop variables
			// (CI uses go-version 1.22) — confirm go.mod agrees.
			for _, doc := range tc.documents {
				if !slices.Contains(doc.Tags, autoOcrTag) {
					updatePath := fmt.Sprintf("/api/documents/%d/", doc.ID)
					env.setMockResponse(updatePath, func(w http.ResponseWriter, r *http.Request) {
						w.WriteHeader(tc.updateResponse)
						json.NewEncoder(w).Encode(map[string]interface{}{
							"id":    doc.ID,
							"title": "Updated " + doc.Title,
							"tags":  []int{1, 3}, // Mock updated tag IDs
						})
					})
				}
			}
			// Run the test
			count, err := app.processAutoTagDocuments()
			// Verify results
			if tc.expectedError != "" {
				require.Error(t, err)
				assert.Contains(t, err.Error(), tc.expectedError)
			} else {
				require.NoError(t, err)
				assert.Equal(t, tc.expectedCount, count)
			}
		})
	}
}
// TestCreateCustomHTTPClient verifies that the HTTP client returned by
// createCustomHTTPClient attaches the custom "X-Title: paperless-gpt"
// header to outgoing requests.
func TestCreateCustomHTTPClient(t *testing.T) {
	// Create a test server that asserts on the incoming request headers.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify custom header
		assert.Equal(t, "paperless-gpt", r.Header.Get("X-Title"), "Expected X-Title header")
		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()
	// Get custom client
	client := createCustomHTTPClient()
	require.NotNil(t, client, "HTTP client should not be nil")
	// Make a request
	resp, err := client.Get(server.URL)
	require.NoError(t, err, "Request should not fail")
	defer resp.Body.Close()
	assert.Equal(t, http.StatusOK, resp.StatusCode, "Expected 200 OK response")
}

57
ocr.go
View file

@ -1,57 +0,0 @@
package main
import (
"context"
"fmt"
"os"
"strings"
)
// ProcessDocumentOCR processes a document through OCR and returns the combined text.
//
// It downloads the document's pages as images (up to limitOcrPages), runs
// each page through the configured OCR provider, and joins the per-page
// texts with a blank line. Temporary page images are always removed, even
// when an error aborts processing partway through.
func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int) (string, error) {
	docLogger := documentLogger(documentID)
	docLogger.Info("Starting OCR processing")
	imagePaths, err := app.Client.DownloadDocumentAsImages(ctx, documentID, limitOcrPages)
	// NOTE: cleanup is deliberately registered BEFORE the error check —
	// DownloadDocumentAsImages may have written some page files before
	// failing, and those must still be removed.
	defer func() {
		for _, imagePath := range imagePaths {
			// Best-effort removal; a leftover temp file is only worth a warning.
			if err := os.Remove(imagePath); err != nil {
				docLogger.WithError(err).WithField("image_path", imagePath).Warn("Failed to remove temporary image file")
			}
		}
	}()
	if err != nil {
		return "", fmt.Errorf("error downloading document images for document %d: %w", documentID, err)
	}
	docLogger.WithField("page_count", len(imagePaths)).Debug("Downloaded document images")
	var ocrTexts []string
	for i, imagePath := range imagePaths {
		pageLogger := docLogger.WithField("page", i+1)
		pageLogger.Debug("Processing page")
		imageContent, err := os.ReadFile(imagePath)
		if err != nil {
			return "", fmt.Errorf("error reading image file for document %d, page %d: %w", documentID, i+1, err)
		}
		result, err := app.ocrProvider.ProcessImage(ctx, imageContent)
		if err != nil {
			return "", fmt.Errorf("error performing OCR for document %d, page %d: %w", documentID, i+1, err)
		}
		// Guard against providers that return (nil, nil).
		if result == nil {
			pageLogger.Error("Got nil result from OCR provider")
			return "", fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1)
		}
		pageLogger.WithField("has_hocr", result.HOCR != "").
			WithField("metadata", result.Metadata).
			Debug("OCR completed for page")
		ocrTexts = append(ocrTexts, result.Text)
	}
	docLogger.Info("OCR processing completed successfully")
	return strings.Join(ocrTexts, "\n\n"), nil
}

View file

@ -1,224 +0,0 @@
package ocr
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/gabriel-vasile/mimetype"
"github.com/hashicorp/go-retryablehttp"
"github.com/sirupsen/logrus"
)
const (
	// apiVersion is the Azure Document Intelligence REST API version used
	// for both submission and polling requests.
	apiVersion = "2024-11-30"
	// defaultModelID is the prebuilt "read" OCR model used when the
	// configuration does not name one.
	defaultModelID = "prebuilt-read"
	// defaultTimeout is the overall per-document timeout in seconds.
	defaultTimeout = 120
	// pollingInterval is how often the async operation is re-checked.
	pollingInterval = 2 * time.Second
)

// AzureProvider implements OCR using Azure Document Intelligence
type AzureProvider struct {
	endpoint   string                // e.g. https://<resource>.cognitiveservices.azure.com/
	apiKey     string                // sent as Ocp-Apim-Subscription-Key
	modelID    string                // analysis model; defaults to defaultModelID
	timeout    time.Duration         // overall submit+poll deadline
	httpClient *retryablehttp.Client // retrying HTTP client
}

// Request body for Azure Document Intelligence
type analyzeRequest struct {
	Base64Source string `json:"base64Source"` // document bytes, base64-encoded
}
// newAzureProvider builds an AzureProvider from config, applying the
// default model ID and timeout when unset and wiring a retrying HTTP
// client (up to 3 attempts, 1–5s backoff).
//
// Returns an error when the endpoint or API key is missing.
func newAzureProvider(config Config) (*AzureProvider, error) {
	logger := log.WithFields(logrus.Fields{
		"endpoint": config.AzureEndpoint,
		"model_id": config.AzureModelID,
	})
	logger.Info("Creating new Azure Document Intelligence provider")

	// Both endpoint and key are mandatory.
	if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
		logger.Error("Missing required configuration")
		return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
	}

	// Start from defaults, then apply configured overrides.
	provider := &AzureProvider{
		endpoint: config.AzureEndpoint,
		apiKey:   config.AzureAPIKey,
		modelID:  defaultModelID,
		timeout:  time.Duration(defaultTimeout) * time.Second,
	}
	if config.AzureModelID != "" {
		provider.modelID = config.AzureModelID
	}
	if config.AzureTimeout > 0 {
		provider.timeout = time.Duration(config.AzureTimeout) * time.Second
	}

	// Configure retryablehttp client
	retryClient := retryablehttp.NewClient()
	retryClient.RetryMax = 3
	retryClient.RetryWaitMin = 1 * time.Second
	retryClient.RetryWaitMax = 5 * time.Second
	retryClient.Logger = logger
	provider.httpClient = retryClient

	logger.Info("Successfully initialized Azure Document Intelligence provider")
	return provider, nil
}
// ProcessImage submits the document to Azure Document Intelligence and
// blocks (up to p.timeout) until analysis completes, returning the
// extracted text plus provider metadata.
//
// Only MIME types accepted by isImageMIMEType are processed.
func (p *AzureProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{
		"model_id": p.modelID,
	})
	logger.Debug("Starting Azure Document Intelligence processing")

	// Reject unsupported content up-front, before any network call.
	mtype := mimetype.Detect(imageContent)
	logger.WithField("mime_type", mtype.String()).Debug("Detected file type")
	if !isImageMIMEType(mtype.String()) {
		logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
		return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
	}

	// Bound the entire submit+poll cycle by the provider timeout.
	ctx, cancel := context.WithTimeout(ctx, p.timeout)
	defer cancel()

	operationLocation, err := p.submitDocument(ctx, imageContent)
	if err != nil {
		return nil, fmt.Errorf("error submitting document: %w", err)
	}

	result, err := p.pollForResults(ctx, operationLocation)
	if err != nil {
		return nil, fmt.Errorf("error polling for results: %w", err)
	}

	ocrResult := &OCRResult{
		Text: result.AnalyzeResult.Content,
		Metadata: map[string]string{
			"provider":    "azure_docai",
			"page_count":  fmt.Sprintf("%d", len(result.AnalyzeResult.Pages)),
			"api_version": result.AnalyzeResult.APIVersion,
		},
	}

	logger.WithFields(logrus.Fields{
		"content_length": len(ocrResult.Text),
		"page_count":     len(result.AnalyzeResult.Pages),
	}).Info("Successfully processed document")
	return ocrResult, nil
}
// submitDocument POSTs the base64-encoded document to the async analyze
// endpoint and returns the Operation-Location URL to poll for results.
func (p *AzureProvider) submitDocument(ctx context.Context, imageContent []byte) (string, error) {
	requestURL := fmt.Sprintf("%s/documentintelligence/documentModels/%s:analyze?api-version=%s",
		p.endpoint, p.modelID, apiVersion)

	// Prepare request body
	payload, err := json.Marshal(analyzeRequest{
		Base64Source: base64.StdEncoding.EncodeToString(imageContent),
	})
	if err != nil {
		return "", fmt.Errorf("error marshaling request body: %w", err)
	}

	req, err := retryablehttp.NewRequestWithContext(ctx, "POST", requestURL, bytes.NewBuffer(payload))
	if err != nil {
		return "", fmt.Errorf("error creating HTTP request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("error sending HTTP request: %w", err)
	}
	defer resp.Body.Close()

	// 202 Accepted is the only success status for an async analyze call.
	if resp.StatusCode != http.StatusAccepted {
		body, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
	}

	operationLocation := resp.Header.Get("Operation-Location")
	if operationLocation == "" {
		return "", fmt.Errorf("no Operation-Location header in response")
	}
	return operationLocation, nil
}
// pollForResults repeatedly GETs the Operation-Location URL every
// pollingInterval until the analysis succeeds, fails, or ctx expires,
// returning the decoded result on success.
//
// Fixes over the previous version: the response body is now closed at
// the end of every polling iteration (a `defer resp.Body.Close()` inside
// the loop accumulated one open body per iteration until the function
// returned — a connection leak), and the HTTP status is checked before
// attempting to decode the payload.
func (p *AzureProvider) pollForResults(ctx context.Context, operationLocation string) (*AzureDocumentResult, error) {
	logger := log.WithField("operation_location", operationLocation)
	logger.Debug("Starting to poll for results")

	ticker := time.NewTicker(pollingInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil, fmt.Errorf("operation timed out after %v: %w", p.timeout, ctx.Err())
		case <-ticker.C:
			req, err := retryablehttp.NewRequestWithContext(ctx, "GET", operationLocation, nil)
			if err != nil {
				return nil, fmt.Errorf("error creating poll request: %w", err)
			}
			req.Header.Set("Ocp-Apim-Subscription-Key", p.apiKey)

			resp, err := p.httpClient.Do(req)
			if err != nil {
				return nil, fmt.Errorf("error polling for results: %w", err)
			}

			// Decode then close immediately — this loop may iterate many
			// times before returning, so defer would leak bodies.
			var result AzureDocumentResult
			decodeErr := json.NewDecoder(resp.Body).Decode(&result)
			resp.Body.Close()

			// A non-200 body need not be valid JSON; report the status
			// first so decode noise doesn't mask the real problem.
			if resp.StatusCode != http.StatusOK {
				return nil, fmt.Errorf("unexpected status code %d while polling", resp.StatusCode)
			}
			if decodeErr != nil {
				logger.WithError(decodeErr).Error("Failed to decode response")
				return nil, fmt.Errorf("error decoding response: %w", decodeErr)
			}

			logger.WithFields(logrus.Fields{
				"status_code":    resp.StatusCode,
				"content_length": len(result.AnalyzeResult.Content),
				"page_count":     len(result.AnalyzeResult.Pages),
				"status":         result.Status,
			}).Debug("Poll response received")

			switch result.Status {
			case "succeeded":
				return &result, nil
			case "failed":
				return nil, fmt.Errorf("document processing failed")
			case "running":
				// Continue polling on the next tick.
			default:
				return nil, fmt.Errorf("unexpected status: %s", result.Status)
			}
		}
	}
}

View file

@ -1,222 +0,0 @@
package ocr
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/hashicorp/go-retryablehttp"
"github.com/stretchr/testify/assert"
)
// TestNewAzureProvider is a table test covering provider construction:
// valid configs (with and without overrides) succeed with the expected
// model/timeout defaults, while configs missing the endpoint or API key
// fail with the documented error message.
func TestNewAzureProvider(t *testing.T) {
	tests := []struct {
		name        string
		config      Config
		wantErr     bool
		errContains string
	}{
		{
			name: "valid config",
			config: Config{
				AzureEndpoint: "https://test.cognitiveservices.azure.com/",
				AzureAPIKey:   "test-key",
			},
			wantErr: false,
		},
		{
			name: "valid config with custom model and timeout",
			config: Config{
				AzureEndpoint: "https://test.cognitiveservices.azure.com/",
				AzureAPIKey:   "test-key",
				AzureModelID:  "custom-model",
				AzureTimeout:  60,
			},
			wantErr: false,
		},
		{
			name: "missing endpoint",
			config: Config{
				AzureAPIKey: "test-key",
			},
			wantErr:     true,
			errContains: "missing required Azure Document Intelligence configuration",
		},
		{
			name: "missing api key",
			config: Config{
				AzureEndpoint: "https://test.cognitiveservices.azure.com/",
			},
			wantErr:     true,
			errContains: "missing required Azure Document Intelligence configuration",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			provider, err := newAzureProvider(tt.config)
			if tt.wantErr {
				assert.Error(t, err)
				if tt.errContains != "" {
					assert.Contains(t, err.Error(), tt.errContains)
				}
				return
			}
			assert.NoError(t, err)
			assert.NotNil(t, provider)
			// Verify default values are applied only when unset.
			if tt.config.AzureModelID == "" {
				assert.Equal(t, defaultModelID, provider.modelID)
			} else {
				assert.Equal(t, tt.config.AzureModelID, provider.modelID)
			}
			if tt.config.AzureTimeout == 0 {
				assert.Equal(t, time.Duration(defaultTimeout)*time.Second, provider.timeout)
			} else {
				assert.Equal(t, time.Duration(tt.config.AzureTimeout)*time.Second, provider.timeout)
			}
		})
	}
}
// TestAzureProvider_ProcessImage exercises the full submit+poll flow
// against httptest servers: a successful analyze round-trip, rejection
// of non-image content before any network call, and propagation of a
// 400 submission failure.
func TestAzureProvider_ProcessImage(t *testing.T) {
	// Sample success response returned by the mock polling endpoint.
	now := time.Now()
	successResult := AzureDocumentResult{
		Status:              "succeeded",
		CreatedDateTime:     now,
		LastUpdatedDateTime: now,
		AnalyzeResult: AzureAnalyzeResult{
			APIVersion:      apiVersion,
			ModelID:         defaultModelID,
			StringIndexType: "utf-16",
			Content:         "Test document content",
			Pages: []AzurePage{
				{
					PageNumber: 1,
					Angle:      0.0,
					Width:      800,
					Height:     600,
					Unit:       "pixel",
					Lines: []AzureLine{
						{
							Content: "Test line",
							Polygon: []int{0, 0, 100, 0, 100, 20, 0, 20},
							Spans:   []AzureSpan{{Offset: 0, Length: 9}},
						},
					},
					Spans: []AzureSpan{{Offset: 0, Length: 9}},
				},
			},
			Paragraphs: []AzureParagraph{
				{
					Content: "Test document content",
					Spans:   []AzureSpan{{Offset: 0, Length: 19}},
					BoundingRegions: []AzureBoundingBox{
						{
							PageNumber: 1,
							Polygon:    []int{0, 0, 100, 0, 100, 20, 0, 20},
						},
					},
				},
			},
			ContentFormat: "text",
		},
	}
	tests := []struct {
		name         string
		setupServer  func() *httptest.Server
		imageContent []byte
		wantErr      bool
		errContains  string
		expectedText string
	}{
		{
			// Analyze returns 202 + Operation-Location; the operations
			// endpoint then serves the succeeded result.
			name: "successful processing",
			setupServer: func() *httptest.Server {
				mux := http.NewServeMux()
				server := httptest.NewServer(mux)
				mux.HandleFunc("/documentintelligence/documentModels/prebuilt-read:analyze", func(w http.ResponseWriter, r *http.Request) {
					w.Header().Set("Operation-Location", fmt.Sprintf("%s/operations/123", server.URL))
					w.WriteHeader(http.StatusAccepted)
				})
				mux.HandleFunc("/operations/123", func(w http.ResponseWriter, r *http.Request) {
					json.NewEncoder(w).Encode(successResult)
				})
				return server
			},
			// Create minimal JPEG content with magic numbers
			imageContent: append([]byte{0xFF, 0xD8, 0xFF, 0xE0}, []byte("JFIF test content")...),
			expectedText: "Test document content",
		},
		{
			// MIME detection must short-circuit before any HTTP request.
			name: "invalid mime type",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					t.Log("Server should not be called with invalid mime type")
					w.WriteHeader(http.StatusBadRequest)
				}))
			},
			imageContent: []byte("invalid content"),
			wantErr:      true,
			errContains:  "unsupported file type",
		},
		{
			name: "submission error",
			setupServer: func() *httptest.Server {
				return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusBadRequest)
					fmt.Fprintln(w, "Invalid request")
				}))
			},
			imageContent: []byte{0xFF, 0xD8, 0xFF, 0xE0}, // JPEG magic numbers
			wantErr:      true,
			errContains:  "unexpected status code 400",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			server := tt.setupServer()
			defer server.Close()
			// Route the provider's retrying client through the test server.
			client := retryablehttp.NewClient()
			client.HTTPClient = server.Client()
			client.Logger = log
			provider := &AzureProvider{
				endpoint:   server.URL,
				apiKey:     "test-key",
				modelID:    defaultModelID,
				timeout:    5 * time.Second,
				httpClient: client,
			}
			result, err := provider.ProcessImage(context.Background(), tt.imageContent)
			if tt.wantErr {
				assert.Error(t, err)
				if tt.errContains != "" {
					assert.Contains(t, err.Error(), tt.errContains)
				}
				return
			}
			assert.NoError(t, err)
			assert.NotNil(t, result)
			assert.Equal(t, tt.expectedText, result.Text)
			assert.Equal(t, "azure_docai", result.Metadata["provider"])
			assert.Equal(t, apiVersion, result.Metadata["api_version"])
			assert.Equal(t, "1", result.Metadata["page_count"])
		})
	}
}

View file

@ -1,72 +0,0 @@
package ocr
import "time"
// AzureDocumentResult represents the root response from Azure Document Intelligence
type AzureDocumentResult struct {
	Status              string             `json:"status"`              // "succeeded", "failed", or "running"
	CreatedDateTime     time.Time          `json:"createdDateTime"`     // when the operation was created
	LastUpdatedDateTime time.Time          `json:"lastUpdatedDateTime"` // last status transition
	AnalyzeResult       AzureAnalyzeResult `json:"analyzeResult"`       // populated once Status is "succeeded"
}

// AzureAnalyzeResult represents the analyze result part of the Azure Document Intelligence response
type AzureAnalyzeResult struct {
	APIVersion      string           `json:"apiVersion"`      // API version that produced the result
	ModelID         string           `json:"modelId"`         // model used for analysis
	StringIndexType string           `json:"stringIndexType"` // offset semantics for spans (e.g. "utf-16")
	Content         string           `json:"content"`         // full extracted text of the document
	Pages           []AzurePage      `json:"pages"`
	Paragraphs      []AzureParagraph `json:"paragraphs"`
	Styles          []interface{}    `json:"styles"`
	ContentFormat   string           `json:"contentFormat"`
}

// AzurePage represents a single page in the document
type AzurePage struct {
	PageNumber int         `json:"pageNumber"` // 1-based page index
	Angle      float64     `json:"angle"`      // detected rotation in degrees
	Width      int         `json:"width"`
	Height     int         `json:"height"`
	Unit       string      `json:"unit"` // unit for Width/Height/Polygon coordinates
	Words      []AzureWord `json:"words"`
	Lines      []AzureLine `json:"lines"`
	Spans      []AzureSpan `json:"spans"`
}

// AzureWord represents a single word with its properties
type AzureWord struct {
	Content    string    `json:"content"`
	Polygon    []int     `json:"polygon"`    // bounding polygon vertex coordinates
	Confidence float64   `json:"confidence"` // recognition confidence in [0, 1]
	Span       AzureSpan `json:"span"`       // location of the word in Content
}

// AzureLine represents a line of text
type AzureLine struct {
	Content string      `json:"content"`
	Polygon []int       `json:"polygon"`
	Spans   []AzureSpan `json:"spans"`
}

// AzureSpan represents a span of text with offset and length
// (offsets interpreted per AzureAnalyzeResult.StringIndexType).
type AzureSpan struct {
	Offset int `json:"offset"`
	Length int `json:"length"`
}

// AzureParagraph represents a paragraph of text
type AzureParagraph struct {
	Content         string             `json:"content"`
	Spans           []AzureSpan        `json:"spans"`
	BoundingRegions []AzureBoundingBox `json:"boundingRegions"`
}

// AzureBoundingBox represents the location of content on a page
type AzureBoundingBox struct {
	PageNumber int   `json:"pageNumber"`
	Polygon    []int `json:"polygon"`
}

// AzureStyle represents style information for text segments - changed to interface{} as per input
type AzureStyle interface{}

View file

@ -1,232 +0,0 @@
package ocr
import (
"context"
"fmt"
"html"
"strings"
documentai "cloud.google.com/go/documentai/apiv1"
"cloud.google.com/go/documentai/apiv1/documentaipb"
"github.com/gabriel-vasile/mimetype"
"github.com/sirupsen/logrus"
"google.golang.org/api/option"
)
// GoogleDocAIProvider implements OCR using Google Document AI
type GoogleDocAIProvider struct {
	projectID   string // GCP project hosting the processor
	location    string // processor region (determines the API endpoint)
	processorID string // Document AI processor to invoke
	client      *documentai.DocumentProcessorClient // long-lived client; released via Close()
}
// newGoogleDocAIProvider creates a Document AI client pinned to the
// regional endpoint derived from config.GoogleLocation and wraps it in a
// GoogleDocAIProvider.
func newGoogleDocAIProvider(config Config) (*GoogleDocAIProvider, error) {
	logger := log.WithFields(logrus.Fields{
		"location":     config.GoogleLocation,
		"processor_id": config.GoogleProcessorID,
	})
	logger.Info("Creating new Google Document AI provider")

	// Document AI requires the regional endpoint matching the processor.
	endpoint := fmt.Sprintf("%s-documentai.googleapis.com:443", config.GoogleLocation)
	client, err := documentai.NewDocumentProcessorClient(context.Background(), option.WithEndpoint(endpoint))
	if err != nil {
		logger.WithError(err).Error("Failed to create Document AI client")
		return nil, fmt.Errorf("error creating Document AI client: %w", err)
	}

	logger.Info("Successfully initialized Google Document AI provider")
	return &GoogleDocAIProvider{
		projectID:   config.GoogleProjectID,
		location:    config.GoogleLocation,
		processorID: config.GoogleProcessorID,
		client:      client,
	}, nil
}
// ProcessImage sends the raw document bytes to the configured Document AI
// processor and returns the extracted text, provider metadata, and (when
// pages are present) an hOCR rendering of the layout.
//
// Only MIME types accepted by isImageMIMEType are processed.
func (p *GoogleDocAIProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{
		"project_id":   p.projectID,
		"location":     p.location,
		"processor_id": p.processorID,
	})
	logger.Debug("Starting Document AI processing")
	// Detect MIME type and reject unsupported content before the RPC.
	mtype := mimetype.Detect(imageContent)
	logger.WithField("mime_type", mtype.String()).Debug("Detected file type")
	if !isImageMIMEType(mtype.String()) {
		logger.WithField("mime_type", mtype.String()).Error("Unsupported file type")
		return nil, fmt.Errorf("unsupported file type: %s", mtype.String())
	}
	// Fully-qualified processor resource name.
	name := fmt.Sprintf("projects/%s/locations/%s/processors/%s", p.projectID, p.location, p.processorID)
	req := &documentaipb.ProcessRequest{
		Name: name,
		Source: &documentaipb.ProcessRequest_RawDocument{
			RawDocument: &documentaipb.RawDocument{
				Content:  imageContent,
				MimeType: mtype.String(),
			},
		},
	}
	logger.Debug("Sending request to Document AI")
	resp, err := p.client.ProcessDocument(ctx, req)
	if err != nil {
		logger.WithError(err).Error("Failed to process document")
		return nil, fmt.Errorf("error processing document: %w", err)
	}
	// Guard the nil-chain before dereferencing resp.Document below.
	if resp == nil || resp.Document == nil {
		logger.Error("Received nil response or document from Document AI")
		return nil, fmt.Errorf("received nil response or document from Document AI")
	}
	// The API can return a populated Document carrying an error status.
	if resp.Document.Error != nil {
		logger.WithField("error", resp.Document.Error.Message).Error("Document processing error")
		return nil, fmt.Errorf("document processing error: %s", resp.Document.Error.Message)
	}
	metadata := map[string]string{
		"provider":     "google_docai",
		"mime_type":    mtype.String(),
		"page_count":   fmt.Sprintf("%d", len(resp.Document.GetPages())),
		"processor_id": p.processorID,
	}
	// Safely add language code if available (taken from the first page).
	if pages := resp.Document.GetPages(); len(pages) > 0 {
		if langs := pages[0].GetDetectedLanguages(); len(langs) > 0 {
			metadata["lang_code"] = langs[0].GetLanguageCode()
		}
	}
	result := &OCRResult{
		Text:     resp.Document.Text,
		Metadata: metadata,
	}
	// Add hOCR output if available. generateHOCR is wrapped in a recover
	// so a malformed layout can never crash the whole OCR run — the text
	// result is still returned, only the hOCR is dropped.
	if len(resp.Document.GetPages()) > 0 {
		var hocr string
		func() {
			defer func() {
				if r := recover(); r != nil {
					logger.WithField("error", r).Error("Panic during hOCR generation")
				}
			}()
			hocr = generateHOCR(resp.Document)
		}()
		if hocr != "" {
			result.HOCR = hocr
		}
	}
	logger.WithField("content_length", len(result.Text)).Info("Successfully processed document")
	return result, nil
}
// isImageMIMEType reports whether mimeType is one of the file types the
// OCR providers accept: common raster image formats plus PDF.
func isImageMIMEType(mimeType string) bool {
	switch mimeType {
	case "image/jpeg",
		"image/jpg",
		"image/png",
		"image/tiff",
		"image/bmp",
		"application/pdf":
		return true
	default:
		return false
	}
}
// generateHOCR converts a Document AI response to hOCR format.
//
// Bug fix: paragraph element ids previously used the TOTAL paragraph
// count (`len(page.GetParagraphs())`) instead of the paragraph's index,
// so every paragraph on a page shared the same id (e.g. all "par_1_3"
// on a 3-paragraph page). Ids are now "par_<page>_<paraIndex>" and
// unique within the document. Pages with invalid dimensions and
// paragraphs with out-of-range boxes are skipped, as before.
func generateHOCR(doc *documentaipb.Document) string {
	if len(doc.GetPages()) == 0 {
		return ""
	}
	var hocr strings.Builder
	hocr.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>OCR Output</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name='ocr-system' content='google-docai' />
</head>
<body>`)
	for pageNum, page := range doc.GetPages() {
		pageWidth := page.GetDimension().GetWidth()
		pageHeight := page.GetDimension().GetHeight()
		// Skip pages whose dimensions cannot anchor a bounding box.
		if pageWidth <= 0 || pageHeight <= 0 {
			continue
		}
		hocr.WriteString(fmt.Sprintf(`
<div class='ocr_page' id='page_%d' title='image;bbox 0 0 %d %d'>`,
			pageNum+1, int(pageWidth), int(pageHeight)))
		// Process paragraphs
		for paraIdx, para := range page.GetParagraphs() {
			paraBox := para.GetLayout().GetBoundingPoly().GetNormalizedVertices()
			if len(paraBox) < 4 {
				continue
			}
			// Convert normalized [0,1] coordinates to absolute pixels.
			// Use float64 for intermediate math to prevent overflow.
			x1 := int(float64(paraBox[0].GetX()) * float64(pageWidth))
			y1 := int(float64(paraBox[0].GetY()) * float64(pageHeight))
			x2 := int(float64(paraBox[2].GetX()) * float64(pageWidth))
			y2 := int(float64(paraBox[2].GetY()) * float64(pageHeight))
			// Drop paragraphs whose box falls outside the page.
			if x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0 ||
				x1 > int(pageWidth) || y1 > int(pageHeight) ||
				x2 > int(pageWidth) || y2 > int(pageHeight) {
				continue
			}
			// Fixed: id uses the paragraph index, not the total count.
			hocr.WriteString(fmt.Sprintf(`
<p class='ocr_par' id='par_%d_%d' title='bbox %d %d %d %d'>`,
				pageNum+1, paraIdx+1, x1, y1, x2, y2))
			// Emit each referenced text segment as an ocrx_word span.
			for _, token := range para.GetLayout().GetTextAnchor().GetTextSegments() {
				text := doc.Text[token.GetStartIndex():token.GetEndIndex()]
				if text == "" {
					continue
				}
				// Escape HTML special characters
				text = html.EscapeString(text)
				hocr.WriteString(fmt.Sprintf(`
<span class='ocrx_word'>%s</span>`, text))
			}
			hocr.WriteString("\n </p>")
		}
		hocr.WriteString("\n </div>")
	}
	hocr.WriteString("\n</body>\n</html>")
	return hocr.String()
}
// Close releases the underlying Document AI client, if one was created.
// Safe to call on a provider that was never fully initialized.
func (p *GoogleDocAIProvider) Close() error {
	if p.client == nil {
		return nil
	}
	return p.client.Close()
}

View file

@ -1,94 +0,0 @@
package ocr
import (
"regexp"
"strings"
"testing"
"cloud.google.com/go/documentai/apiv1/documentaipb"
)
// TestGenerateHOCR verifies hOCR generation from Document AI responses:
// an empty document yields an empty string, and a simple one-page
// document produces the expected page/paragraph/word markup.
func TestGenerateHOCR(t *testing.T) {
	tests := []struct {
		name     string
		doc      *documentaipb.Document
		expected string // regex the output must match; "" means output must be empty
	}{
		{
			name:     "empty document",
			doc:      &documentaipb.Document{},
			expected: "",
		},
		{
			name: "single page with one paragraph",
			doc: &documentaipb.Document{
				Text: "Hello World",
				Pages: []*documentaipb.Document_Page{
					{
						Dimension: &documentaipb.Document_Page_Dimension{
							Width:  800,
							Height: 600,
						},
						Paragraphs: []*documentaipb.Document_Page_Paragraph{
							{
								Layout: &documentaipb.Document_Page_Layout{
									// Normalized box; generateHOCR converts it to the
									// absolute bbox asserted in `expected` below.
									BoundingPoly: &documentaipb.BoundingPoly{
										NormalizedVertices: []*documentaipb.NormalizedVertex{
											{X: 0.1, Y: 0.1},
											{X: 0.9, Y: 0.1},
											{X: 0.9, Y: 0.2},
											{X: 0.1, Y: 0.2},
										},
									},
									// Segment [0,11) covers the full "Hello World" text.
									TextAnchor: &documentaipb.Document_TextAnchor{
										TextSegments: []*documentaipb.Document_TextAnchor_TextSegment{
											{
												StartIndex: 0,
												EndIndex:   11,
											},
										},
									},
								},
							},
						},
					},
				},
			},
			expected: "(?s).*<div class='ocr_page' id='page_1' title='image;bbox 0 0 800 600'>.*" +
				"<p class='ocr_par' id='par_1_1' title='bbox 80 60 719 120'>.*" +
				"<span class='ocrx_word'>Hello World</span>.*</p>.*</div>.*",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := generateHOCR(tt.doc)
			// Empty expectation means the output itself must be empty.
			if tt.expected == "" {
				if result != "" {
					t.Errorf("expected empty string, got %v", result)
				}
				return
			}
			matched, err := regexp.MatchString(tt.expected, result)
			if err != nil {
				t.Fatalf("error matching regex: %v", err)
			}
			if !matched {
				t.Errorf("expected to match regex %v\ngot: %v", tt.expected, result)
			}
			// Verify basic hOCR structure
			if !strings.Contains(result, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") {
				t.Error("missing XML declaration")
			}
			if !strings.Contains(result, "<html xmlns=\"http://www.w3.org/1999/xhtml\"") {
				t.Error("missing HTML namespace")
			}
			if !strings.Contains(result, "<meta name='ocr-system' content='google-docai'") {
				t.Error("missing OCR system metadata")
			}
		})
	}
}

View file

@ -1,147 +0,0 @@
package ocr
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"image"
"os"
"strings"
_ "image/jpeg"
"github.com/sirupsen/logrus"
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/ollama"
"github.com/tmc/langchaingo/llms/openai"
)
// LLMProvider implements OCR using LLM vision models
type LLMProvider struct {
	provider string // vision LLM provider name ("openai" or "ollama")
	model    string // model identifier passed to the provider
	llm      llms.Model
	template string // OCR prompt template
}
// newLLMProvider builds an LLMProvider for the vision provider named
// in config (currently "openai" or "ollama", case-insensitive).
// Returns an error for unknown providers or client-creation failures.
func newLLMProvider(config Config) (*LLMProvider, error) {
	logger := log.WithFields(logrus.Fields{
		"provider": config.VisionLLMProvider,
		"model":    config.VisionLLMModel,
	})
	logger.Info("Creating new LLM OCR provider")

	var (
		client llms.Model
		err    error
	)
	switch strings.ToLower(config.VisionLLMProvider) {
	case "openai":
		logger.Debug("Initializing OpenAI vision model")
		client, err = createOpenAIClient(config)
	case "ollama":
		logger.Debug("Initializing Ollama vision model")
		client, err = createOllamaClient(config)
	default:
		return nil, fmt.Errorf("unsupported vision LLM provider: %s", config.VisionLLMProvider)
	}
	if err != nil {
		logger.WithError(err).Error("Failed to create vision LLM client")
		return nil, fmt.Errorf("error creating vision LLM client: %w", err)
	}

	logger.Info("Successfully initialized LLM OCR provider")
	provider := &LLMProvider{
		provider: config.VisionLLMProvider,
		model:    config.VisionLLMModel,
		llm:      client,
		template: defaultOCRPrompt,
	}
	return provider, nil
}
// ProcessImage runs OCR on a single JPEG image by sending it to the
// configured vision LLM along with the prompt template, returning the
// transcribed text plus provider/model metadata.
func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error) {
	logger := log.WithFields(logrus.Fields{
		"provider": p.provider,
		"model":    p.model,
	})
	logger.Debug("Starting OCR processing")

	// Decode first: validates the payload and lets us log dimensions.
	img, _, err := image.Decode(bytes.NewReader(imageContent))
	if err != nil {
		logger.WithError(err).Error("Failed to decode image")
		return nil, fmt.Errorf("error decoding image: %w", err)
	}
	bounds := img.Bounds()
	logger.WithFields(logrus.Fields{
		"width":  bounds.Dx(),
		"height": bounds.Dy(),
	}).Debug("Image dimensions")

	// OpenAI expects images as base64 data URLs; other providers
	// accept raw binary parts.
	var parts []llms.ContentPart
	if strings.ToLower(p.provider) != "openai" {
		logger.Debug("Using binary image format for non-OpenAI provider")
		parts = []llms.ContentPart{
			llms.BinaryPart("image/jpeg", imageContent),
			llms.TextPart(p.template),
		}
	} else {
		logger.Debug("Using base64 image format for OpenAI provider")
		base64Image := base64.StdEncoding.EncodeToString(imageContent)
		parts = []llms.ContentPart{
			llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
			llms.TextPart(p.template),
		}
	}

	// Convert the image to text
	logger.Debug("Sending request to vision model")
	completion, err := p.llm.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: parts,
			Role:  llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		logger.WithError(err).Error("Failed to get response from vision model")
		return nil, fmt.Errorf("error getting response from LLM: %w", err)
	}

	// BUG FIX: guard against an empty choice list instead of panicking
	// on completion.Choices[0].
	if len(completion.Choices) == 0 {
		logger.Error("Vision model returned no response choices")
		return nil, fmt.Errorf("no response choices returned from LLM")
	}

	result := &OCRResult{
		Text: completion.Choices[0].Content,
		Metadata: map[string]string{
			"provider": p.provider,
			"model":    p.model,
		},
	}

	logger.WithField("content_length", len(result.Text)).Info("Successfully processed image")
	return result, nil
}
// createOpenAIClient builds an OpenAI vision model client for the
// model named in config. The API key is read from the OPENAI_API_KEY
// environment variable; an empty/missing key is an error.
func createOpenAIClient(config Config) (llms.Model, error) {
	token, ok := os.LookupEnv("OPENAI_API_KEY")
	if !ok || token == "" {
		return nil, fmt.Errorf("OpenAI API key is not set")
	}
	return openai.New(
		openai.WithModel(config.VisionLLMModel),
		openai.WithToken(token),
	)
}
// createOllamaClient builds an Ollama vision model client for the
// model named in config. The server URL comes from OLLAMA_HOST,
// defaulting to the local daemon at http://127.0.0.1:11434.
func createOllamaClient(config Config) (llms.Model, error) {
	serverURL := os.Getenv("OLLAMA_HOST")
	if serverURL == "" {
		serverURL = "http://127.0.0.1:11434"
	}
	return ollama.New(
		ollama.WithServerURL(serverURL),
		ollama.WithModel(config.VisionLLMModel),
	)
}
const defaultOCRPrompt = `Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format but without a code block.`

View file

@ -1,92 +0,0 @@
package ocr
import (
"context"
"fmt"
"github.com/sirupsen/logrus"
)
var log = logrus.New()
// OCRResult holds the output from OCR processing
type OCRResult struct {
	// Plain text output (required)
	Text string
	// hOCR output (optional, if provider supports it)
	HOCR string
	// Additional provider-specific metadata (e.g. provider/model names)
	Metadata map[string]string
}
// Provider defines the interface for OCR processing
type Provider interface {
	// ProcessImage extracts text from a single image payload.
	ProcessImage(ctx context.Context, imageContent []byte) (*OCRResult, error)
}
// Config holds the OCR provider configuration
type Config struct {
	// Provider type (e.g., "llm", "google_docai", "azure")
	Provider string
	// Google Document AI settings
	GoogleProjectID   string
	GoogleLocation    string
	GoogleProcessorID string
	// LLM settings (from existing config)
	VisionLLMProvider string // e.g. "openai" or "ollama"
	VisionLLMModel    string
	// Azure Document Intelligence settings
	AzureEndpoint string
	AzureAPIKey   string
	AzureModelID  string // Optional, defaults to "prebuilt-read"
	AzureTimeout  int    // Optional, defaults to 120 seconds
	// OCR output options
	EnableHOCR bool // Whether to request hOCR output if supported by the provider
}
// NewProvider creates a new OCR provider based on configuration.
// Supported providers: "google_docai", "llm", "azure". Returns an
// error when the provider is unknown or its required settings are
// missing.
func NewProvider(config Config) (Provider, error) {
	log.Info("Initializing OCR provider: ", config.Provider)

	switch config.Provider {
	case "google_docai":
		if config.GoogleProjectID == "" || config.GoogleLocation == "" || config.GoogleProcessorID == "" {
			return nil, fmt.Errorf("missing required Google Document AI configuration")
		}
		log.WithFields(logrus.Fields{
			"location":     config.GoogleLocation,
			"processor_id": config.GoogleProcessorID,
		}).Info("Using Google Document AI provider")
		return newGoogleDocAIProvider(config)

	case "llm":
		if config.VisionLLMProvider == "" || config.VisionLLMModel == "" {
			return nil, fmt.Errorf("missing required LLM configuration")
		}
		log.WithFields(logrus.Fields{
			"provider": config.VisionLLMProvider,
			"model":    config.VisionLLMModel,
		}).Info("Using LLM OCR provider")
		return newLLMProvider(config)

	case "azure":
		if config.AzureEndpoint == "" || config.AzureAPIKey == "" {
			return nil, fmt.Errorf("missing required Azure Document Intelligence configuration")
		}
		// Consistency fix: log provider selection like the other cases do.
		log.WithFields(logrus.Fields{
			"endpoint": config.AzureEndpoint,
		}).Info("Using Azure Document Intelligence provider")
		return newAzureProvider(config)

	default:
		return nil, fmt.Errorf("unsupported OCR provider: %s", config.Provider)
	}
}
// SetLogLevel sets the logging level for the OCR package
// (applies to the shared package-level logrus logger).
func SetLogLevel(level logrus.Level) {
	log.SetLevel(level)
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 42 KiB

View file

@ -1,774 +0,0 @@
package main
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"image/jpeg"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"sync"
"github.com/gen2brain/go-fitz"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
"gorm.io/gorm"
)
// PaperlessClient struct to interact with the Paperless-NGX API
type PaperlessClient struct {
	BaseURL     string // server URL, stored without trailing slash
	APIToken    string // sent as "Authorization: Token <APIToken>"
	HTTPClient  *http.Client
	CacheFolder string // where page images are cached; empty means a temp-dir default
}
// hasSameTags reports whether original and suggested contain exactly
// the same tags, ignoring order (a multiset comparison — duplicate
// counts must match too). Neither input slice is modified.
func hasSameTags(original, suggested []string) bool {
	if len(original) != len(suggested) {
		return false
	}
	// Count occurrences in original, then consume them from suggested.
	counts := make(map[string]int, len(original))
	for _, tag := range original {
		counts[tag]++
	}
	for _, tag := range suggested {
		counts[tag]--
		if counts[tag] < 0 {
			return false
		}
	}
	// Equal lengths plus no negative count implies all counts are zero.
	return true
}
// NewPaperlessClient creates a new PaperlessClient for the given
// server URL and API token. The cache folder is taken from the
// PAPERLESS_GPT_CACHE_DIR environment variable (may be empty).
func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
	// TLS verification can be disabled via the package-level flag
	// (for servers with self-signed certificates).
	transport := &http.Transport{
		TLSClientConfig: &tls.Config{
			InsecureSkipVerify: paperlessInsecureSkipVerify,
		},
	}
	return &PaperlessClient{
		BaseURL:     strings.TrimRight(baseURL, "/"),
		APIToken:    apiToken,
		HTTPClient:  &http.Client{Transport: transport},
		CacheFolder: os.Getenv("PAPERLESS_GPT_CACHE_DIR"),
	}
}
// Do makes an authenticated request to the Paperless-NGX API and
// returns the raw response. For "api/" paths it additionally rejects
// HTML responses (a common symptom of TLS/auth misconfiguration)
// with a descriptive error. The caller must close the response body
// on success.
func (client *PaperlessClient) Do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
	url := fmt.Sprintf("%s/%s", client.BaseURL, strings.TrimLeft(path, "/"))
	req, err := http.NewRequestWithContext(ctx, method, url, body)
	if err != nil {
		return nil, err
	}

	req.Header.Set("Authorization", fmt.Sprintf("Token %s", client.APIToken))
	// Set Content-Type if body is present
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}

	log.WithFields(logrus.Fields{
		"method": method,
		"url":    url,
	}).Debug("Making HTTP request")

	resp, err := client.HTTPClient.Do(req)
	if err != nil {
		log.WithError(err).WithFields(logrus.Fields{
			"url":    url,
			"method": method,
			"error":  err,
		}).Error("HTTP request failed")
		return nil, fmt.Errorf("HTTP request failed: %w", err)
	}

	// Check if response is HTML instead of JSON for API endpoints.
	if strings.HasPrefix(path, "api/") {
		contentType := resp.Header.Get("Content-Type")
		if strings.Contains(contentType, "text/html") {
			bodyBytes, _ := io.ReadAll(resp.Body)
			resp.Body.Close()

			// FIX: the previous version rebuilt a new *http.Response here
			// only to discard it on return — log directly instead.
			log.WithFields(logrus.Fields{
				"url":          url,
				"method":       method,
				"content-type": contentType,
				"status-code":  resp.StatusCode,
				"response":     string(bodyBytes),
				"base-url":     client.BaseURL,
				"request-path": path,
				"full-headers": resp.Header,
			}).Error("Received HTML response for API request")
			return nil, fmt.Errorf("received HTML response instead of JSON (status: %d). This often indicates an SSL/TLS issue or invalid authentication. Check your PAPERLESS_URL, PAPERLESS_TOKEN and PAPERLESS_INSECURE_SKIP_VERIFY settings. Full response: %s", resp.StatusCode, string(bodyBytes))
		}
	}

	return resp, nil
}
// GetAllTags retrieves all tags from the Paperless-NGX API, following
// pagination via the "next" link until exhausted. Returns a map of
// tag name -> tag ID.
func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error) {
	tagIDMapping := make(map[string]int)
	path := "api/tags/"

	for path != "" {
		resp, err := client.Do(ctx, "GET", path, nil)
		if err != nil {
			return nil, err
		}

		// FIX: close each response body within its iteration. The old
		// `defer resp.Body.Close()` inside the loop kept every page's
		// body open until the function returned.
		if resp.StatusCode != http.StatusOK {
			bodyBytes, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			return nil, fmt.Errorf("error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
		}

		var tagsResponse struct {
			Results []struct {
				ID   int    `json:"id"`
				Name string `json:"name"`
			} `json:"results"`
			Next string `json:"next"`
		}

		err = json.NewDecoder(resp.Body).Decode(&tagsResponse)
		resp.Body.Close()
		if err != nil {
			return nil, err
		}

		for _, tag := range tagsResponse.Results {
			tagIDMapping[tag.Name] = tag.ID
		}

		// Extract relative path from the Next URL (the API returns an
		// absolute URL; Do() expects a server-relative path).
		if tagsResponse.Next != "" {
			nextURL := tagsResponse.Next
			if strings.HasPrefix(nextURL, "http") {
				// Extract just the path portion from the full URL
				if parsedURL, err := url.Parse(nextURL); err == nil {
					path = strings.TrimPrefix(parsedURL.Path, "/")
					if parsedURL.RawQuery != "" {
						path += "?" + parsedURL.RawQuery
					}
				} else {
					return nil, fmt.Errorf("failed to parse next URL: %v", err)
				}
			} else {
				path = strings.TrimPrefix(nextURL, "/")
			}
		} else {
			path = ""
		}
	}

	return tagIDMapping, nil
}
// GetDocumentsByTags retrieves documents matching all of the given
// tags (case-insensitive name match), limited to pageSize results.
// Tag and correspondent IDs in the response are resolved to names.
func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string, pageSize int) ([]Document, error) {
	tagQueries := make([]string, len(tags))
	for i, tag := range tags {
		tagQueries[i] = fmt.Sprintf("tags__name__iexact=%s", tag)
	}
	searchQuery := strings.Join(tagQueries, "&")
	path := fmt.Sprintf("api/documents/?%s&page_size=%d", urlEncode(searchQuery), pageSize)

	resp, err := client.Do(ctx, "GET", path, nil)
	if err != nil {
		return nil, fmt.Errorf("HTTP request failed in GetDocumentsByTags: %w", err)
	}
	defer resp.Body.Close()

	// Read the response body
	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response body: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		log.WithFields(logrus.Fields{
			"status_code": resp.StatusCode,
			"path":        path,
			"response":    string(bodyBytes),
			"headers":     resp.Header,
		}).Error("Error response from server in GetDocumentsByTags")
		return nil, fmt.Errorf("error searching documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes))
	}

	var documentsResponse GetDocumentsApiResponse
	err = json.Unmarshal(bodyBytes, &documentsResponse)
	if err != nil {
		log.WithFields(logrus.Fields{
			"response_body": string(bodyBytes),
			"error":         err,
		}).Error("Failed to parse JSON response in GetDocumentsByTags")
		return nil, fmt.Errorf("failed to parse JSON response: %w", err)
	}

	allTags, err := client.GetAllTags(ctx)
	if err != nil {
		return nil, err
	}
	allCorrespondents, err := client.GetAllCorrespondents(ctx)
	if err != nil {
		return nil, err
	}

	// PERF: invert the name->ID maps once so per-document lookups are
	// O(1) instead of scanning every tag/correspondent for each ID.
	tagNameByID := make(map[int]string, len(allTags))
	for name, id := range allTags {
		tagNameByID[id] = name
	}
	correspondentNameByID := make(map[int]string, len(allCorrespondents))
	for name, id := range allCorrespondents {
		correspondentNameByID[id] = name
	}

	documents := make([]Document, 0, len(documentsResponse.Results))
	for _, result := range documentsResponse.Results {
		// Unknown tag IDs resolve to "" (same as the previous scan).
		tagNames := make([]string, len(result.Tags))
		for i, resultTagID := range result.Tags {
			tagNames[i] = tagNameByID[resultTagID]
		}

		correspondentName := ""
		if result.Correspondent != 0 {
			correspondentName = correspondentNameByID[result.Correspondent]
		}

		documents = append(documents, Document{
			ID:            result.ID,
			Title:         result.Title,
			Content:       result.Content,
			Correspondent: correspondentName,
			Tags:          tagNames,
		})
	}

	return documents, nil
}
// DownloadPDF downloads and returns the raw PDF bytes of the given
// document. Non-200 responses are turned into descriptive errors.
func (client *PaperlessClient) DownloadPDF(ctx context.Context, document Document) ([]byte, error) {
	downloadPath := fmt.Sprintf("api/documents/%d/download/", document.ID)
	resp, err := client.Do(ctx, "GET", downloadPath, nil)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return io.ReadAll(resp.Body)
	}

	// Include whatever the server sent in the error for diagnosis.
	bodyBytes, _ := io.ReadAll(resp.Body)
	return nil, fmt.Errorf("error downloading document %d: %d, %s", document.ID, resp.StatusCode, string(bodyBytes))
}
// GetDocument fetches a single document by ID and resolves its tag
// and correspondent IDs to names.
func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int) (Document, error) {
	path := fmt.Sprintf("api/documents/%d/", documentID)
	resp, err := client.Do(ctx, "GET", path, nil)
	if err != nil {
		return Document{}, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		bodyBytes, _ := io.ReadAll(resp.Body)
		return Document{}, fmt.Errorf("error fetching document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
	}

	var documentResponse GetDocumentApiResponse
	err = json.NewDecoder(resp.Body).Decode(&documentResponse)
	if err != nil {
		return Document{}, err
	}

	allTags, err := client.GetAllTags(ctx)
	if err != nil {
		return Document{}, err
	}
	allCorrespondents, err := client.GetAllCorrespondents(ctx)
	if err != nil {
		return Document{}, err
	}

	// PERF: invert the name->ID maps once so each ID lookup is O(1)
	// instead of a scan (consistent with GetDocumentsByTags).
	tagNameByID := make(map[int]string, len(allTags))
	for name, id := range allTags {
		tagNameByID[id] = name
	}
	correspondentNameByID := make(map[int]string, len(allCorrespondents))
	for name, id := range allCorrespondents {
		correspondentNameByID[id] = name
	}

	// Match tag IDs to tag names; unknown IDs resolve to "".
	tagNames := make([]string, len(documentResponse.Tags))
	for i, resultTagID := range documentResponse.Tags {
		tagNames[i] = tagNameByID[resultTagID]
	}

	// Match correspondent ID to correspondent name ("" if unknown).
	correspondentName := correspondentNameByID[documentResponse.Correspondent]

	return Document{
		ID:            documentResponse.ID,
		Title:         documentResponse.Title,
		Content:       documentResponse.Content,
		Correspondent: correspondentName,
		Tags:          tagNames,
	}, nil
}
// UpdateDocuments applies the suggested changes (tags, correspondent,
// title, content) to each document via the paperless-ngx API and
// records every effective change in the modification history table.
// isUndo controls whether the manual tag is kept when mapping
// suggested tag names to IDs.
func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error {
	// Fetch all available tags once; needed to map tag names to IDs.
	availableTags, err := client.GetAllTags(ctx)
	if err != nil {
		log.Errorf("Error fetching available tags: %v", err)
		return err
	}

	// Only fetch correspondents when at least one document suggests one.
	documentsContainSuggestedCorrespondent := false
	for _, document := range documents {
		if document.SuggestedCorrespondent != "" {
			documentsContainSuggestedCorrespondent = true
			break
		}
	}

	availableCorrespondents := make(map[string]int)
	if documentsContainSuggestedCorrespondent {
		availableCorrespondents, err = client.GetAllCorrespondents(ctx)
		if err != nil {
			log.Errorf("Error fetching available correspondents: %v",
				err)
			return err
		}
	}

	for _, document := range documents {
		documentID := document.ID

		// originalFields records the pre-update value of every field
		// we change, so modification history can be written afterwards.
		originalFields := make(map[string]interface{})
		updatedFields := make(map[string]interface{})

		newTags := []int{}
		tags := document.SuggestedTags
		originalTags := document.OriginalDocument.Tags

		// Snapshot the original tags before RemoveTags is applied.
		originalTagsJSON, err := json.Marshal(originalTags)
		if err != nil {
			log.Errorf("Error marshalling JSON for document %d: %v", documentID, err)
			return err
		}

		// remove autoTag to prevent infinite loop (even if it is in the original tags)
		for _, tag := range document.RemoveTags {
			originalTags = removeTagFromList(originalTags, tag)
		}

		if len(tags) == 0 {
			tags = originalTags
		} else {
			// We have suggested tags to change
			originalFields["tags"] = originalTags
			// remove autoTag to prevent infinite loop - this is required in case of undo
			tags = removeTagFromList(tags, autoTag)
			// remove duplicates
			slices.Sort(tags)
			tags = slices.Compact(tags)
		}

		updatedTagsJSON, err := json.Marshal(tags)
		if err != nil {
			log.Errorf("Error marshalling JSON for document %d: %v", documentID, err)
			return err
		}

		// Map suggested tag names to IDs; unknown names are skipped.
		for _, tagName := range tags {
			if tagID, exists := availableTags[tagName]; exists {
				// Skip the manual tag unless this is an undo operation.
				if !isUndo && tagName == manualTag {
					continue
				}
				newTags = append(newTags, tagID)
			} else {
				log.Errorf("Suggested tag '%s' does not exist in paperless-ngx, skipping.", tagName)
			}
		}
		updatedFields["tags"] = newTags

		// Map the suggested correspondent name to an ID, creating the
		// correspondent on the fly when it does not exist yet.
		if document.SuggestedCorrespondent != "" {
			if correspondentID, exists := availableCorrespondents[document.SuggestedCorrespondent]; exists {
				updatedFields["correspondent"] = correspondentID
			} else {
				newCorrespondent := instantiateCorrespondent(document.SuggestedCorrespondent)
				newCorrespondentID, err := client.CreateOrGetCorrespondent(context.Background(), newCorrespondent)
				if err != nil {
					log.Errorf("Error creating/getting correspondent with name %s: %v\n", document.SuggestedCorrespondent, err)
					return err
				}
				log.Infof("Using correspondent with name %s and ID %d\n", document.SuggestedCorrespondent, newCorrespondentID)
				updatedFields["correspondent"] = newCorrespondentID
			}
		}

		// NOTE(review): this truncates by byte, which can split a
		// multi-byte UTF-8 character at position 128 — confirm acceptable.
		suggestedTitle := document.SuggestedTitle
		if len(suggestedTitle) > 128 {
			suggestedTitle = suggestedTitle[:128]
		}
		if suggestedTitle != "" {
			originalFields["title"] = document.OriginalDocument.Title
			updatedFields["title"] = suggestedTitle
		} else {
			log.Warnf("No valid title found for document %d, skipping.", documentID)
		}

		// Suggested Content
		suggestedContent := document.SuggestedContent
		if suggestedContent != "" {
			originalFields["content"] = document.OriginalDocument.Content
			updatedFields["content"] = suggestedContent
		}
		log.Debugf("Document %d: Original fields: %v", documentID, originalFields)
		log.Debugf("Document %d: Updated fields: %v Tags: %v", documentID, updatedFields, tags)

		// Marshal updated fields to JSON
		jsonData, err := json.Marshal(updatedFields)
		if err != nil {
			log.Errorf("Error marshalling JSON for document %d: %v", documentID, err)
			return err
		}

		// Send the update request using the generic Do method.
		path := fmt.Sprintf("api/documents/%d/", documentID)
		resp, err := client.Do(ctx, "PATCH", path, bytes.NewBuffer(jsonData))
		if err != nil {
			log.Errorf("Error updating document %d: %v", documentID, err)
			return err
		}

		// FIX: close the body explicitly instead of deferring inside
		// the loop (defers would pile up until the function returned).
		if resp.StatusCode != http.StatusOK {
			bodyBytes, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			log.Errorf("Error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
			return fmt.Errorf("error updating document %d: %d, %s", documentID, resp.StatusCode, string(bodyBytes))
		}
		resp.Body.Close()

		// Record each changed field in the modification history.
		for field, value := range originalFields {
			// FIX: the old log printed the original value twice
			// ("from X to X"); log original -> updated instead.
			log.Printf("Document %d: Updated %s from %v to %v", documentID, field, value, updatedFields[field])
			var modificationRecord ModificationHistory
			if field == "tags" {
				// Only record tag changes when the tag set actually
				// differs (compare names, not the ID list).
				if !hasSameTags(document.OriginalDocument.Tags, tags) {
					modificationRecord = ModificationHistory{
						DocumentID:    uint(documentID),
						ModField:      field,
						PreviousValue: string(originalTagsJSON),
						NewValue:      string(updatedTagsJSON),
					}
				}
			} else if originalFields[field] != updatedFields[field] {
				// Only store a record if the field actually changed.
				modificationRecord = ModificationHistory{
					DocumentID:    uint(documentID),
					ModField:      field,
					PreviousValue: fmt.Sprintf("%v", originalFields[field]),
					NewValue:      fmt.Sprintf("%v", updatedFields[field]),
				}
			}
			// FIX: only check the insert error when an insert was
			// actually attempted (previously a stale err was re-checked
			// even when no record was written).
			if (modificationRecord != ModificationHistory{}) {
				if err := InsertModification(db, &modificationRecord); err != nil {
					log.Errorf("Error inserting modification record for document %d: %v", documentID, err)
					return err
				}
			}
		}

		log.Printf("Document %d updated successfully.", documentID)
	}

	return nil
}
// DownloadDocumentAsImages downloads the PDF file of the specified document and converts it to images
// If limitPages > 0, only the first N pages will be processed
func (client *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, documentId int, limitPages int) ([]string, error) {
	// Create a directory named after the document ID
	docDir := filepath.Join(client.GetCacheFolder(), fmt.Sprintf("document-%d", documentId))
	if _, err := os.Stat(docDir); os.IsNotExist(err) {
		err = os.MkdirAll(docDir, 0755)
		if err != nil {
			return nil, err
		}
	}
	// Check if images already exist (cache hit): collect consecutive
	// pageNNN.jpg files until one is missing or the page limit is hit.
	var imagePaths []string
	for n := 0; ; n++ {
		if limitPages > 0 && n >= limitPages {
			break
		}
		imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n))
		if _, err := os.Stat(imagePath); os.IsNotExist(err) {
			break
		}
		imagePaths = append(imagePaths, imagePath)
	}
	// If images exist, return them
	if len(imagePaths) > 0 {
		return imagePaths, nil
	}
	// Proceed with downloading and converting the document to images
	path := fmt.Sprintf("api/documents/%d/download/", documentId)
	resp, err := client.Do(ctx, "GET", path, nil)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		bodyBytes, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("error downloading document %d: %d, %s", documentId, resp.StatusCode, string(bodyBytes))
	}
	pdfData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	// fitz.New takes a file path, so spill the PDF to a temp file first.
	tmpFile, err := os.CreateTemp("", "document-*.pdf")
	if err != nil {
		return nil, err
	}
	defer os.Remove(tmpFile.Name())
	_, err = tmpFile.Write(pdfData)
	if err != nil {
		return nil, err
	}
	tmpFile.Close()
	doc, err := fitz.New(tmpFile.Name())
	if err != nil {
		return nil, err
	}
	defer doc.Close()
	totalPages := doc.NumPage()
	if limitPages > 0 && limitPages < totalPages {
		totalPages = limitPages
	}
	// Render pages concurrently; page rendering itself is serialized
	// by the mutex, JPEG encode/verify runs in parallel.
	var mu sync.Mutex
	var g errgroup.Group
	for n := 0; n < totalPages; n++ {
		n := n // capture loop variable
		g.Go(func() error {
			mu.Lock()
			// I assume the libmupdf library is not thread-safe
			img, err := doc.Image(n)
			mu.Unlock()
			if err != nil {
				return err
			}
			imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n))
			f, err := os.Create(imagePath)
			if err != nil {
				return err
			}
			err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
			if err != nil {
				f.Close()
				return err
			}
			f.Close()
			// Verify the JPEG file round-trips through the decoder
			// before trusting it as cached output.
			file, err := os.Open(imagePath)
			if err != nil {
				return err
			}
			defer file.Close()
			_, err = jpeg.Decode(file)
			if err != nil {
				return fmt.Errorf("invalid JPEG file: %s", imagePath)
			}
			// imagePaths is shared across goroutines; guard the append.
			mu.Lock()
			imagePaths = append(imagePaths, imagePath)
			mu.Unlock()
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		return nil, err
	}
	// sort the image paths to ensure they are in order
	slices.Sort(imagePaths)
	return imagePaths, nil
}
// GetCacheFolder returns the cache folder for the PaperlessClient,
// lazily defaulting to <os temp dir>/paperless-gpt when unset and
// memoizing the result on the client.
func (client *PaperlessClient) GetCacheFolder() string {
	if client.CacheFolder != "" {
		return client.CacheFolder
	}
	client.CacheFolder = filepath.Join(os.TempDir(), "paperless-gpt")
	return client.CacheFolder
}
// urlEncode replaces spaces with '+' for use in query strings.
// NOTE: intentionally minimal — callers (e.g. GetDocumentsByTags)
// pass pre-joined query strings containing '&' and '=', so a full
// query escape would corrupt them.
func urlEncode(s string) string {
	return strings.Join(strings.Split(s, " "), "+")
}
// instantiateCorrespondent creates a new Correspondent object with default values
// (case-insensitive matching, empty match pattern, algorithm 0, no owner).
func instantiateCorrespondent(name string) Correspondent {
	return Correspondent{
		Name:              name,
		MatchingAlgorithm: 0,
		Match:             "",
		IsInsensitive:     true,
		Owner:             nil,
	}
}
// CreateOrGetCorrespondent returns the ID of an existing correspondent
// with the same name, or creates a new one and returns its ID.
func (client *PaperlessClient) CreateOrGetCorrespondent(ctx context.Context, correspondent Correspondent) (int, error) {
	// Reuse an existing correspondent when the name already exists.
	existing, err := client.GetAllCorrespondents(ctx)
	if err != nil {
		return 0, fmt.Errorf("error fetching correspondents: %w", err)
	}
	if id, ok := existing[correspondent.Name]; ok {
		log.Infof("Using existing correspondent with name %s and ID %d", correspondent.Name, id)
		return id, nil
	}

	// Not found: create a new correspondent via POST.
	payload, err := json.Marshal(correspondent)
	if err != nil {
		return 0, err
	}
	resp, err := client.Do(ctx, "POST", "api/correspondents/", bytes.NewBuffer(payload))
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusCreated {
		bodyBytes, _ := io.ReadAll(resp.Body)
		return 0, fmt.Errorf("error creating correspondent: %d, %s", resp.StatusCode, string(bodyBytes))
	}

	var created struct {
		ID int `json:"id"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&created); err != nil {
		return 0, err
	}
	return created.ID, nil
}
// CorrespondentResponse represents the response structure for correspondents
// as returned by the paperless-ngx /api/correspondents/ endpoint.
type CorrespondentResponse struct {
	Results []struct {
		ID   int    `json:"id"`
		Name string `json:"name"`
	} `json:"results"`
}
// GetAllCorrespondents retrieves all correspondents from the
// Paperless-NGX API as a map of name -> ID. FIX: it now follows the
// "next" pagination link like GetAllTags does, instead of silently
// truncating at the first page (page_size=9999).
func (client *PaperlessClient) GetAllCorrespondents(ctx context.Context) (map[string]int, error) {
	correspondentIDMapping := make(map[string]int)
	path := "api/correspondents/?page_size=9999"

	for path != "" {
		resp, err := client.Do(ctx, "GET", path, nil)
		if err != nil {
			return nil, err
		}

		if resp.StatusCode != http.StatusOK {
			bodyBytes, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			return nil, fmt.Errorf("error fetching correspondents: %d, %s", resp.StatusCode, string(bodyBytes))
		}

		var page struct {
			Results []struct {
				ID   int    `json:"id"`
				Name string `json:"name"`
			} `json:"results"`
			Next string `json:"next"`
		}
		err = json.NewDecoder(resp.Body).Decode(&page)
		resp.Body.Close()
		if err != nil {
			return nil, err
		}

		for _, correspondent := range page.Results {
			correspondentIDMapping[correspondent.Name] = correspondent.ID
		}

		// Follow pagination; the API returns an absolute next URL but
		// Do() expects a server-relative path.
		if page.Next == "" {
			path = ""
		} else if strings.HasPrefix(page.Next, "http") {
			parsedURL, err := url.Parse(page.Next)
			if err != nil {
				return nil, fmt.Errorf("failed to parse next URL: %v", err)
			}
			path = strings.TrimPrefix(parsedURL.Path, "/")
			if parsedURL.RawQuery != "" {
				path += "?" + parsedURL.RawQuery
			}
		} else {
			path = strings.TrimPrefix(page.Next, "/")
		}
	}

	return correspondentIDMapping, nil
}

View file

@ -1,428 +0,0 @@
package main
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
// Helper struct to hold common test data and methods
type testEnv struct {
	t             *testing.T
	server        *httptest.Server // mock paperless-ngx server
	client        *PaperlessClient // client under test, pointed at server
	requestCount  int              // number of requests the mock server has handled
	mockResponses map[string]http.HandlerFunc // handlers keyed by URL path
	db            *gorm.DB // in-memory test database
}
// newTestEnv initializes a new test environment: an in-memory DB, a
// mock paperless-ngx server that dispatches on URL path (and verifies
// the auth token on every request), and a client pointed at it.
// Callers should defer env.teardown().
func newTestEnv(t *testing.T) *testEnv {
	env := &testEnv{
		t:             t,
		mockResponses: make(map[string]http.HandlerFunc),
	}
	// Initialize test database
	db, err := InitializeTestDB()
	require.NoError(t, err)
	env.db = db
	// Create a mock server with a handler that dispatches based on URL path.
	// Requests to paths without a registered handler fail the test.
	env.server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		env.requestCount++
		handler, exists := env.mockResponses[r.URL.Path]
		if !exists {
			t.Fatalf("Unexpected request URL: %s", r.URL.Path)
		}
		// Set common headers and invoke the handler
		assert.Equal(t, "Token test-token", r.Header.Get("Authorization"))
		handler(w, r)
	}))
	// Initialize the PaperlessClient with the mock server URL
	env.client = NewPaperlessClient(env.server.URL, "test-token")
	env.client.HTTPClient = env.server.Client()
	// Add mock response for /api/correspondents/ (used by several tests)
	env.setMockResponse("/api/correspondents/", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(`{"results": [{"id": 1, "name": "Alpha"}, {"id": 2, "name": "Beta"}]}`))
	})
	return env
}
// InitializeTestDB opens an in-memory SQLite database and migrates
// the ModificationHistory schema; used to keep tests hermetic.
func InitializeTestDB() (*gorm.DB, error) {
	// Use in-memory SQLite for testing
	db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{})
	if err != nil {
		return nil, err
	}
	if err := db.AutoMigrate(&ModificationHistory{}); err != nil {
		return nil, err
	}
	return db, nil
}
// teardown closes the mock server
// (intended to be deferred right after newTestEnv).
func (env *testEnv) teardown() {
	env.server.Close()
}
// Helper method to set a mock response for a specific path
// (replaces any handler previously registered for that path).
func (env *testEnv) setMockResponse(path string, handler http.HandlerFunc) {
	env.mockResponses[path] = handler
}
// TestNewPaperlessClient tests the creation of a new PaperlessClient
// instance: URL and token are stored and an HTTP client is set up.
func TestNewPaperlessClient(t *testing.T) {
	client := NewPaperlessClient("http://example.com", "test-token")

	assert.Equal(t, "http://example.com", client.BaseURL)
	assert.Equal(t, "test-token", client.APIToken)
	assert.NotNil(t, client.HTTPClient)
}
// TestDo exercises PaperlessClient.Do against the mock server and checks that
// the HTTP method, status code, and response body pass through unchanged.
func TestDo(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()

	const payload = `{"message": "success"}`

	// Handler for the requested path; also verifies the request method.
	env.setMockResponse("/test-path", func(w http.ResponseWriter, r *http.Request) {
		assert.Equal(t, "GET", r.Method)
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(payload))
	})

	resp, err := env.client.Do(context.Background(), "GET", "/test-path", nil)
	require.NoError(t, err)
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	require.NoError(t, err)

	assert.Equal(t, http.StatusOK, resp.StatusCode)
	assert.Equal(t, payload, string(body))
}
// TestGetAllTags tests the GetAllTags method, including pagination: page 1
// links to page 2 via "next", and the results from both pages must be merged
// into a single name-to-ID map.
func TestGetAllTags(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()
	// Mock data for paginated responses
	page1 := map[string]interface{}{
		"results": []map[string]interface{}{
			{"id": 1, "name": "tag1"},
			{"id": 2, "name": "tag2"},
		},
		"next": fmt.Sprintf("%s/api/tags/?page=2", env.server.URL),
	}
	page2 := map[string]interface{}{
		"results": []map[string]interface{}{
			{"id": 3, "name": "tag3"},
		},
		"next": nil,
	}
	// Serve page 2 when "?page=2" is requested, page 1 otherwise.
	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
		query := r.URL.Query().Get("page")
		if query == "2" {
			w.WriteHeader(http.StatusOK)
			json.NewEncoder(w).Encode(page2)
		} else {
			w.WriteHeader(http.StatusOK)
			json.NewEncoder(w).Encode(page1)
		}
	})
	ctx := context.Background()
	tags, err := env.client.GetAllTags(ctx)
	require.NoError(t, err)
	// All three tags from both pages must be present.
	expectedTags := map[string]int{
		"tag1": 1,
		"tag2": 2,
		"tag3": 3,
	}
	assert.Equal(t, expectedTags, tags)
}
// TestGetDocumentsByTags tests the GetDocumentsByTags method: tag IDs in the
// API response must be resolved to tag names, correspondent IDs to
// correspondent names (via the default /api/correspondents/ mock from
// newTestEnv), and the query string must filter by tag names with the given
// page size.
func TestGetDocumentsByTags(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()
	// Mock data for documents
	documentsResponse := GetDocumentsApiResponse{
		Results: []GetDocumentApiResponseResult{
			{
				ID:            1,
				Title:         "Document 1",
				Content:       "Content 1",
				Tags:          []int{1, 2},
				Correspondent: 1,
			},
			{
				ID:            2,
				Title:         "Document 2",
				Content:       "Content 2",
				Tags:          []int{2, 3},
				Correspondent: 2,
			},
		},
	}
	// Mock data for tags (single page, no "next")
	tagsResponse := map[string]interface{}{
		"results": []map[string]interface{}{
			{"id": 1, "name": "tag1"},
			{"id": 2, "name": "tag2"},
			{"id": 3, "name": "tag3"},
		},
		"next": nil,
	}
	// Set mock responses
	env.setMockResponse("/api/documents/", func(w http.ResponseWriter, r *http.Request) {
		// Verify exact query-string construction (tag filters plus page size).
		expectedQuery := "tags__name__iexact=tag1&tags__name__iexact=tag2&page_size=25"
		assert.Equal(t, expectedQuery, r.URL.RawQuery)
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(documentsResponse)
	})
	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(tagsResponse)
	})
	ctx := context.Background()
	tags := []string{"tag1", "tag2"}
	documents, err := env.client.GetDocumentsByTags(ctx, tags, 25)
	require.NoError(t, err)
	// IDs must be resolved: tags to names, correspondents to "Alpha"/"Beta".
	expectedDocuments := []Document{
		{
			ID:            1,
			Title:         "Document 1",
			Content:       "Content 1",
			Tags:          []string{"tag1", "tag2"},
			Correspondent: "Alpha",
		},
		{
			ID:            2,
			Title:         "Document 2",
			Content:       "Content 2",
			Tags:          []string{"tag2", "tag3"},
			Correspondent: "Beta",
		},
	}
	assert.Equal(t, expectedDocuments, documents)
}
// TestDownloadPDF verifies that DownloadPDF returns exactly the bytes served
// by the document's download endpoint.
func TestDownloadPDF(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()

	doc := Document{ID: 123}

	// The checked-in sample PDF is the payload the mock endpoint serves.
	want, err := os.ReadFile("tests/pdf/sample.pdf")
	require.NoError(t, err)

	env.setMockResponse(fmt.Sprintf("/api/documents/%d/download/", doc.ID),
		func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(http.StatusOK)
			w.Write(want)
		})

	got, err := env.client.DownloadPDF(context.Background(), doc)
	require.NoError(t, err)
	assert.Equal(t, want, got)
}
// TestUpdateDocuments tests the UpdateDocuments method: the PATCH body sent to
// paperless-ngx must contain the suggested title and the suggested tags mapped
// to their IDs, excluding both the configured manual tag and any tag listed in
// RemoveTags. Old tags are not re-added (the suggestion step is expected to
// have included the ones to keep).
func TestUpdateDocuments(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()
	// Mock data for documents to update
	documents := []DocumentSuggestion{
		{
			ID: 1,
			OriginalDocument: Document{
				ID:    1,
				Title: "Old Title",
				Tags:  []string{"tag1", "tag3", "manual", "removeMe"},
			},
			SuggestedTitle: "New Title",
			SuggestedTags:  []string{"tag2", "tag3"},
			RemoveTags:     []string{"removeMe"},
		},
	}
	// Tag IDs as served by the mock /api/tags/ endpoint below.
	idTag1 := 1
	idTag2 := 2
	idTag3 := 4
	// Mock data for tags
	tagsResponse := map[string]interface{}{
		"results": []map[string]interface{}{
			{"id": idTag1, "name": "tag1"},
			{"id": idTag2, "name": "tag2"},
			{"id": 3, "name": "manual"},
			{"id": idTag3, "name": "tag3"},
			{"id": 5, "name": "removeMe"},
		},
		"next": nil,
	}
	// Set the package-level manual tag so it is filtered from updates.
	manualTag = "manual"
	// Set mock responses
	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(tagsResponse)
	})
	updatePath := fmt.Sprintf("/api/documents/%d/", documents[0].ID)
	env.setMockResponse(updatePath, func(w http.ResponseWriter, r *http.Request) {
		// Updates must go out as PATCH requests.
		assert.Equal(t, "PATCH", r.Method)
		// Read and parse the request body
		bodyBytes, err := io.ReadAll(r.Body)
		require.NoError(t, err)
		defer r.Body.Close()
		var updatedFields map[string]interface{}
		err = json.Unmarshal(bodyBytes, &updatedFields)
		require.NoError(t, err)
		// Expected updated fields (JSON numbers decode as float64).
		expectedFields := map[string]interface{}{
			"title": "New Title",
			// do not keep previous tags since the tag generation will already take care to include old ones:
			"tags": []interface{}{float64(idTag2), float64(idTag3)},
		}
		assert.Equal(t, expectedFields, updatedFields)
		w.WriteHeader(http.StatusOK)
	})
	ctx := context.Background()
	err := env.client.UpdateDocuments(ctx, documents, env.db, false)
	require.NoError(t, err)
}
// TestUrlEncode verifies that urlEncode replaces spaces with '+' while leaving
// other characters such as ':' untouched.
func TestUrlEncode(t *testing.T) {
	got := urlEncode("tag:tag1 tag:tag2")
	assert.Equal(t, "tag:tag1+tag:tag2", got)
}
// TestDownloadDocumentAsImages tests the DownloadDocumentAsImages method with
// a single-page sample PDF: exactly one JPEG must be produced and exist on
// disk under the paperless-gpt cache directory.
func TestDownloadDocumentAsImages(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()
	document := Document{
		ID: 123,
	}
	// Get sample PDF from tests/pdf/sample.pdf
	pdfFile := "tests/pdf/sample.pdf"
	pdfContent, err := os.ReadFile(pdfFile)
	require.NoError(t, err)
	// Serve the sample PDF from the document's download endpoint.
	downloadPath := fmt.Sprintf("/api/documents/%d/download/", document.ID)
	env.setMockResponse(downloadPath, func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write(pdfContent)
	})
	ctx := context.Background()
	// NOTE(review): the third argument appears to be a page limit, with 0
	// meaning "no limit" — confirm against DownloadDocumentAsImages.
	imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 0)
	require.NoError(t, err)
	// Verify that exactly one page was extracted
	assert.Len(t, imagePaths, 1)
	// The path shall end with paperless-gpt/document-123/page000.jpg
	assert.Contains(t, imagePaths[0], "paperless-gpt/document-123/page000.jpg")
	// Every returned image path must exist on disk.
	for _, imagePath := range imagePaths {
		_, err := os.Stat(imagePath)
		assert.NoError(t, err)
	}
}
// TestDownloadDocumentAsImages_ManyPages verifies that the page limit is
// honored: from a 52-page sample PDF only 50 page images are extracted, all of
// them written below the overridden cache folder.
func TestDownloadDocumentAsImages_ManyPages(t *testing.T) {
	env := newTestEnv(t)
	defer env.teardown()
	document := Document{
		ID: 321,
	}
	// Get sample PDF from tests/pdf/many-pages.pdf
	pdfFile := "tests/pdf/many-pages.pdf"
	pdfContent, err := os.ReadFile(pdfFile)
	require.NoError(t, err)
	// Serve the multi-page PDF from the document's download endpoint.
	downloadPath := fmt.Sprintf("/api/documents/%d/download/", document.ID)
	env.setMockResponse(downloadPath, func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write(pdfContent)
	})
	ctx := context.Background()
	// Redirect the image cache into the test tree and start clean.
	env.client.CacheFolder = "tests/tmp"
	// Clean the cache folder
	os.RemoveAll(env.client.CacheFolder)
	imagePaths, err := env.client.DownloadDocumentAsImages(ctx, document.ID, 50)
	require.NoError(t, err)
	// Verify that exactly 50 pages were extracted - the original doc contains 52 pages
	assert.Len(t, imagePaths, 50)
	// Every image must exist and live under tests/tmp/document-321/.
	for _, imagePath := range imagePaths {
		_, err := os.Stat(imagePath)
		assert.NoError(t, err)
		assert.Contains(t, imagePath, "tests/tmp/document-321/page")
	}
}

View file

@ -1,20 +0,0 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:recommended"
],
"customManagers": [
{
"customType": "regex",
"description": "Update VERSION variables in Dockerfiles",
"fileMatch": [
"^Dockerfile$"
],
"matchStrings": [
"# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) .+?_VERSION=\"(?<currentValue>.+?)\"\\s",
"# renovate: datasource=(?<datasource>[a-z-]+?) depName=(?<depName>.+?)(?: versioning=(?<versioning>[a-z-]+?))?\\s(?:ENV|ARG) VERSION=\"(?<currentValue>.+?)\"\\s"
],
"versioningTemplate": "{{#if versioning}}{{versioning}}{{else}}semver{{/if}}"
}
]
}

Binary file not shown.

Binary file not shown.

View file

@ -1,99 +0,0 @@
package main
import (
"bytes"
"fmt"
"text/template"
"github.com/tmc/langchaingo/llms"
)
// getAvailableTokensForContent calculates how many tokens remain for document
// content by rendering tmpl with an empty "Content" value, counting the tokens
// the fixed prompt text consumes, and subtracting that (plus a small safety
// margin) from the configured tokenLimit.
//
// It returns -1 with no error when the token limit is disabled
// (tokenLimit <= 0), and an error when the prompt alone already exceeds the
// limit.
func getAvailableTokensForContent(tmpl *template.Template, data map[string]interface{}) (int, error) {
	if tokenLimit <= 0 {
		// Limit disabled: signal "unlimited" to callers.
		return -1, nil
	}

	// Copy the data and blank out the content so only the fixed prompt text
	// is measured.
	templateData := make(map[string]interface{}, len(data)+1)
	for key, value := range data {
		templateData[key] = value
	}
	templateData["Content"] = ""

	var rendered bytes.Buffer
	if err := tmpl.Execute(&rendered, templateData); err != nil {
		return 0, fmt.Errorf("error executing template: %v", err)
	}

	promptTokens, err := getTokenCount(rendered.String())
	if err != nil {
		return 0, fmt.Errorf("error counting tokens in prompt: %v", err)
	}
	log.Debugf("Prompt template uses %d tokens", promptTokens)

	// Safety margin so slight under-counting cannot push a request over the
	// limit.
	promptTokens += 10

	available := tokenLimit - promptTokens
	if available < 0 {
		return 0, fmt.Errorf("prompt template exceeds token limit")
	}
	return available, nil
}
// getTokenCount returns the token count of content for the configured
// llmModel, as computed by langchaingo's llms.CountTokens. The error result is
// always nil; it exists to keep the signature uniform with callers that expect
// a fallible counter.
func getTokenCount(content string) (int, error) {
	return llms.CountTokens(llmModel, content), nil
}
// truncateContentByTokens truncates content so that its token count does not
// exceed availableTokens, using a binary search over runes to find the longest
// prefix within the limit (so multi-byte characters are never split).
//
// Truncation is skipped (content returned unchanged) when availableTokens is
// negative — the "disabled" sentinel from getAvailableTokensForContent — or
// when the global tokenLimit is <= 0. Note that availableTokens == 0 is NOT
// skipped: it truncates the content down to the empty string.
func truncateContentByTokens(content string, availableTokens int) (string, error) {
	if availableTokens < 0 || tokenLimit <= 0 {
		return content, nil
	}
	totalTokens, err := getTokenCount(content)
	if err != nil {
		return "", fmt.Errorf("error counting tokens: %v", err)
	}
	// Fast path: content already fits.
	if totalTokens <= availableTokens {
		return content, nil
	}
	// Convert content to runes for safe slicing.
	runes := []rune(content)
	low := 0
	high := len(runes)
	validCut := 0 // longest prefix length (in runes) known to fit
	for low <= high {
		mid := (low + high) / 2
		substr := string(runes[:mid])
		count, err := getTokenCount(substr)
		if err != nil {
			return "", fmt.Errorf("error counting tokens in substring: %v", err)
		}
		if count <= availableTokens {
			validCut = mid
			low = mid + 1
		} else {
			high = mid - 1
		}
	}
	truncated := string(runes[:validCut])
	// Final verification: guards against a tokenizer whose counts are not
	// monotonic in prefix length.
	finalTokens, err := getTokenCount(truncated)
	if err != nil {
		return "", fmt.Errorf("error counting tokens in final truncated content: %v", err)
	}
	if finalTokens > availableTokens {
		return "", fmt.Errorf("truncated content still exceeds the available token limit")
	}
	return truncated, nil
}

View file

@ -1,302 +0,0 @@
package main
import (
"bytes"
"fmt"
"os"
"strconv"
"testing"
"text/template"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/tmc/langchaingo/textsplitter"
)
// resetTokenLimit re-initializes the package-level tokenLimit from the
// TOKEN_LIMIT environment variable. Unset or non-numeric values leave it at 0
// (limit disabled).
func resetTokenLimit() {
	tokenLimit = 0

	limit := os.Getenv("TOKEN_LIMIT")
	if limit == "" {
		return
	}

	parsed, err := strconv.Atoi(limit)
	if err != nil {
		return
	}
	tokenLimit = parsed
}
// TestTokenLimit verifies that resetTokenLimit parses TOKEN_LIMIT from the
// environment: empty, zero, and invalid values all yield 0 (disabled), a
// positive integer is taken as-is.
func TestTokenLimit(t *testing.T) {
	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)
	tests := []struct {
		name      string
		envValue  string
		wantLimit int
	}{
		{
			name:      "empty value",
			envValue:  "",
			wantLimit: 0,
		},
		{
			name:      "zero value",
			envValue:  "0",
			wantLimit: 0,
		},
		{
			name:      "positive value",
			envValue:  "1000",
			wantLimit: 1000,
		},
		{
			name:      "invalid value",
			envValue:  "not-a-number",
			wantLimit: 0,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Set environment variable
			os.Setenv("TOKEN_LIMIT", tc.envValue)
			// Set tokenLimit based on environment
			resetTokenLimit()
			assert.Equal(t, tc.wantLimit, tokenLimit)
		})
	}
}
// TestGetAvailableTokensForContent covers the three outcomes of
// getAvailableTokensForContent: -1 when the limit is disabled, an error when
// the prompt template alone exceeds the limit, and the remaining budget
// (limit minus prompt tokens minus safety margin) otherwise.
func TestGetAvailableTokensForContent(t *testing.T) {
	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)
	// Test template with one variable plus the Content placeholder.
	tmpl := template.Must(template.New("test").Parse("Template with {{.Var1}} and {{.Content}}"))
	tests := []struct {
		name      string
		limit     int
		data      map[string]interface{}
		wantCount int
		wantErr   bool
	}{
		{
			name:      "disabled token limit",
			limit:     0,
			data:      map[string]interface{}{"Var1": "test"},
			wantCount: -1,
			wantErr:   false,
		},
		{
			name:  "template exceeds limit",
			limit: 2,
			data: map[string]interface{}{
				"Var1": "test",
			},
			wantCount: 0,
			wantErr:   true,
		},
		{
			// 100 minus prompt tokens minus the 10-token safety margin.
			name:  "available tokens calculation",
			limit: 100,
			data: map[string]interface{}{
				"Var1": "test",
			},
			wantCount: 85,
			wantErr:   false,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Set token limit
			os.Setenv("TOKEN_LIMIT", fmt.Sprintf("%d", tc.limit))
			// Set tokenLimit based on environment
			resetTokenLimit()
			count, err := getAvailableTokensForContent(tmpl, tc.data)
			if tc.wantErr {
				assert.Error(t, err)
			} else {
				assert.NoError(t, err)
				assert.Equal(t, tc.wantCount, count)
			}
		})
	}
}
// TestTruncateContentByTokens checks truncateContentByTokens end to end:
// content within budget is returned unchanged, a negative budget disables
// truncation, over-budget content comes back strictly shorter, and the empty
// string passes through.
func TestTruncateContentByTokens(t *testing.T) {
	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)
	// Enable truncation globally (tokenLimit must be > 0 for it to run).
	os.Setenv("TOKEN_LIMIT", "100")
	// Set tokenLimit based on environment
	resetTokenLimit()
	tests := []struct {
		name            string
		content         string
		availableTokens int
		wantTruncated   bool
		wantErr         bool
	}{
		{
			name:            "no truncation needed",
			content:         "short content",
			availableTokens: 20,
			wantTruncated:   false,
			wantErr:         false,
		},
		{
			// -1 is the "unlimited" sentinel; content must pass through.
			name:            "disabled by token limit",
			content:         "any content",
			availableTokens: -1,
			wantTruncated:   false,
			wantErr:         false,
		},
		{
			name:            "truncation needed",
			content:         "This is a much longer content that will definitely need to be truncated because it exceeds the available tokens",
			availableTokens: 10,
			wantTruncated:   true,
			wantErr:         false,
		},
		{
			name:            "empty content",
			content:         "",
			availableTokens: 10,
			wantTruncated:   false,
			wantErr:         false,
		},
		{
			name:            "exact token count",
			content:         "one two three four five",
			availableTokens: 5,
			wantTruncated:   false,
			wantErr:         false,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			result, err := truncateContentByTokens(tc.content, tc.availableTokens)
			if tc.wantErr {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			if tc.wantTruncated {
				assert.True(t, len(result) < len(tc.content), "Content should be truncated")
			} else {
				assert.Equal(t, tc.content, result, "Content should not be truncated")
			}
		})
	}
}
// TestTokenLimitIntegration exercises the full pipeline: compute the available
// budget for a template, truncate content to fit, render the template with the
// truncated content, and (when a limit is set) verify the rendered size.
func TestTokenLimitIntegration(t *testing.T) {
	// Save current env and restore after test
	originalLimit := os.Getenv("TOKEN_LIMIT")
	defer os.Setenv("TOKEN_LIMIT", originalLimit)
	// Create a test template
	tmpl := template.Must(template.New("test").Parse(`
Template with variables:
Language: {{.Language}}
Title: {{.Title}}
Content: {{.Content}}
`))
	// Test data
	data := map[string]interface{}{
		"Language": "English",
		"Title":    "Test Document",
	}
	// Test with different token limits
	tests := []struct {
		name      string
		limit     int
		content   string
		wantSize  int
		wantError bool
	}{
		{
			name:      "no limit",
			limit:     0,
			content:   "original content",
			wantSize:  len("original content"),
			wantError: false,
		},
		{
			name:      "sufficient limit",
			limit:     1000,
			content:   "original content",
			wantSize:  len("original content"),
			wantError: false,
		},
		{
			name:      "tight limit",
			limit:     50,
			content:   "This is a long content that should be truncated to fit within the token limit",
			wantSize:  50,
			wantError: false,
		},
		{
			// Budget computation itself must fail: the prompt exceeds the limit.
			name:      "very small limit",
			limit:     3,
			content:   "Content too large for small limit",
			wantError: true,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Set token limit
			os.Setenv("TOKEN_LIMIT", fmt.Sprintf("%d", tc.limit))
			// Set tokenLimit based on environment
			resetTokenLimit()
			// First get available tokens
			availableTokens, err := getAvailableTokensForContent(tmpl, data)
			if tc.wantError {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			// Then truncate content
			truncated, err := truncateContentByTokens(tc.content, availableTokens)
			require.NoError(t, err)
			// Finally execute template with truncated content
			data["Content"] = truncated
			var result string
			{
				var buf bytes.Buffer
				err = tmpl.Execute(&buf, data)
				require.NoError(t, err)
				result = buf.String()
			}
			// Verify final size is within limit if limit is enabled.
			// NOTE(review): SplitText returns text chunks, not individual
			// tokens, so this assertion may be weaker than intended — confirm.
			if tc.limit > 0 {
				splitter := textsplitter.NewTokenSplitter()
				tokens, err := splitter.SplitText(result)
				require.NoError(t, err)
				assert.LessOrEqual(t, len(tokens), tc.limit)
			}
		})
	}
}

135
types.go
View file

@ -1,108 +1,61 @@
package main
// GetDocumentsApiResponse is the response payload for /documents endpoint.
// But we are only interested in a subset of the fields.
import (
"time"
)
// GetDocumentsApiResponse models the subset of the paperless-ngx /documents
// list response that this application consumes.
type GetDocumentsApiResponse struct {
	Count int `json:"count"` // total number of matching documents
	// Next interface{} `json:"next"`
	// Previous interface{} `json:"previous"`
	All     []int                          `json:"all"` // IDs of all matching documents
	Results []GetDocumentApiResponseResult `json:"results"`
}
// GetDocumentApiResponseResult is one entry of the /documents list response.
// Only the fields this application reads are modeled; the rest of the upstream
// payload is kept below as commented-out reference.
type GetDocumentApiResponseResult struct {
	ID            int `json:"id"`
	Correspondent int `json:"correspondent"` // correspondent ID; resolved to a name elsewhere
	// DocumentType interface{} `json:"document_type"`
	// StoragePath interface{} `json:"storage_path"`
	Title   string `json:"title"`
	Content string `json:"content"`
	Tags    []int  `json:"tags"` // tag IDs; resolved to names elsewhere
	// Created time.Time `json:"created"`
	// CreatedDate string `json:"created_date"`
	// Modified time.Time `json:"modified"`
	// Added time.Time `json:"added"`
	// ArchiveSerialNumber interface{} `json:"archive_serial_number"`
	// OriginalFileName string `json:"original_file_name"`
	// ArchivedFileName string `json:"archived_file_name"`
	// Owner int `json:"owner"`
	// UserCanChange bool `json:"user_can_change"`
	Notes []interface{} `json:"notes"`
	// SearchHit struct {
	// Score float64 `json:"score"`
	// Highlights string `json:"highlights"`
	// NoteHighlights string `json:"note_highlights"`
	// Rank int `json:"rank"`
	// } `json:"__search_hit__"`
}
// GetDocumentApiResponse is the response payload for /documents/{id} endpoint.
// But we are only interested in a subset of the fields.
type GetDocumentApiResponse struct {
ID int `json:"id"`
Correspondent int `json:"correspondent"`
// DocumentType interface{} `json:"document_type"`
// StoragePath interface{} `json:"storage_path"`
Title string `json:"title"`
Content string `json:"content"`
Tags []int `json:"tags"`
// Created time.Time `json:"created"`
// CreatedDate string `json:"created_date"`
// Modified time.Time `json:"modified"`
// Added time.Time `json:"added"`
// ArchiveSerialNumber interface{} `json:"archive_serial_number"`
// OriginalFileName string `json:"original_file_name"`
// ArchivedFileName string `json:"archived_file_name"`
// Owner int `json:"owner"`
// UserCanChange bool `json:"user_can_change"`
Notes []interface{} `json:"notes"`
Count int `json:"count"`
Next interface{} `json:"next"`
Previous interface{} `json:"previous"`
All []int `json:"all"`
Results []struct {
ID int `json:"id"`
Correspondent interface{} `json:"correspondent"`
DocumentType interface{} `json:"document_type"`
StoragePath interface{} `json:"storage_path"`
Title string `json:"title"`
Content string `json:"content"`
Tags []int `json:"tags"`
Created time.Time `json:"created"`
CreatedDate string `json:"created_date"`
Modified time.Time `json:"modified"`
Added time.Time `json:"added"`
ArchiveSerialNumber interface{} `json:"archive_serial_number"`
OriginalFileName string `json:"original_file_name"`
ArchivedFileName string `json:"archived_file_name"`
Owner int `json:"owner"`
UserCanChange bool `json:"user_can_change"`
Notes []interface{} `json:"notes"`
SearchHit struct {
Score float64 `json:"score"`
Highlights string `json:"highlights"`
NoteHighlights string `json:"note_highlights"`
Rank int `json:"rank"`
} `json:"__search_hit__"`
} `json:"results"`
}
// Document is a stripped down version of the document object from paperless-ngx.
// Response payload for /documents endpoint and part of request payload for /generate-suggestions endpoint
type Document struct {
ID int `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Tags []string `json:"tags"`
Correspondent string `json:"correspondent"`
ID int `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Tags []string `json:"tags"`
}
// GenerateSuggestionsRequest is the request payload for generating suggestions for /generate-suggestions endpoint
type GenerateSuggestionsRequest struct {
Documents []Document `json:"documents"`
GenerateTitles bool `json:"generate_titles,omitempty"`
GenerateTags bool `json:"generate_tags,omitempty"`
GenerateCorrespondents bool `json:"generate_correspondents,omitempty"`
Documents []Document `json:"documents"`
GenerateTitles bool `json:"generate_titles,omitempty"`
GenerateTags bool `json:"generate_tags,omitempty"`
}
// DocumentSuggestion is the response payload for /generate-suggestions endpoint and the request payload for /update-documents endpoint (as an array)
type DocumentSuggestion struct {
ID int `json:"id"`
OriginalDocument Document `json:"original_document"`
SuggestedTitle string `json:"suggested_title,omitempty"`
SuggestedTags []string `json:"suggested_tags,omitempty"`
SuggestedContent string `json:"suggested_content,omitempty"`
SuggestedCorrespondent string `json:"suggested_correspondent,omitempty"`
RemoveTags []string `json:"remove_tags,omitempty"`
}
type Correspondent struct {
Name string `json:"name"`
MatchingAlgorithm int `json:"matching_algorithm"`
Match string `json:"match"`
IsInsensitive bool `json:"is_insensitive"`
Owner *int `json:"owner"`
SetPermissions struct {
View struct {
Users []int `json:"users"`
Groups []int `json:"groups"`
} `json:"view"`
Change struct {
Users []int `json:"users"`
Groups []int `json:"groups"`
} `json:"change"`
} `json:"set_permissions"`
ID int `json:"id"`
OriginalDocument Document `json:"original_document"`
SuggestedTitle string `json:"suggested_title,omitempty"`
SuggestedTags []string `json:"suggested_tags,omitempty"`
}

View file

@ -1,7 +0,0 @@
package main
// Build metadata defaults for development builds.
// NOTE(review): presumably overridden at release time (e.g. via linker
// -ldflags -X) — confirm against the release pipeline.
var (
	version   = "devVersion"
	buildDate = "devBuildDate"
	commit    = "devCommit"
)

View file

@ -1,8 +0,0 @@
# Environment variables for E2E tests
PAPERLESS_NGX_URL=http://localhost:8000
PAPERLESS_GPT_URL=http://localhost:8080
PAPERLESS_ADMIN_USER=admin
PAPERLESS_ADMIN_PASSWORD=admin
# Add your OpenAI API key here (required for document processing)
# OPENAI_API_KEY=sk-uhB.... # Replace with your actual OpenAI API key from https://platform.openai.com/api-keys

0
web-app/dist/.keep vendored
View file

View file

@ -1,57 +0,0 @@
# E2E stack: paperless-ngx (with redis and postgres) plus paperless-gpt built
# from the repository root.
version: '3.8'

services:
  # Paperless-ngx instance under test, reachable on host port 8001.
  paperless-ngx:
    image: ghcr.io/paperless-ngx/paperless-ngx:latest
    environment:
      PAPERLESS_URL: http://localhost:8001
      PAPERLESS_SECRET_KEY: change-me  # test-only secret; never reuse in production
      PAPERLESS_ADMIN_USER: admin
      PAPERLESS_ADMIN_PASSWORD: admin
      PAPERLESS_TIME_ZONE: Europe/Berlin
      PAPERLESS_OCR_LANGUAGE: eng
      PAPERLESS_REDIS: redis://redis:6379
    ports:
      - "8001:8000"  # host:container; quoted so YAML does not parse it as sexagesimal
    volumes:
      - paperless-data:/usr/src/paperless/data
      - paperless-media:/usr/src/paperless/media
      - paperless-export:/usr/src/paperless/export
    depends_on:
      - redis
      - postgres

  # Task broker required by paperless-ngx.
  redis:
    image: redis:7
    restart: unless-stopped

  # Database container.
  # NOTE(review): paperless-ngx has no PAPERLESS_DBHOST set above, so it may
  # fall back to its default database instead of this postgres — confirm.
  postgres:
    image: postgres:17
    restart: unless-stopped
    environment:
      POSTGRES_DB: paperless
      POSTGRES_USER: paperless
      POSTGRES_PASSWORD: paperless
    volumes:
      - pgdata:/var/lib/postgresql/data

  # Service under test, built from the repository root Dockerfile.
  paperless-gpt:
    build:
      context: ..
      dockerfile: Dockerfile
    image: icereed/paperless-gpt:e2e
    environment:
      PAPERLESS_URL: http://paperless-ngx:8000  # container-to-container address
      PAPERLESS_ADMIN_USER: admin
      PAPERLESS_ADMIN_PASSWORD: admin
      OPENAI_API_KEY: ${OPENAI_API_KEY}  # supplied via the host environment
    ports:
      - "8080:8080"
    depends_on:
      - paperless-ngx

volumes:
  paperless-data:
  paperless-media:
  paperless-export:
  pgdata:

View file

@ -1,135 +0,0 @@
import { expect, Page, test } from '@playwright/test';
import path, { dirname } from 'path';
import { fileURLToPath } from 'url';
import { addTagToDocument, PORTS, setupTestEnvironment, TestEnvironment, uploadDocument } from './test-environment';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Shared across all tests in this spec file.
let testEnv: TestEnvironment;
let page: Page;

// Start the full containerized stack once per spec file (slow: pulls and
// boots several containers).
test.beforeAll(async () => {
  testEnv = await setupTestEnvironment();
});

// Tear down containers, network, and browser after the last test.
test.afterAll(async () => {
  await testEnv.cleanup();
});

// Open the paperless-gpt UI before each test and capture a baseline screenshot.
test.beforeEach(async ({ page: testPage }) => {
  page = testPage;
  await page.goto(`http://localhost:${testEnv.paperlessGpt.getMappedPort(PORTS.paperlessGpt)}`);
  await page.screenshot({ path: 'test-results/initial-state.png' });
});

test.afterEach(async () => {
  await page.close();
});
// End-to-end flow: upload a tagged document via the paperless-ngx API, run the
// suggestion/apply cycle in the paperless-gpt UI, then verify the change shows
// up in the History view and can be undone. Screenshots are captured at each
// stage under test-results/ for debugging.
test('should process document and show changes in history', async () => {
  const paperlessNgxPort = testEnv.paperlessNgx.getMappedPort(PORTS.paperlessNgx);
  const paperlessGptPort = testEnv.paperlessGpt.getMappedPort(PORTS.paperlessGpt);
  const credentials = { username: 'admin', password: 'admin' };

  // 1. Upload document and add initial tag via API
  const baseUrl = `http://localhost:${paperlessNgxPort}`;
  const documentPath = path.join(__dirname, 'fixtures', 'test-document.txt');

  // Get the paperless-gpt tag ID (created during environment setup).
  const response = await fetch(`${baseUrl}/api/tags/?name=paperless-gpt`, {
    headers: {
      'Authorization': 'Basic ' + btoa(`${credentials.username}:${credentials.password}`),
    },
  });
  if (!response.ok) {
    throw new Error('Failed to fetch paperless-gpt tag');
  }
  const tags = await response.json();
  if (!tags.results || tags.results.length === 0) {
    throw new Error('paperless-gpt tag not found');
  }
  const tagId = tags.results[0].id;

  // Upload document and get ID
  const { id: documentId } = await uploadDocument(
    baseUrl,
    documentPath,
    'Original Title',
    credentials
  );
  console.log(`Document ID: ${documentId}`);

  // Tagging the document makes it visible to paperless-gpt for processing.
  await addTagToDocument(
    baseUrl,
    documentId,
    tagId,
    credentials
  );

  // 2. Navigate to Paperless-GPT UI and process the document
  await page.goto(`http://localhost:${paperlessGptPort}`);
  // Wait for document to appear in the list (OCR/indexing can be slow).
  await page.waitForSelector('.document-card', { timeout: 1000 * 60 });
  await page.screenshot({ path: 'test-results/document-loaded.png' });
  // Click the process button
  await page.click('button:has-text("Generate Suggestions")');
  // Wait for processing to complete (includes an LLM round trip).
  await page.waitForSelector('.suggestions-review', { timeout: 30000 });
  await page.screenshot({ path: 'test-results/suggestions-loaded.png' });
  // Apply the suggestions
  await page.click('button:has-text("Apply")');
  // Wait for success message
  await page.waitForSelector('.success-modal', { timeout: 10000 });
  await page.screenshot({ path: 'test-results/suggestions-applied.png' });
  // Click "OK" on success modal
  await page.click('button:has-text("OK")');

  // 3. Check history page for the modifications
  await page.click('a:has-text("History")');
  // Wait for history page to load
  await page.waitForSelector('.modification-history', { timeout: 5000 });
  await page.screenshot({ path: 'test-results/history-page.png' });
  // Verify at least one modification entry exists
  const modifications = await page.locator('.undo-card').count();
  expect(modifications).toBeGreaterThan(0);
  // Verify modification details
  const firstModification = await page.locator('.undo-card').first();
  // Check if title was modified (the original title appears in the entry).
  const titleChange = await firstModification.locator('text=Original Title').isVisible();
  expect(titleChange).toBeTruthy();

  // Test pagination if there are multiple modifications
  const paginationVisible = await page.locator('.pagination-controls').isVisible();
  if (paginationVisible) {
    // Click next page if available
    const nextButton = page.locator('button:has-text("Next")');
    if (await nextButton.isEnabled()) {
      await nextButton.click();
      // Wait for new items to load
      await page.waitForSelector('.undo-card');
    }
  }

  // 4. Test undo functionality
  const undoButton = await firstModification.locator('button:has-text("Undo")');
  if (await undoButton.isEnabled()) {
    await undoButton.click();
    // Wait for undo to complete. Text should change to "Undone"
    await page.waitForSelector('text=Undone');
  }
});

View file

@ -1,20 +0,0 @@
Invoice #12345
From: Test Company Ltd.
To: Sample Client Inc.
Date: January 27, 2025
Item Description:
1. Software Development Services - $5,000
2. Cloud Infrastructure Setup - $2,500
3. Technical Consulting - $1,500
Total: $9,000
Payment Terms: Net 30
Due Date: February 26, 2025
Please make payment to:
Bank: Test Bank
Account: 1234567890

View file

@ -1,24 +0,0 @@
import { chromium } from '@playwright/test';
import * as nodeFetch from 'node-fetch';
// Polyfill fetch for Node.js environments that lack the global fetch API.
// NOTE(review): node-fetch v2 does not export FormData — confirm the installed
// version actually provides it, otherwise that assignment is undefined.
if (!globalThis.fetch) {
  (globalThis as any).fetch = nodeFetch.default;
  (globalThis as any).Headers = nodeFetch.Headers;
  (globalThis as any).Request = nodeFetch.Request;
  (globalThis as any).Response = nodeFetch.Response;
  (globalThis as any).FormData = nodeFetch.FormData;
}

// Playwright global setup: warms up the browser and sanity-checks env vars.
async function globalSetup() {
  // Launch and close a browser once up front.
  // NOTE(review): this does not install browsers (that is
  // `npx playwright install`); it only fails fast if none is available.
  const browser = await chromium.launch();
  await browser.close();

  // OPENAI_API_KEY is needed by paperless-gpt for document processing;
  // warn early instead of failing mid-test.
  if (!process.env.OPENAI_API_KEY) {
    console.warn('Warning: OPENAI_API_KEY environment variable is not set');
  }
}

export default globalSetup;

View file

@ -1,281 +0,0 @@
import { Browser, chromium } from '@playwright/test';
import * as fs from 'fs';
import { GenericContainer, Network, StartedTestContainer, Wait } from 'testcontainers';
// Handles to the running containers plus a shared browser and a cleanup hook
// that tears everything down.
export interface TestEnvironment {
  paperlessNgx: StartedTestContainer;
  paperlessGpt: StartedTestContainer;
  browser: Browser;
  cleanup: () => Promise<void>;
}

// Container-internal ports; resolve host ports via getMappedPort().
export const PORTS = {
  paperlessNgx: 8000,
  paperlessGpt: 8080,
};
// setupTestEnvironment boots the full e2e stack on a dedicated Docker
// network: redis, postgres, paperless-ngx, and paperless-gpt (image
// overridable via PAPERLESS_GPT_IMAGE). It pre-creates the "paperless-gpt"
// tag, provisions an API token for paperless-gpt, launches a browser, and
// returns handles plus a cleanup function that stops everything in order.
export async function setupTestEnvironment(): Promise<TestEnvironment> {
  console.log('Setting up test environment...');
  const paperlessPort = PORTS.paperlessNgx;
  const gptPort = PORTS.paperlessGpt;

  // Create a network for the containers so they can reach each other by alias.
  const network = await new Network().start();

  console.log('Starting Redis container...');
  const redis = await new GenericContainer('redis:7')
    .withNetwork(network)
    .withNetworkAliases('redis')
    .start();

  console.log('Starting Postgres container...');
  const postgres = await new GenericContainer('postgres:15')
    .withNetwork(network)
    .withNetworkAliases('postgres')
    .withEnvironment({
      POSTGRES_DB: 'paperless',
      POSTGRES_USER: 'paperless',
      POSTGRES_PASSWORD: 'paperless'
    })
    .start();

  console.log('Starting Paperless-ngx container...');
  const paperlessNgx = await new GenericContainer('ghcr.io/paperless-ngx/paperless-ngx:latest')
    .withNetwork(network)
    .withNetworkAliases('paperless-ngx')
    .withEnvironment({
      PAPERLESS_URL: `http://localhost:${paperlessPort}`,
      PAPERLESS_SECRET_KEY: 'change-me',
      PAPERLESS_ADMIN_USER: 'admin',
      PAPERLESS_ADMIN_PASSWORD: 'admin',
      PAPERLESS_TIME_ZONE: 'Europe/Berlin',
      PAPERLESS_OCR_LANGUAGE: 'eng',
      PAPERLESS_REDIS: 'redis://redis:6379',
      PAPERLESS_DBHOST: 'postgres',
      PAPERLESS_DBNAME: 'paperless',
      PAPERLESS_DBUSER: 'paperless',
      PAPERLESS_DBPASS: 'paperless'
    })
    .withExposedPorts(paperlessPort)
    .withWaitStrategy(Wait.forHttp('/api/', paperlessPort))
    .start();
  const mappedPort = paperlessNgx.getMappedPort(paperlessPort);
  console.log(`Paperless-ngx container started, mapped port: ${mappedPort}`);

  // Create required tag before starting paperless-gpt; stop paperless-ngx on
  // failure so the suite doesn't leak containers.
  const baseUrl = `http://localhost:${mappedPort}`;
  const credentials = { username: 'admin', password: 'admin' };
  try {
    console.log('Creating paperless-gpt tag...');
    await createTag(baseUrl, 'paperless-gpt', credentials);
  } catch (error) {
    console.error('Failed to create paperless-gpt tag:', error);
    await paperlessNgx.stop();
    throw error;
  }

  console.log('Starting Paperless-gpt container...');
  // Allow CI to inject a prebuilt image; default to the locally built e2e tag.
  const paperlessGptImage = process.env.PAPERLESS_GPT_IMAGE || 'icereed/paperless-gpt:e2e';
  console.log(`Using image: ${paperlessGptImage}`);
  const paperlessGpt = await new GenericContainer(paperlessGptImage)
    .withNetwork(network)
    .withEnvironment({
      PAPERLESS_BASE_URL: `http://paperless-ngx:${paperlessPort}`,
      PAPERLESS_API_TOKEN: await getApiToken(baseUrl, credentials),
      LLM_PROVIDER: "openai",
      LLM_MODEL: "gpt-4o-mini",
      LLM_LANGUAGE: "english",
      OPENAI_API_KEY: process.env.OPENAI_API_KEY || '',
    })
    .withExposedPorts(gptPort)
    .withWaitStrategy(Wait.forHttp('/', gptPort))
    .start();
  console.log('Paperless-gpt container started');

  console.log('Launching browser...');
  const browser = await chromium.launch();
  console.log('Browser launched');

  // Tear down in reverse dependency order.
  const cleanup = async () => {
    console.log('Cleaning up test environment...');
    await browser.close();
    await paperlessGpt.stop();
    await paperlessNgx.stop();
    await redis.stop();
    await postgres.stop();
    await network.stop();
    console.log('Test environment cleanup completed');
  };

  console.log('Test environment setup completed');
  return {
    paperlessNgx,
    paperlessGpt,
    browser,
    cleanup,
  };
}
/**
 * Waits until `selector` is present on `page`, failing after `timeout` ms
 * (defaults to 5000 ms). Thin convenience wrapper over page.waitForSelector.
 */
export async function waitForElement(page: any, selector: string, timeout = 5000): Promise<void> {
  const options = { timeout };
  await page.waitForSelector(selector, options);
}
export interface PaperlessDocument {
id: number;
title: string;
content: string;
tags: number[];
}
// Helper to upload a document via Paperless-ngx API
/**
 * Uploads a file to Paperless-ngx and polls the tasks endpoint until the
 * consumption task finishes, then returns the full document record.
 *
 * @param baseUrl     Root URL of the Paperless-ngx instance.
 * @param filePath    Path of the file to upload.
 * @param title       Title to assign to the new document.
 * @param credentials Basic-auth username/password.
 * @param timeoutMs   Maximum total time to wait for processing (default 5 min).
 * @returns The processed document as reported by /api/documents/{id}/.
 * @throws If the upload fails, the task fails, or the timeout elapses.
 */
export async function uploadDocument(
  baseUrl: string,
  filePath: string,
  title: string,
  credentials: { username: string; password: string },
  timeoutMs = 300_000
): Promise<PaperlessDocument> {
  console.log(`Uploading document: ${title} from ${filePath}`);
  const authHeader = 'Basic ' + btoa(`${credentials.username}:${credentials.password}`);
  const formData = new FormData();
  const fileData = await fs.promises.readFile(filePath);
  formData.append('document', new Blob([fileData]));
  formData.append('title', title);
  // Initial upload returns a task ID, not a document.
  const uploadResponse = await fetch(`${baseUrl}/api/documents/post_document/`, {
    method: 'POST',
    body: formData,
    headers: {
      'Authorization': authHeader,
    },
  });
  if (!uploadResponse.ok) {
    console.error(`Upload failed with status ${uploadResponse.status}: ${uploadResponse.statusText}`);
    throw new Error(`Failed to upload document: ${uploadResponse.statusText}`);
  }
  const task_id = await uploadResponse.json();
  // Poll the tasks endpoint until the document is processed or we time out.
  const deadline = Date.now() + timeoutMs;
  while (true) {
    if (Date.now() > deadline) {
      throw new Error(`Timed out after ${timeoutMs}ms waiting for document processing (task ${task_id})`);
    }
    console.log(`Checking task status for ID: ${task_id}`);
    const taskResponse = await fetch(`${baseUrl}/api/tasks/?task_id=${task_id}`, {
      headers: {
        'Authorization': authHeader,
      },
    });
    if (!taskResponse.ok) {
      throw new Error(`Failed to check task status: ${taskResponse.statusText}`);
    }
    const taskResultArr = await taskResponse.json();
    console.log(`Task status: ${JSON.stringify(taskResultArr)}`);
    if (taskResultArr.length > 0) {
      const taskResult = taskResultArr[0];
      // Task completed: fetch and return the full document record.
      if (taskResult.status === 'SUCCESS' && taskResult.id) {
        console.log(`Document processed successfully with ID: ${taskResult.id}`);
        const documentResponse = await fetch(`${baseUrl}/api/documents/${taskResult.id}/`, {
          headers: {
            'Authorization': authHeader,
          },
        });
        if (!documentResponse.ok) {
          throw new Error(`Failed to fetch document details: ${documentResponse.statusText}`);
        }
        return await documentResponse.json();
      }
      // Celery reports failed tasks as 'FAILURE'; the original only checked
      // 'FAILED' and would poll forever on a failure. Keep both for safety.
      if (taskResult.status === 'FAILURE' || taskResult.status === 'FAILED') {
        throw new Error(`Document processing failed: ${taskResult.result}`);
      }
    }
    // Bug fix: the original `continue`d without sleeping when the task list
    // was empty, busy-looping against the API. Always wait before re-polling.
    await new Promise(resolve => setTimeout(resolve, 1000));
  }
}
// Helper to create a tag via Paperless-ngx API
/**
 * Creates a tag in Paperless-ngx via the REST API.
 *
 * @returns The numeric ID of the newly created tag.
 * @throws If the tags endpoint responds with a non-2xx status.
 */
export async function createTag(
  baseUrl: string,
  name: string,
  credentials: { username: string; password: string }
): Promise<number> {
  console.log(`Creating tag: ${name}`);
  const requestInit = {
    method: 'POST',
    body: JSON.stringify({ name }),
    headers: {
      'Content-Type': 'application/json',
      'Authorization': 'Basic ' + btoa(`${credentials.username}:${credentials.password}`),
    },
  };
  const response = await fetch(`${baseUrl}/api/tags/`, requestInit);
  if (response.ok) {
    const tag = await response.json();
    console.log(`Tag created successfully with ID: ${tag.id}`);
    return tag.id;
  }
  console.error(`Tag creation failed with status ${response.status}: ${response.statusText}`);
  throw new Error(`Failed to create tag: ${response.statusText}`);
}
// Helper to get an API token
/**
 * Obtains a Paperless-ngx API token for the given user via /api/token/.
 *
 * @param baseUrl     Root URL of the Paperless-ngx instance.
 * @param credentials Username/password of the account to authenticate.
 * @returns The token string for use in `Authorization: Token ...` headers.
 * @throws If the token endpoint responds with a non-2xx status.
 */
export async function getApiToken(
  baseUrl: string,
  credentials: { username: string; password: string }
): Promise<string> {
  console.log('Fetching API token');
  const response = await fetch(`${baseUrl}/api/token/`, {
    method: 'POST',
    // URLSearchParams body sends application/x-www-form-urlencoded.
    body: new URLSearchParams({
      username: credentials.username,
      password: credentials.password,
    }),
  });
  if (!response.ok) {
    console.error(`API token fetch failed with status ${response.status}: ${response.statusText}`);
    throw new Error(`Failed to fetch API token: ${response.statusText}`);
  }
  const token = await response.json();
  // Security fix: do not log the token value itself — it is a credential and
  // test logs are often uploaded as CI artifacts.
  console.log('API token fetched successfully');
  return token.token;
}
// Helper to add a tag to a document.
// NOTE(review): the PATCH body sets `tags: [tagId]`, which replaces the
// document's entire tag list rather than appending — fine for freshly
// uploaded e2e documents, but confirm before reusing on tagged documents.
export async function addTagToDocument(
  baseUrl: string,
  documentId: number,
  tagId: number,
  credentials: { username: string; password: string }
): Promise<void> {
  console.log(`Adding tag ${tagId} to document ${documentId}`);
  const response = await fetch(`${baseUrl}/api/documents/${documentId}/`, {
    method: 'PATCH',
    body: JSON.stringify({
      tags: [tagId],
    }),
    headers: {
      'Content-Type': 'application/json',
      'Authorization': 'Basic ' + btoa(`${credentials.username}:${credentials.password}`),
    },
  });
  if (!response.ok) {
    console.error(`Tag addition failed with status ${response.status}: ${response.statusText}`);
    throw new Error(`Failed to add tag to document: ${response.statusText}`);
  }
  console.log('Tag added successfully');
}

2925
web-app/package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -8,48 +8,32 @@
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview",
"test": "echo \"TODO\"",
"test:e2e": "playwright test",
"test:e2e:ui": "playwright test --ui",
"docker:test:up": "docker compose -f docker-compose.test.yml up -d",
"docker:test:down": "docker compose -f docker-compose.test.yml down -v"
"test": "echo \"TODO\""
},
"dependencies": {
"@headlessui/react": "^2.1.8",
"@heroicons/react": "^2.1.5",
"@mdi/js": "^7.4.47",
"@mdi/react": "^1.6.1",
"axios": "^1.7.7",
"classnames": "^2.5.1",
"date-fns": "^4.1.0",
"prop-types": "^15.8.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-icons": "^5.3.0",
"react-router-dom": "^7.0.0",
"react-tag-autocomplete": "^7.3.0",
"react-tooltip": "^5.28.0",
"winston": "^3.17.0"
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-tag-autocomplete": "^7.3.0"
},
"devDependencies": {
"@eslint/js": "^9.9.0",
"@playwright/test": "^1.50.0",
"@types/node": "^22.10.10",
"@types/react": "^19.0.0",
"@types/react-dom": "^19.0.0",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@vitejs/plugin-react-swc": "^3.5.0",
"autoprefixer": "^10.4.20",
"dotenv": "^16.4.7",
"eslint": "^9.9.0",
"eslint-plugin-react-hooks": "^5.1.0-rc.0",
"eslint-plugin-react-refresh": "^0.4.9",
"globals": "^16.0.0",
"node-fetch": "^3.3.0",
"globals": "^15.9.0",
"postcss": "^8.4.47",
"tailwindcss": "^3.4.12",
"testcontainers": "^10.17.1",
"typescript": "^5.5.3",
"typescript-eslint": "^8.0.1",
"vite": "^6.0.0"
"vite": "^5.4.1"
}
}
}

View file

@ -1,36 +0,0 @@
import { defineConfig, devices } from '@playwright/test';
import dotenv from 'dotenv';
// Load test environment variables
dotenv.config({ path: '.env.test' });
/**
 * Playwright configuration for the e2e suite.
 * Containers are heavyweight, so timeouts are generous and tests run serially.
 */
export default defineConfig({
  testDir: './e2e',
  timeout: 120000, // Increased timeout for container startup
  expect: {
    timeout: 10000,
  },
  // Runs once per invocation before any test file.
  globalSetup: './e2e/setup/global-setup.ts',
  // Tests share containerized backends, so they must not run in parallel.
  fullyParallel: false,
  // Fail CI if a stray test.only slipped into the suite.
  forbidOnly: !!process.env.CI,
  retries: process.env.CI ? 2 : 0,
  workers: 1,
  reporter: 'html',
  use: {
    screenshot: 'on',
    // Target URL is overridable for runs against an already-running instance.
    baseURL: process.env.PAPERLESS_GPT_URL || 'http://localhost:8080',
    trace: 'retain-on-failure',
    video: 'retain-on-failure',
  },
  projects: [
    {
      name: 'chromium',
      use: {
        ...devices['Desktop Chrome'],
        viewport: { width: 1280, height: 720 },
      },
    },
  ],
  outputDir: 'test-results',
  preserveOutput: 'failures-only',
});

View file

@ -1,26 +1,13 @@
// App.tsx or App.jsx
import React from 'react';
import { Route, BrowserRouter as Router, Routes } from 'react-router-dom';
import Sidebar from './components/Sidebar';
import DocumentProcessor from './DocumentProcessor';
import ExperimentalOCR from './ExperimentalOCR'; // New component
import History from './History';
import './index.css';
const App: React.FC = () => {
return (
<Router>
<div style={{ display: "flex", height: "100vh" }}>
<Sidebar onSelectPage={(page) => console.log(page)} />
<div style={{ flex: 1, overflowY: "auto" }}>
<Routes>
<Route path="/" element={<DocumentProcessor />} />
<Route path="/experimental-ocr" element={<ExperimentalOCR />} />
<Route path="/history" element={<History />} />
</Routes>
return (
<div className="App">
<DocumentProcessor />
</div>
</div>
</Router>
);
);
};
export default App;

View file

@ -11,14 +11,12 @@ export interface Document {
title: string;
content: string;
tags: string[];
correspondent: string;
}
export interface GenerateSuggestionsRequest {
documents: Document[];
generate_titles?: boolean;
generate_tags?: boolean;
generate_correspondents?: boolean;
}
export interface DocumentSuggestion {
@ -26,8 +24,6 @@ export interface DocumentSuggestion {
original_document: Document;
suggested_title?: string;
suggested_tags?: string[];
suggested_content?: string;
suggested_correspondent?: string;
}
export interface TagOption {
@ -46,7 +42,6 @@ const DocumentProcessor: React.FC = () => {
const [filterTag, setFilterTag] = useState<string | null>(null);
const [generateTitles, setGenerateTitles] = useState(true);
const [generateTags, setGenerateTags] = useState(true);
const [generateCorrespondents, setGenerateCorrespondents] = useState(true);
const [error, setError] = useState<string | null>(null);
// Custom hook to fetch initial data
@ -85,7 +80,6 @@ const DocumentProcessor: React.FC = () => {
documents,
generate_titles: generateTitles,
generate_tags: generateTags,
generate_correspondents: generateCorrespondents,
};
const { data } = await axios.post<DocumentSuggestion[]>(
@ -135,30 +129,25 @@ const DocumentProcessor: React.FC = () => {
doc.id === docId
? {
...doc,
suggested_tags: doc.suggested_tags?.filter((_, i) => i !== index),
suggested_tags: doc.suggested_tags?.filter(
(_, i) => i !== index
),
}
: doc
)
);
};
const handleTitleChange = (docId: number, title: string) => {
setSuggestions((prevSuggestions) =>
prevSuggestions.map((doc) =>
doc.id === docId ? { ...doc, suggested_title: title } : doc
doc.id === docId
? { ...doc, suggested_title: title }
: doc
)
);
};
const handleCorrespondentChange = (docId: number, correspondent: string) => {
setSuggestions((prevSuggestions) =>
prevSuggestions.map((doc) =>
doc.id === docId ? { ...doc, suggested_correspondent: correspondent } : doc
)
);
}
const resetSuggestions = () => {
setSuggestions([]);
};
@ -193,12 +182,11 @@ const DocumentProcessor: React.FC = () => {
}
}, [documents]);
if (loading) {
return (
<div className="flex items-center justify-center min-h-screen bg-white dark:bg-gray-900">
<div className="text-xl font-semibold text-gray-800 dark:text-gray-200">
Loading documents...
</div>
<div className="text-xl font-semibold text-gray-800 dark:text-gray-200">Loading documents...</div>
</div>
);
}
@ -228,8 +216,6 @@ const DocumentProcessor: React.FC = () => {
setGenerateTitles={setGenerateTitles}
generateTags={generateTags}
setGenerateTags={setGenerateTags}
generateCorrespondents={generateCorrespondents}
setGenerateCorrespondents={setGenerateCorrespondents}
onProcess={handleProcessDocuments}
processing={processing}
onReload={reloadDocuments}
@ -241,7 +227,6 @@ const DocumentProcessor: React.FC = () => {
onTitleChange={handleTitleChange}
onTagAddition={handleTagAddition}
onTagDeletion={handleTagDeletion}
onCorrespondentChange={handleCorrespondentChange}
onBack={resetSuggestions}
onUpdate={handleUpdateDocuments}
updating={updating}
@ -259,4 +244,4 @@ const DocumentProcessor: React.FC = () => {
);
};
export default DocumentProcessor;
export default DocumentProcessor;

View file

@ -1,188 +0,0 @@
import axios from 'axios';
import React, { useCallback, useEffect, useState } from 'react';
import { FaSpinner } from 'react-icons/fa';
import { Document, DocumentSuggestion } from './DocumentProcessor';
const ExperimentalOCR: React.FC = () => {
const refreshInterval = 1000; // Refresh interval in milliseconds
const [documentId, setDocumentId] = useState(0);
const [jobId, setJobId] = useState('');
const [ocrResult, setOcrResult] = useState('');
const [status, setStatus] = useState('');
const [error, setError] = useState<string | null>('');
const [pagesDone, setPagesDone] = useState(0); // New state for pages done
const [saving, setSaving] = useState(false); // New state for saving
const [documentDetails, setDocumentDetails] = useState<Document | null>(null); // New state for document details
const fetchDocumentDetails = useCallback(async () => {
if (!documentId) return;
try {
const response = await axios.get<Document>(`/api/documents/${documentId}`);
setDocumentDetails(response.data);
} catch (err) {
console.error("Error fetching document details:", err);
setError("Failed to fetch document details.");
}
}, [documentId]);
const submitOCRJob = async () => {
setStatus('');
setError('');
setJobId('');
setOcrResult('');
setPagesDone(0); // Reset pages done
try {
setStatus('Fetching document details...');
await fetchDocumentDetails(); // Fetch document details before submitting the job
setStatus('Submitting OCR job...');
const response = await axios.post(`/api/documents/${documentId}/ocr`);
setJobId(response.data.job_id);
setStatus('Job submitted. Processing...');
} catch (err) {
console.error(err);
setError('Failed to submit OCR job.');
}
};
const checkJobStatus = async () => {
if (!jobId) return;
try {
const response = await axios.get(`/api/jobs/ocr/${jobId}`);
const jobStatus = response.data.status;
setPagesDone(response.data.pages_done); // Update pages done
if (jobStatus === 'completed') {
setOcrResult(response.data.result);
setStatus('OCR completed successfully.');
} else if (jobStatus === 'failed') {
setError(response.data.error);
setStatus('OCR failed.');
} else {
setStatus(`Job status: ${jobStatus}. This may take a few minutes.`);
// Automatically check again after a delay
setTimeout(checkJobStatus, refreshInterval);
}
} catch (err) {
console.error(err);
setError('Failed to check job status.');
}
};
const handleSaveContent = async () => {
setSaving(true);
setError(null);
try {
if (!documentDetails) {
setError('Document details not fetched.');
throw new Error('Document details not fetched.');
}
const requestPayload: DocumentSuggestion = {
id: documentId,
original_document: documentDetails, // Use fetched document details
suggested_content: ocrResult,
};
await axios.patch("/api/update-documents", [requestPayload]);
setStatus('Content saved successfully.');
} catch (err) {
console.error("Error saving content:", err);
setError("Failed to save content.");
} finally {
setSaving(false);
}
};
// Start checking job status when jobId is set
useEffect(() => {
if (jobId) {
checkJobStatus();
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [jobId]);
return (
<div className="max-w-3xl mx-auto p-6 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200">
<h1 className="text-4xl font-bold mb-6 text-center">OCR via LLMs (Experimental)</h1>
<p className="mb-6 text-center text-yellow-600">
This is an experimental feature. Results may vary, and processing may take some time.
</p>
<div className="bg-gray-100 dark:bg-gray-800 p-6 rounded-lg shadow-md">
<div className="mb-4">
<label htmlFor="documentId" className="block mb-2 font-semibold">
Document ID:
</label>
<input
type="number"
id="documentId"
value={documentId}
onChange={(e) => setDocumentId(Number(e.target.value))}
className="border border-gray-300 dark:border-gray-700 rounded w-full p-2 focus:outline-none focus:ring-2 focus:ring-blue-500"
placeholder="Enter the document ID"
/>
</div>
<button
onClick={submitOCRJob}
className="w-full bg-blue-600 hover:bg-blue-700 text-white font-semibold py-2 px-4 rounded transition duration-200"
disabled={!documentId}
>
{status.startsWith('Submitting') ? (
<span className="flex items-center justify-center">
<FaSpinner className="animate-spin mr-2" />
Submitting...
</span>
) : (
'Submit OCR Job'
)}
</button>
{status && (
<div className="mt-4 text-center text-gray-700 dark:text-gray-300">
{status.includes('in_progress') && (
<span className="flex items-center justify-center">
<FaSpinner className="animate-spin mr-2" />
{status}
</span>
)}
{!status.includes('in_progress') && status}
{pagesDone > 0 && (
<div className="mt-2">
Pages processed: {pagesDone}
</div>
)}
</div>
)}
{error && (
<div className="mt-4 p-4 bg-red-100 dark:bg-red-800 text-red-700 dark:text-red-200 rounded">
{error}
</div>
)}
{ocrResult && (
<div className="mt-6">
<h2 className="text-2xl font-bold mb-4">OCR Result:</h2>
<div className="bg-gray-50 dark:bg-gray-900 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-auto max-h-96">
<pre className="whitespace-pre-wrap">{ocrResult}</pre>
</div>
<button
onClick={handleSaveContent}
className="w-full bg-green-600 hover:bg-green-700 text-white font-semibold py-2 px-4 rounded transition duration-200 mt-4"
disabled={saving}
>
{saving ? (
<span className="flex items-center justify-center">
<FaSpinner className="animate-spin mr-2" />
Saving...
</span>
) : (
'Save Content'
)}
</button>
</div>
)}
</div>
</div>
);
};
export default ExperimentalOCR;

View file

@ -1,177 +0,0 @@
import React, { useEffect, useState } from 'react';
import UndoCard from './components/UndoCard';
interface ModificationHistory {
ID: number;
DocumentID: number;
DateChanged: string;
ModField: string;
PreviousValue: string;
NewValue: string;
Undone: boolean;
UndoneDate: string | null;
}
interface PaginatedResponse {
items: ModificationHistory[];
totalItems: number;
totalPages: number;
currentPage: number;
pageSize: number;
}
const History: React.FC = () => {
const [modifications, setModifications] = useState<ModificationHistory[]>([]);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const [paperlessUrl, setPaperlessUrl] = useState<string>('');
const [currentPage, setCurrentPage] = useState(1);
const [totalPages, setTotalPages] = useState(1);
const [totalItems, setTotalItems] = useState(0);
const pageSize = 20;
// Get Paperless URL
useEffect(() => {
const fetchUrl = async () => {
try {
const response = await fetch('/api/paperless-url');
if (!response.ok) {
throw new Error('Failed to fetch public URL');
}
const { url } = await response.json();
setPaperlessUrl(url);
} catch (err) {
console.error('Error fetching Paperless URL:', err);
}
};
fetchUrl();
}, []);
// Get modifications with pagination
useEffect(() => {
fetchModifications(currentPage);
}, [currentPage]);
const fetchModifications = async (page: number) => {
setLoading(true);
try {
const response = await fetch(`/api/modifications?page=${page}&pageSize=${pageSize}`);
if (!response.ok) {
throw new Error('Failed to fetch modifications');
}
const data: PaginatedResponse = await response.json();
setModifications(data.items);
setTotalPages(data.totalPages);
setTotalItems(data.totalItems);
} catch (err) {
setError(err instanceof Error ? err.message : 'Unknown error occurred');
} finally {
setLoading(false);
}
};
const handleUndo = async (id: number) => {
try {
const response = await fetch(`/api/undo-modification/${id}`, {
method: 'POST',
});
if (!response.ok) {
throw new Error('Failed to undo modification');
}
// Use ISO 8601 format for consistency
const now = new Date().toISOString();
setModifications(mods => mods.map(mod =>
mod.ID === id
? { ...mod, Undone: true, UndoneDate: now }
: mod
));
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to undo modification');
}
};
if (loading) {
return (
<div className="flex justify-center items-center min-h-screen">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-500" />
</div>
);
}
if (error) {
return (
<div className="text-red-500 dark:text-red-400 p-4 text-center">
Error: {error}
</div>
);
}
return (
<div className="modification-history container mx-auto px-4 py-8">
<h1 className="text-2xl font-bold text-gray-800 dark:text-gray-200">
Modification History
</h1>
<div className="mb-6 text-sm text-gray-500 dark:text-gray-400">
Note: when undoing tag changes, this will not re-add 'paperless-gpt-auto'
</div>
{modifications.length === 0 ? (
<p className="text-gray-500 dark:text-gray-400 text-center">
No modifications found
</p>
) : (
<>
<div className="grid gap-4 md:grid-cols-1 lg:grid-cols-1 mb-6">
{modifications.map((modification) => (
<UndoCard
key={modification.ID}
{...modification}
onUndo={handleUndo}
paperlessUrl={paperlessUrl}
/>
))}
</div>
<div className="flex items-center justify-between border-t border-gray-200 dark:border-gray-700 pt-4">
<div className="flex items-center text-sm text-gray-500 dark:text-gray-400">
<span>
Showing {((currentPage - 1) * pageSize) + 1} to {Math.min(currentPage * pageSize, totalItems)} of {totalItems} results
</span>
</div>
<div className="flex items-center space-x-2">
<button
onClick={() => setCurrentPage(page => Math.max(1, page - 1))}
disabled={currentPage === 1}
className={`px-3 py-1 rounded-md ${
currentPage === 1
? 'bg-gray-100 text-gray-400 cursor-not-allowed dark:bg-gray-800'
: 'bg-blue-500 text-white hover:bg-blue-600 dark:bg-blue-600 dark:hover:bg-blue-700'
}`}
>
Previous
</button>
<span className="text-sm text-gray-600 dark:text-gray-300">
Page {currentPage} of {totalPages}
</span>
<button
onClick={() => setCurrentPage(page => Math.min(totalPages, page + 1))}
disabled={currentPage === totalPages}
className={`px-3 py-1 rounded-md ${
currentPage === totalPages
? 'bg-gray-100 text-gray-400 cursor-not-allowed dark:bg-gray-800'
: 'bg-blue-500 text-white hover:bg-blue-600 dark:bg-blue-600 dark:hover:bg-blue-700'
}`}
>
Next
</button>
</div>
</div>
</>
)}
</div>
);
};
export default History;

View file

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 1000 1000" style="enable-background:new 0 0 1000 1000;" xml:space="preserve">
<style type="text/css">
.st0{fill:#FFFFFF;}
</style>
<path class="st0" d="M299,891.7c-4.2-19.8-12.5-59.6-13.6-59.6c-176.7-105.7-155.8-288.7-97.3-393.4
c12.5,131.8,245.8,222.8,109.8,383.9c-1.1,2,6.2,27.2,12.5,50.2c27.2-46,68-101.4,65.8-106.7C208.9,358.2,731.9,326.9,840.6,73.7
c49.1,244.8-25.1,623.5-445.5,719.7c-2,1.1-76.3,131.8-79.5,132.9c0-2-31.4-1.1-27.2-11.5C290.7,908.4,294.8,900.1,299,891.7
L299,891.7z M293.8,793.4c53.3-61.8-9.4-167.4-47.1-201.9C310.5,701.3,306.3,765.1,293.8,793.4L293.8,793.4z"/>
</svg>

Before

Width:  |  Height:  |  Size: 869 B

View file

@ -6,16 +6,13 @@ interface DocumentCardProps {
}
const DocumentCard: React.FC<DocumentCardProps> = ({ document }) => (
<div className="document-card bg-white dark:bg-gray-800 shadow-lg shadow-blue-500/50 rounded-md p-4 relative group overflow-hidden">
<div className="bg-white dark:bg-gray-800 shadow-lg shadow-blue-500/50 rounded-md p-4 relative group overflow-hidden">
<h3 className="text-lg font-semibold text-gray-800 dark:text-gray-200">{document.title}</h3>
<p className="text-sm text-gray-600 dark:text-gray-400 mt-2 truncate">
{document.content.length > 100
? `${document.content.substring(0, 100)}...`
: document.content}
</p>
<p className="text-sm text-gray-600 dark:text-gray-400 mt-2">
Correspondent: <span className="font-bold text-blue-600 dark:text-blue-400">{document.correspondent}</span>
</p>
<div className="mt-4">
{document.tags.map((tag) => (
<span
@ -30,9 +27,6 @@ const DocumentCard: React.FC<DocumentCardProps> = ({ document }) => (
<div className="text-sm text-white p-2 bg-gray-800 dark:bg-gray-900 rounded-md w-full max-h-full overflow-y-auto">
<h3 className="text-lg font-semibold text-white">{document.title}</h3>
<p className="mt-2 whitespace-pre-wrap">{document.content}</p>
<p className="mt-2">
Correspondent: <span className="font-bold text-blue-400">{document.correspondent}</span>
</p>
<div className="mt-4">
{document.tags.map((tag) => (
<span

View file

@ -9,8 +9,6 @@ interface DocumentsToProcessProps {
setGenerateTitles: React.Dispatch<React.SetStateAction<boolean>>;
generateTags: boolean;
setGenerateTags: React.Dispatch<React.SetStateAction<boolean>>;
generateCorrespondents: boolean;
setGenerateCorrespondents: React.Dispatch<React.SetStateAction<boolean>>;
onProcess: () => void;
processing: boolean;
onReload: () => void;
@ -22,8 +20,6 @@ const DocumentsToProcess: React.FC<DocumentsToProcessProps> = ({
setGenerateTitles,
generateTags,
setGenerateTags,
generateCorrespondents,
setGenerateCorrespondents,
onProcess,
processing,
onReload,
@ -68,15 +64,6 @@ const DocumentsToProcess: React.FC<DocumentsToProcessProps> = ({
/>
<span className="text-gray-700 dark:text-gray-200">Generate Tags</span>
</label>
<label className="flex items-center space-x-2">
<input
type="checkbox"
checked={generateCorrespondents}
onChange={(e) => setGenerateCorrespondents(e.target.checked)}
className="dark:bg-gray-700 dark:border-gray-600"
/>
<span className="text-gray-700 dark:text-gray-200">Generate Correspondents</span>
</label>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">

View file

@ -1,75 +0,0 @@
.sidebar {
width: 250px;
background-color: #2c3e50;
color: #ecf0f1;
display: flex;
flex-direction: column;
transition: width 0.3s;
}
.sidebar.collapsed {
width: 60px;
}
.sidebar-header {
display: flex;
align-items: center;
padding: 10px;
background-color: #34495e;
justify-content: space-between;
}
.sidebar-header.collapsed {
justify-content: center;
}
.logo {
height: 40px;
margin-right: 10px;
}
.menu-items {
list-style: none;
padding: 0;
margin: 0;
}
.menu-items li {
padding: 15px 20px;
cursor: pointer;
}
.menu-items li.active {
background-color: darkslategray;
padding: 15px 20px;
cursor: pointer;
}
.menu-items li:hover {
background-color: #1abc9c;
}
.menu-items li a {
text-decoration: none;
color: inherit;
font-size: 18px;
}
.toggle-btn {
background: none;
border: none;
color: white;
font-size: 24px;
cursor: pointer;
}
.sidebar.collapsed .menu-items li a {
display: none;
}
.sidebar.collapsed .logo {
height: 40px;
margin: auto;
}

View file

@ -1,97 +0,0 @@
import { mdiHistory, mdiHomeOutline, mdiTextBoxSearchOutline } from "@mdi/js";
import { Icon } from "@mdi/react";
import axios from "axios";
import React, { useCallback, useEffect, useState } from "react";
import { Link, useLocation } from "react-router-dom";
import logo from "../assets/logo.svg";
import "./Sidebar.css";
interface SidebarProps {
onSelectPage: (page: string) => void;
}
const Sidebar: React.FC<SidebarProps> = ({ onSelectPage }) => {
const [collapsed, setCollapsed] = useState(false);
const location = useLocation();
const toggleSidebar = () => {
setCollapsed(!collapsed);
};
const handlePageClick = (page: string) => {
onSelectPage(page);
};
// Get whether experimental OCR is enabled
const [ocrEnabled, setOcrEnabled] = useState(false);
const fetchOcrEnabled = useCallback(async () => {
try {
const res = await axios.get<{ enabled: boolean }>(
"/api/experimental/ocr"
);
setOcrEnabled(res.data.enabled);
} catch (err) {
console.error(err);
}
}, []);
useEffect(() => {
fetchOcrEnabled();
}, [fetchOcrEnabled]);
const menuItems = [
{ name: "home", path: "/", icon: mdiHomeOutline, title: "Home" },
{ name: "history", path: "/history", icon: mdiHistory, title: "History" },
];
// If OCR is enabled, add the OCR menu item
if (ocrEnabled) {
menuItems.push({
name: "ocr",
path: "/experimental-ocr",
icon: mdiTextBoxSearchOutline,
title: "OCR",
});
}
return (
<div className={`sidebar min-w-[64px] ${collapsed ? "collapsed" : ""}`}>
<div className={`sidebar-header ${collapsed ? "collapsed" : ""}`}>
{!collapsed && (
<img
src={logo}
alt="Logo"
className="logo w-8 h-8 object-contain flex-shrink-0"
/>
)}
<button className="toggle-btn" onClick={toggleSidebar}>
&#9776;
</button>
</div>
<ul className="menu-items">
{menuItems.map((item) => (
<li
key={item.name}
className={location.pathname === item.path ? "active" : ""}
onClick={() => handlePageClick(item.name)}
>
<Link
to={item.path}
onClick={() => handlePageClick(item.name)}
style={{ display: "flex", alignItems: "center" }}
>
{/* <Icon path={item.icon} size={1} />
{!collapsed && <span>&nbsp; {item.title}</span>} */}
<div className="w-7 h-7 flex items-center justify-center flex-shrink-0">
<Icon path={item.icon} size={1} />
</div>
{!collapsed && <span className="ml-2">{item.title}</span>}
</Link>
</li>
))}
</ul>
</div>
);
};
export default Sidebar;

View file

@ -11,7 +11,7 @@ const SuccessModal: React.FC<SuccessModalProps> = ({ isOpen, onClose }) => (
<Transition show={isOpen} as={Fragment}>
<Dialog
as="div"
className="success-modal fixed z-10 inset-0 overflow-y-auto"
className="fixed z-10 inset-0 overflow-y-auto"
open={isOpen}
onClose={onClose}
>

View file

@ -8,7 +8,6 @@ interface SuggestionCardProps {
onTitleChange: (docId: number, title: string) => void;
onTagAddition: (docId: number, tag: TagOption) => void;
onTagDeletion: (docId: number, index: number) => void;
onCorrespondentChange: (docId: number, correspondent: string) => void;
}
const SuggestionCard: React.FC<SuggestionCardProps> = ({
@ -17,7 +16,6 @@ const SuggestionCard: React.FC<SuggestionCardProps> = ({
onTitleChange,
onTagAddition,
onTagDeletion,
onCorrespondentChange,
}) => {
const sortedAvailableTags = availableTags.sort((a, b) => a.name.localeCompare(b.name));
const document = suggestion.original_document;
@ -51,9 +49,6 @@ const SuggestionCard: React.FC<SuggestionCardProps> = ({
</div>
</div>
<div className="mt-4">
<label className="block text-sm font-medium text-gray-700 dark:text-gray-300">
Suggested Title
</label>
<input
type="text"
value={suggestion.suggested_title || ""}
@ -61,9 +56,6 @@ const SuggestionCard: React.FC<SuggestionCardProps> = ({
className="w-full border border-gray-300 dark:border-gray-600 rounded px-2 py-1 mt-2 focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-gray-700 dark:text-gray-200"
/>
<div className="mt-4">
<label className="block text-sm font-medium text-gray-700 dark:text-gray-300">
Suggested Tags
</label>
<ReactTags
selected={
suggestion.suggested_tags?.map((tag, index) => ({
@ -107,18 +99,6 @@ const SuggestionCard: React.FC<SuggestionCardProps> = ({
}}
/>
</div>
<div className="mt-4">
<label className="block text-sm font-medium text-gray-700 dark:text-gray-300">
Suggested Correspondent
</label>
<input
type="text"
value={suggestion.suggested_correspondent || ""}
onChange={(e) => onCorrespondentChange(suggestion.id, e.target.value)}
className="w-full border border-gray-300 dark:border-gray-600 rounded px-2 py-1 mt-2 focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-gray-700 dark:text-gray-200"
placeholder="Correspondent"
/>
</div>
</div>
</div>
);

View file

@ -8,7 +8,6 @@ interface SuggestionsReviewProps {
onTitleChange: (docId: number, title: string) => void;
onTagAddition: (docId: number, tag: TagOption) => void;
onTagDeletion: (docId: number, index: number) => void;
onCorrespondentChange: (docId: number, correspondent: string) => void;
onBack: () => void;
onUpdate: () => void;
updating: boolean;
@ -20,12 +19,11 @@ const SuggestionsReview: React.FC<SuggestionsReviewProps> = ({
onTitleChange,
onTagAddition,
onTagDeletion,
onCorrespondentChange,
onBack,
onUpdate,
updating,
}) => (
<section className="suggestions-review">
<section>
<h2 className="text-2xl font-semibold text-gray-700 dark:text-gray-200 mb-6">
Review and Edit Suggested Titles
</h2>
@ -38,7 +36,6 @@ const SuggestionsReview: React.FC<SuggestionsReviewProps> = ({
onTitleChange={onTitleChange}
onTagAddition={onTagAddition}
onTagDeletion={onTagDeletion}
onCorrespondentChange={onCorrespondentChange}
/>
))}
</div>

View file

@ -1,193 +0,0 @@
// UndoCard.tsx
// Card showing one recorded document modification (a single changed field)
// with its previous/new values, a deep link to the document in paperless,
// and an Undo button that asks the parent to revert the change.
import React from 'react';
import { Tooltip } from 'react-tooltip';

// Props mirror the backend's modification record (hence the PascalCase field
// names), plus the undo callback and paperless base URL supplied by the parent.
interface ModificationProps {
  ID: number;                    // modification record id, passed to onUndo
  DocumentID: number;            // paperless document the change applies to
  DateChanged: string;           // timestamp of the modification
  ModField: string;              // name of the changed field (e.g. 'tags')
  PreviousValue: string;         // value before the change; for 'tags' this is a JSON array string
  NewValue: string;              // value after the change
  Undone: boolean;               // true once this change has been reverted
  UndoneDate: string | null;     // when it was reverted, if Undone
  onUndo: (id: number) => void;  // parent handler performing the revert
  paperlessUrl: string;          // base URL of the paperless instance for the document link
}

// Formats a date string as "YYYY-MM-DD HH:MM" in local time.
// Returns '' for null/empty input and 'Invalid date' when parsing fails.
const formatDate = (dateString: string | null): string => {
  if (!dateString) return '';
  try {
    const date = new Date(dateString);
    // Check if date is valid
    if (isNaN(date.getTime())) {
      return 'Invalid date';
    }
    return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}`;
  } catch {
    return 'Invalid date';
  }
};

// Builds the deep link to a document's detail page in paperless.
const buildPaperlessUrl = (paperlessUrl: string, documentId: number): string => {
  return `${paperlessUrl}/documents/${documentId}/details`;
};

const UndoCard: React.FC<ModificationProps> = ({
  ID,
  DocumentID,
  DateChanged,
  ModField,
  PreviousValue,
  NewValue,
  Undone,
  UndoneDate,
  onUndo,
  paperlessUrl,
}) => {
  // Renders a stored raw value for display:
  // - 'tags' values are parsed as a JSON string array and shown as pills
  //   (falls back to the raw string if parsing fails),
  // - fields whose name contains 'date' are reformatted via formatDate,
  // - everything else is shown as-is.
  const formatValue = (value: string, field: string) => {
    if (field === 'tags') {
      try {
        const tags = JSON.parse(value) as string[];
        return (
          <div className="flex flex-wrap gap-1">
            {tags.map((tag) => (
              <span
                key={tag}
                className="bg-blue-100 dark:bg-blue-900 text-blue-800 dark:text-blue-200 text-xs font-medium px-2.5 py-0.5 rounded-full"
              >
                {tag}
              </span>
            ))}
          </div>
        );
      } catch {
        return value;
      }
    } else if (field.toLowerCase().includes('date')) {
      return formatDate(value);
    }
    return value;
  };

  // Layout: a 6-column grid — 5 columns of metadata + previous/new values,
  // 1 column for the undo button. Both value rows get struck through once
  // the change has been undone.
  return (
    <div className="undo-card relative bg-white dark:bg-gray-800 p-4 rounded-md shadow-md">
      <div className="grid grid-cols-6">
        <div className="col-span-5"> {/* Left content */}
          <div className="grid grid-cols-3 gap-4 mb-4">
            <div className="">
              <div className="text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1">
                Date Modified
              </div>
              <div className="text-sm text-gray-700 dark:text-gray-300">
                {DateChanged && formatDate(DateChanged)}
              </div>
            </div>
            <div className="">
              {/* Document ID cell doubles as a link to the document in paperless */}
              <a
                href={buildPaperlessUrl(paperlessUrl, DocumentID)}
                target="_blank"
                rel="noopener noreferrer"
                className="text-blue-500 hover:text-blue-600 dark:text-blue-400 dark:hover:text-blue-300"
              >
                <div className="text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1">
                  Document ID
                </div>
                <div className="text-sm text-gray-700 dark:text-gray-300">
                  {DocumentID}
                </div>
              </a>
            </div>
            <div className="">
              <div className="text-xs uppercase text-gray-500 dark:text-gray-400 font-semibold mb-1">
                Modified Field
              </div>
              <div className="text-sm text-gray-700 dark:text-gray-300">
                {ModField}
              </div>
            </div>
          </div>
          <div className="mt-3">
            <div className="mt-2 space-y-2">
              <div className={`text-sm flex flex-nowrap ${Undone ? 'line-through' : ''}`}>
                <span className="text-red-500 dark:text-red-400">Previous: &nbsp;</span>
                <span
                  className="text-gray-600 dark:text-gray-300 truncate overflow-hidden flex-shrink-0 whitespace-nowrap flex-1 max-w-full group relative"
                  { // Add tooltip if value is too long and not tags
                  ...(ModField !== 'tags' && PreviousValue.length > 100 ? {
                    'data-tooltip-id': `tooltip-${ID}-prev`
                  } : {})}
                >
                  {formatValue(PreviousValue, ModField)}
                </span>
              </div>
              <div className={`text-sm flex flex-nowrap ${Undone ? 'line-through' : ''}`}>
                <span className="text-green-500 dark:text-green-400">New: &nbsp;</span>
                <span
                  className="text-gray-600 dark:text-gray-300 truncate overflow-hidden flex-shrink-0 whitespace-nowrap flex-1 max-w-full group relative"
                  { // Add tooltip if value is too long and not tags
                  ...(ModField !== 'tags' && NewValue.length > 100 ? {
                    'data-tooltip-id': `tooltip-${ID}-new`
                  } : {})}
                >
                  {formatValue(NewValue, ModField)}
                </span>
              </div>
            </div>
            {/* Tooltips render unconditionally; they only activate when the
                matching data-tooltip-id was attached above (value > 100 chars
                and not tags). They show the full untruncated raw value. */}
            <Tooltip
              id={`tooltip-${ID}-prev`}
              place="bottom"
              className="flex-wrap"
              style={{
                flexWrap: 'wrap',
                wordWrap: 'break-word',
                zIndex: 10,
                whiteSpace: 'pre-line',
                textAlign: 'left',
              }}
            >
              {PreviousValue}
            </Tooltip>
            <Tooltip
              id={`tooltip-${ID}-new`}
              place="bottom"
              className="flex-wrap"
              style={{
                flexWrap: 'wrap',
                wordWrap: 'break-word',
                zIndex: 10,
                whiteSpace: 'pre-line',
                textAlign: 'left',
              }}
            >
              {NewValue}
            </Tooltip>
          </div>
        </div>
        <div className="grid place-items-center"> {/* Button content */}
          {/* Disabled after the change is undone; then shows the undo date instead */}
          <button
            onClick={() => onUndo(ID)}
            disabled={Undone}
            className={`mt-2 mb-2 p-4 text-sm font-medium rounded-md min-w-[100px] max-w-[150px] text-center break-words ${Undone
              ? 'bg-gray-300 dark:bg-gray-700 text-gray-500 dark:text-gray-400 cursor-not-allowed'
              : 'bg-blue-500 dark:bg-blue-600 text-white hover:bg-blue-600 dark:hover:bg-blue-700'
              } transition-colors duration-200`}
          >
            {Undone ? (
              <>
                <span className="block text-xs">Undone on</span>
                <span className="block text-xs">{formatDate(UndoneDate)}</span>
              </>
            ) : (
              'Undo'
            )}
          </button>
        </div>
      </div>
    </div>
  );
};
export default UndoCard;

View file

@ -1 +1 @@
{"root":["./src/app.tsx","./src/documentprocessor.tsx","./src/experimentalocr.tsx","./src/history.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/documentcard.tsx","./src/components/documentstoprocess.tsx","./src/components/nodocuments.tsx","./src/components/sidebar.tsx","./src/components/successmodal.tsx","./src/components/suggestioncard.tsx","./src/components/suggestionsreview.tsx","./src/components/undocard.tsx"],"version":"5.7.3"}
{"root":["./src/app.tsx","./src/documentprocessor.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/documentcard.tsx","./src/components/documentstoprocess.tsx","./src/components/nodocuments.tsx","./src/components/successmodal.tsx","./src/components/suggestioncard.tsx","./src/components/suggestionsreview.tsx"],"version":"5.6.2"}

View file

@ -1 +1 @@
{"root":["./vite.config.ts"],"version":"5.7.3"}
{"root":["./vite.config.ts"],"version":"5.6.2"}