Merge branch 'main' into hocr

This commit is contained in:
Dominik Schröter 2025-03-10 09:12:34 +01:00
commit fc03f7e0c9
6 changed files with 165 additions and 84 deletions

View file

@ -25,22 +25,22 @@ COPY web-app /app/
RUN npm run build RUN npm run build
# Stage 2: Build the Go binary # Stage 2: Build the Go binary
FROM golang:1.23.6-alpine3.21 AS builder FROM golang:1.24.0-alpine3.21 AS builder
# Set the working directory inside the container # Set the working directory inside the container
WORKDIR /app WORKDIR /app
# Package versions for Renovate # Package versions for Renovate
# renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose # renovate: datasource=repology depName=alpine_3_21/gcc versioning=loose
ENV GCC_VERSION=14.2.0-r4 ENV GCC_VERSION="14.2.0-r4"
# renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose # renovate: datasource=repology depName=alpine_3_21/musl-dev versioning=loose
ENV MUSL_DEV_VERSION=1.2.5-r8 ENV MUSL_DEV_VERSION="1.2.5-r9"
# renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose # renovate: datasource=repology depName=alpine_3_21/mupdf versioning=loose
ENV MUPDF_VERSION=1.24.10-r0 ENV MUPDF_VERSION="1.24.10-r0"
# renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose # renovate: datasource=repology depName=alpine_3_21/mupdf-dev versioning=loose
ENV MUPDF_DEV_VERSION=1.24.10-r0 ENV MUPDF_DEV_VERSION="1.24.10-r0"
# renovate: datasource=repology depName=alpine_3_21/sed versioning=loose # renovate: datasource=repology depName=alpine_3_21/sed versioning=loose
ENV SED_VERSION=4.9-r2 ENV SED_VERSION="4.9-r2"
# Install necessary packages with pinned versions # Install necessary packages with pinned versions
RUN apk add --no-cache \ RUN apk add --no-cache \

28
go.mod
View file

@ -5,7 +5,7 @@ go 1.22.7
toolchain go1.23.6 toolchain go1.23.6
require ( require (
cloud.google.com/go/documentai v1.35.1 cloud.google.com/go/documentai v1.35.2
github.com/Masterminds/sprig/v3 v3.3.0 github.com/Masterminds/sprig/v3 v3.3.0
github.com/fatih/color v1.18.0 github.com/fatih/color v1.18.0
github.com/gabriel-vasile/mimetype v1.4.8 github.com/gabriel-vasile/mimetype v1.4.8
@ -16,17 +16,17 @@ require (
github.com/stretchr/testify v1.10.0 github.com/stretchr/testify v1.10.0
github.com/tmc/langchaingo v0.1.13-pre.1 github.com/tmc/langchaingo v0.1.13-pre.1
golang.org/x/sync v0.11.0 golang.org/x/sync v0.11.0
google.golang.org/api v0.220.0 google.golang.org/api v0.221.0
gorm.io/driver/sqlite v1.5.7 gorm.io/driver/sqlite v1.5.7
gorm.io/gorm v1.25.12 gorm.io/gorm v1.25.12
) )
require ( require (
cloud.google.com/go v0.116.0 // indirect cloud.google.com/go v0.118.1 // indirect
cloud.google.com/go/auth v0.14.1 // indirect cloud.google.com/go/auth v0.14.1 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
cloud.google.com/go/compute/metadata v0.6.0 // indirect cloud.google.com/go/compute/metadata v0.6.0 // indirect
cloud.google.com/go/longrunning v0.6.2 // indirect cloud.google.com/go/longrunning v0.6.4 // indirect
dario.cat/mergo v1.0.1 // indirect dario.cat/mergo v1.0.1 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/Masterminds/semver/v3 v3.3.0 // indirect
@ -81,16 +81,16 @@ require (
go.opentelemetry.io/otel/metric v1.34.0 // indirect go.opentelemetry.io/otel/metric v1.34.0 // indirect
go.opentelemetry.io/otel/trace v1.34.0 // indirect go.opentelemetry.io/otel/trace v1.34.0 // indirect
golang.org/x/arch v0.8.0 // indirect golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.32.0 // indirect golang.org/x/crypto v0.33.0 // indirect
golang.org/x/net v0.34.0 // indirect golang.org/x/net v0.35.0 // indirect
golang.org/x/oauth2 v0.25.0 // indirect golang.org/x/oauth2 v0.26.0 // indirect
golang.org/x/sys v0.29.0 // indirect golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.21.0 // indirect golang.org/x/text v0.22.0 // indirect
golang.org/x/time v0.9.0 // indirect golang.org/x/time v0.10.0 // indirect
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 // indirect google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 // indirect
google.golang.org/grpc v1.70.0 // indirect google.golang.org/grpc v1.70.0 // indirect
google.golang.org/protobuf v1.36.4 // indirect google.golang.org/protobuf v1.36.5 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect
) )

56
go.sum
View file

@ -1,15 +1,15 @@
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE= cloud.google.com/go v0.118.1 h1:b8RATMcrK9A4BH0rj8yQupPXp+aP+cJ0l6H7V9osV1E=
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U= cloud.google.com/go v0.118.1/go.mod h1:CFO4UPEPi8oV21xoezZCrd3d81K4fFkDTEJu4R8K+9M=
cloud.google.com/go/auth v0.14.1 h1:AwoJbzUdxA/whv1qj3TLKwh3XX5sikny2fc40wUl+h0= cloud.google.com/go/auth v0.14.1 h1:AwoJbzUdxA/whv1qj3TLKwh3XX5sikny2fc40wUl+h0=
cloud.google.com/go/auth v0.14.1/go.mod h1:4JHUxlGXisL0AW8kXPtUF6ztuOksyfUQNFjfsOCXkPM= cloud.google.com/go/auth v0.14.1/go.mod h1:4JHUxlGXisL0AW8kXPtUF6ztuOksyfUQNFjfsOCXkPM=
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M= cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc= cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
cloud.google.com/go/documentai v1.35.1 h1:52RfiUsoblXcE57CfKJGnITWLxRM30BcqNk/BKZl2LI= cloud.google.com/go/documentai v1.35.2 h1:hswVobCWUTXtmn+4QqUIVkai7sDOe0QS2KB3IpqLkik=
cloud.google.com/go/documentai v1.35.1/go.mod h1:WJjwUAQfwQPJORW8fjz7RODprMULDzEGLA2E6WxenFw= cloud.google.com/go/documentai v1.35.2/go.mod h1:oh/0YXosgEq3hVhyH4ZQ7VNXPaveRO4eLVM3tBSZOsI=
cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0MK+hc= cloud.google.com/go/longrunning v0.6.4 h1:3tyw9rO3E2XVXzSApn1gyEEnH2K9SynNQjMlBi3uHLg=
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI= cloud.google.com/go/longrunning v0.6.4/go.mod h1:ttZpLCe6e7EXvn9OxpBRx7kZEB0efv8yBO6YnVMfhJs=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
@ -177,38 +177,38 @@ go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70= golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/api v0.220.0 h1:3oMI4gdBgB72WFVwE1nerDD8W3HUOS4kypK6rRLbGns= google.golang.org/api v0.221.0 h1:qzaJfLhDsbMeFee8zBRdt/Nc+xmOuafD/dbdgGfutOU=
google.golang.org/api v0.220.0/go.mod h1:26ZAlY6aN/8WgpCzjPNy18QpYaz7Zgg1h0qe1GkZEmY= google.golang.org/api v0.221.0/go.mod h1:7sOU2+TL4TxUTdbi0gWgAIg7tH5qBXxoyhtL+9x3biQ=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk= google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4 h1:Pw6WnI9W/LIdRxqK7T6XGugGbHIRl5Q7q3BssH6xk4s=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc= google.golang.org/genproto v0.0.0-20250122153221-138b5a5a4fd4/go.mod h1:qbZzneIOXSq+KFAFut9krLfRLZiFLzZL5u2t8SV83EE=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q= google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6 h1:L9JNMl/plZH9wmzQUHleO/ZZDSN+9Gh41wPczNy+5Fk=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08= google.golang.org/genproto/googleapis/api v0.0.0-20250207221924-e9438ea467c6/go.mod h1:iYONQfRdizDB8JJBybql13nArx91jcUk7zCXEsOofM4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 h1:J1H9f+LEdWAfHcez/4cvaVBox7cOYT+IU6rgqj5x++8= google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6 h1:2duwAxN2+k0xLNpjnHTXoMUgnv6VPSp5fiqTuwSxjmI=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk= google.golang.org/genproto/googleapis/rpc v0.0.0-20250207221924-e9438ea467c6/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk=
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ= google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw= google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM= google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=

40
main.go
View file

@ -32,26 +32,26 @@ var (
log = logrus.New() log = logrus.New()
// Environment Variables // Environment Variables
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",") paperlessInsecureSkipVerify = os.Getenv("PAPERLESS_INSECURE_SKIP_VERIFY") == "true"
correspondentBlackList = strings.Split(os.Getenv("CORRESPONDENT_BLACK_LIST"), ",")
paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL") paperlessBaseURL = os.Getenv("PAPERLESS_BASE_URL")
paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN") paperlessAPIToken = os.Getenv("PAPERLESS_API_TOKEN")
openaiAPIKey = os.Getenv("OPENAI_API_KEY") openaiAPIKey = os.Getenv("OPENAI_API_KEY")
manualTag = os.Getenv("MANUAL_TAG") manualTag = os.Getenv("MANUAL_TAG")
autoTag = os.Getenv("AUTO_TAG") autoTag = os.Getenv("AUTO_TAG")
manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet manualOcrTag = os.Getenv("MANUAL_OCR_TAG") // Not used yet
autoOcrTag = os.Getenv("AUTO_OCR_TAG") autoOcrTag = os.Getenv("AUTO_OCR_TAG")
llmProvider = os.Getenv("LLM_PROVIDER") llmProvider = os.Getenv("LLM_PROVIDER")
llmModel = os.Getenv("LLM_MODEL") llmModel = os.Getenv("LLM_MODEL")
visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER") visionLlmProvider = os.Getenv("VISION_LLM_PROVIDER")
visionLlmModel = os.Getenv("VISION_LLM_MODEL") visionLlmModel = os.Getenv("VISION_LLM_MODEL")
logLevel = strings.ToLower(os.Getenv("LOG_LEVEL")) logLevel = strings.ToLower(os.Getenv("LOG_LEVEL"))
listenInterface = os.Getenv("LISTEN_INTERFACE") listenInterface = os.Getenv("LISTEN_INTERFACE")
autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE") autoGenerateTitle = os.Getenv("AUTO_GENERATE_TITLE")
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS") autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS") autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
limitOcrPages int // Will be read from OCR_LIMIT_PAGES limitOcrPages int // Will be read from OCR_LIMIT_PAGES
tokenLimit = 0 // Will be read from TOKEN_LIMIT tokenLimit = 0 // Will be read from TOKEN_LIMIT
// Templates // Templates
titleTemplate *template.Template titleTemplate *template.Template

View file

@ -3,11 +3,13 @@ package main
import ( import (
"bytes" "bytes"
"context" "context"
"crypto/tls"
"encoding/json" "encoding/json"
"fmt" "fmt"
"image/jpeg" "image/jpeg"
"io" "io"
"net/http" "net/http"
"net/url"
"os" "os"
"path/filepath" "path/filepath"
"slices" "slices"
@ -16,6 +18,7 @@ import (
"sync" "sync"
"github.com/gen2brain/go-fitz" "github.com/gen2brain/go-fitz"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"gorm.io/gorm" "gorm.io/gorm"
) )
@ -58,10 +61,18 @@ func hasSameTags(original, suggested []string) bool {
func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient { func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR") cacheFolder := os.Getenv("PAPERLESS_GPT_CACHE_DIR")
// Create a custom HTTP transport with TLS configuration
tr := &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: paperlessInsecureSkipVerify,
},
}
httpClient := &http.Client{Transport: tr}
return &PaperlessClient{ return &PaperlessClient{
BaseURL: strings.TrimRight(baseURL, "/"), BaseURL: strings.TrimRight(baseURL, "/"),
APIToken: apiToken, APIToken: apiToken,
HTTPClient: &http.Client{}, HTTPClient: httpClient,
CacheFolder: cacheFolder, CacheFolder: cacheFolder,
} }
} }
@ -80,7 +91,53 @@ func (client *PaperlessClient) Do(ctx context.Context, method, path string, body
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
} }
return client.HTTPClient.Do(req) log.WithFields(logrus.Fields{
"method": method,
"url": url,
"headers": req.Header,
}).Debug("Making HTTP request")
resp, err := client.HTTPClient.Do(req)
if err != nil {
log.WithError(err).WithFields(logrus.Fields{
"url": url,
"method": method,
"error": err,
}).Error("HTTP request failed")
return nil, fmt.Errorf("HTTP request failed: %w", err)
}
// Check if response is HTML instead of JSON for API endpoints
if strings.HasPrefix(path, "api/") {
contentType := resp.Header.Get("Content-Type")
if strings.Contains(contentType, "text/html") {
bodyBytes, _ := io.ReadAll(resp.Body)
resp.Body.Close()
// Create a new response with the same body for the caller
resp = &http.Response{
Status: resp.Status,
StatusCode: resp.StatusCode,
Header: resp.Header,
Body: io.NopCloser(bytes.NewBuffer(bodyBytes)),
}
log.WithFields(logrus.Fields{
"url": url,
"method": method,
"content-type": contentType,
"status-code": resp.StatusCode,
"response": string(bodyBytes),
"base-url": client.BaseURL,
"request-path": path,
"full-headers": resp.Header,
}).Error("Received HTML response for API request")
return nil, fmt.Errorf("received HTML response instead of JSON (status: %d). This often indicates an SSL/TLS issue or invalid authentication. Check your PAPERLESS_URL, PAPERLESS_TOKEN and PAPERLESS_INSECURE_SKIP_VERIFY settings. Full response: %s", resp.StatusCode, string(bodyBytes))
}
}
return resp, nil
} }
// GetAllTags retrieves all tags from the Paperless-NGX API // GetAllTags retrieves all tags from the Paperless-NGX API
@ -120,10 +177,19 @@ func (client *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int,
// Extract relative path from the Next URL // Extract relative path from the Next URL
if tagsResponse.Next != "" { if tagsResponse.Next != "" {
nextURL := tagsResponse.Next nextURL := tagsResponse.Next
if strings.HasPrefix(nextURL, client.BaseURL) { if strings.HasPrefix(nextURL, "http") {
nextURL = strings.TrimPrefix(nextURL, client.BaseURL+"/") // Extract just the path portion from the full URL
if parsedURL, err := url.Parse(nextURL); err == nil {
path = strings.TrimPrefix(parsedURL.Path, "/")
if parsedURL.RawQuery != "" {
path += "?" + parsedURL.RawQuery
}
} else {
return nil, fmt.Errorf("failed to parse next URL: %v", err)
}
} else {
path = strings.TrimPrefix(nextURL, "/")
} }
path = nextURL
} else { } else {
path = "" path = ""
} }
@ -143,19 +209,34 @@ func (client *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []st
resp, err := client.Do(ctx, "GET", path, nil) resp, err := client.Do(ctx, "GET", path, nil)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("HTTP request failed in GetDocumentsByTags: %w", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
// Read the response body
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body) log.WithFields(logrus.Fields{
return nil, fmt.Errorf("error searching documents: %d, %s", resp.StatusCode, string(bodyBytes)) "status_code": resp.StatusCode,
"path": path,
"response": string(bodyBytes),
"headers": resp.Header,
}).Error("Error response from server in GetDocumentsByTags")
return nil, fmt.Errorf("error searching documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes))
} }
var documentsResponse GetDocumentsApiResponse var documentsResponse GetDocumentsApiResponse
err = json.NewDecoder(resp.Body).Decode(&documentsResponse) err = json.Unmarshal(bodyBytes, &documentsResponse)
if err != nil { if err != nil {
return nil, err log.WithFields(logrus.Fields{
"response_body": string(bodyBytes),
"error": err,
}).Error("Failed to parse JSON response in GetDocumentsByTags")
return nil, fmt.Errorf("failed to parse JSON response: %w", err)
} }
allTags, err := client.GetAllTags(ctx) allTags, err := client.GetAllTags(ctx)

View file

@ -1591,9 +1591,9 @@
"license": "MIT" "license": "MIT"
}, },
"node_modules/@types/node": { "node_modules/@types/node": {
"version": "22.13.1", "version": "22.13.2",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.1.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.2.tgz",
"integrity": "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew==", "integrity": "sha512-Z+r8y3XL9ZpI2EY52YYygAFmo2/oWfNSj4BCpAXE2McAexDk8VcnBMGC9Djn9gTKt4d2T/hhXqmPzo4hfIXtTg==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
@ -3529,9 +3529,9 @@
} }
}, },
"node_modules/globals": { "node_modules/globals": {
"version": "15.14.0", "version": "15.15.0",
"resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz", "resolved": "https://registry.npmjs.org/globals/-/globals-15.15.0.tgz",
"integrity": "sha512-OkToC372DtlQeje9/zHIo5CT8lRP/FUgEOKBEhU4e0abL7J7CD24fD9ohiLN5hagG/kWCYj4K5oaxxtj2Z0Dig==", "integrity": "sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"engines": { "engines": {