2024-09-23 07:59:50 -05:00
package main
import (
"context"
"fmt"
"net/http"
"os"
2024-10-16 07:49:07 -05:00
"path/filepath"
2024-09-23 07:59:50 -05:00
"strings"
"sync"
2024-10-16 07:49:07 -05:00
"text/template"
2024-10-20 16:49:42 -05:00
"time"
2024-09-23 07:59:50 -05:00
2024-10-16 07:49:07 -05:00
"github.com/Masterminds/sprig/v3"
2024-09-23 07:59:50 -05:00
"github.com/gin-gonic/gin"
2024-10-31 14:00:43 -05:00
"github.com/sirupsen/logrus"
2024-09-23 07:59:50 -05:00
"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/ollama"
"github.com/tmc/langchaingo/llms/openai"
)
2024-10-21 14:52:23 -05:00
// Global Variables and Constants
2024-09-23 07:59:50 -05:00
var (
2024-10-31 14:00:43 -05:00
// Logger
log = logrus . New ( )
// Environment Variables
2024-09-23 07:59:50 -05:00
paperlessBaseURL = os . Getenv ( "PAPERLESS_BASE_URL" )
paperlessAPIToken = os . Getenv ( "PAPERLESS_API_TOKEN" )
openaiAPIKey = os . Getenv ( "OPENAI_API_KEY" )
2024-10-20 16:49:42 -05:00
manualTag = "paperless-gpt"
autoTag = "paperless-gpt-auto"
2024-09-23 07:59:50 -05:00
llmProvider = os . Getenv ( "LLM_PROVIDER" )
llmModel = os . Getenv ( "LLM_MODEL" )
2024-10-28 11:34:41 -05:00
visionLlmProvider = os . Getenv ( "VISION_LLM_PROVIDER" )
visionLlmModel = os . Getenv ( "VISION_LLM_MODEL" )
2024-10-31 14:00:43 -05:00
logLevel = strings . ToLower ( os . Getenv ( "LOG_LEVEL" ) )
2024-10-16 07:49:07 -05:00
// Templates
titleTemplate * template . Template
tagTemplate * template . Template
2024-10-28 11:34:41 -05:00
ocrTemplate * template . Template
2024-10-16 07:49:07 -05:00
templateMutex sync . RWMutex
// Default templates
defaultTitleTemplate = ` I will provide you with the content of a document that has been partially read by OCR ( so it may contain errors ) .
Your task is to find a suitable document title that I can use as the title in the paperless - ngx program .
Respond only with the title , without any additional information . The content is likely in { { . Language } } .
Content :
{ { . Content } }
`
defaultTagTemplate = ` I will provide you with the content and the title of a document . Your task is to select appropriate tags for the document from the list of available tags I will provide . Only select tags from the provided list . Respond only with the selected tags as a comma - separated list , without any additional information . The content is likely in { { . Language } } .
Available Tags :
{ { . AvailableTags | join ", " } }
Title :
{ { . Title } }
Content :
{ { . Content } }
Please concisely select the { { . Language } } tags from the list above that best describe the document .
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable .
`
2024-10-28 11:34:41 -05:00
defaultOcrPrompt = ` Just transcribe the text in this image and preserve the formatting and layout (high quality OCR). Do that for ALL the text in the image. Be thorough and pay attention. This is very important. The image is from a text document so be sure to continue until the bottom of the page. Thanks a lot! You tend to forget about some text in the image so please focus! Use markdown format. `
2024-09-23 07:59:50 -05:00
)
2024-10-21 14:52:23 -05:00
// App struct to hold dependencies
type App struct {
2024-10-28 11:34:41 -05:00
Client * PaperlessClient
LLM llms . Model
VisionLLM llms . Model
2024-10-21 14:52:23 -05:00
}
2024-09-23 07:59:50 -05:00
func main ( ) {
2024-10-21 14:52:23 -05:00
// Validate Environment Variables
validateEnvVars ( )
2024-09-23 07:59:50 -05:00
2024-10-31 14:00:43 -05:00
// Initialize logrus logger
initLogger ( )
2024-10-21 14:52:23 -05:00
// Initialize PaperlessClient
client := NewPaperlessClient ( paperlessBaseURL , paperlessAPIToken )
2024-10-21 02:56:49 -05:00
2024-10-21 14:52:23 -05:00
// Load Templates
loadTemplates ( )
2024-10-21 02:56:49 -05:00
2024-10-21 14:52:23 -05:00
// Initialize LLM
llm , err := createLLM ( )
if err != nil {
log . Fatalf ( "Failed to create LLM client: %v" , err )
2024-09-23 07:59:50 -05:00
}
2024-10-28 11:34:41 -05:00
// Initialize Vision LLM
visionLlm , err := createVisionLLM ( )
if err != nil {
log . Fatalf ( "Failed to create Vision LLM client: %v" , err )
}
2024-10-21 14:52:23 -05:00
// Initialize App with dependencies
app := & App {
2024-10-28 11:34:41 -05:00
Client : client ,
LLM : llm ,
VisionLLM : visionLlm ,
2024-09-23 07:59:50 -05:00
}
2024-10-21 14:52:23 -05:00
// Start background process for auto-tagging
2024-10-20 16:49:42 -05:00
go func ( ) {
2024-10-31 14:00:43 -05:00
minBackoffDuration := 10 * time . Second
2024-10-21 16:46:22 -05:00
maxBackoffDuration := time . Hour
pollingInterval := 10 * time . Second
backoffDuration := minBackoffDuration
2024-10-20 16:49:42 -05:00
for {
2024-10-31 14:00:43 -05:00
processedCount , err := app . processAutoTagDocuments ( )
if err != nil {
log . Errorf ( "Error in processAutoTagDocuments: %v" , err )
2024-10-21 16:46:22 -05:00
time . Sleep ( backoffDuration )
backoffDuration *= 2 // Exponential backoff
if backoffDuration > maxBackoffDuration {
2024-10-31 14:00:43 -05:00
log . Warnf ( "Repeated errors in processAutoTagDocuments detected. Setting backoff to %v" , maxBackoffDuration )
2024-10-21 16:46:22 -05:00
backoffDuration = maxBackoffDuration
}
} else {
backoffDuration = minBackoffDuration
}
2024-10-31 14:00:43 -05:00
if processedCount == 0 {
time . Sleep ( pollingInterval )
}
2024-10-20 16:49:42 -05:00
}
} ( )
2024-09-23 07:59:50 -05:00
// Create a Gin router with default middleware (logger and recovery)
router := gin . Default ( )
// API routes
api := router . Group ( "/api" )
{
2024-10-21 14:52:23 -05:00
api . GET ( "/documents" , app . documentsHandler )
2024-10-28 11:34:41 -05:00
// http://localhost:8080/api/documents/544
api . GET ( "/documents/:id" , app . getDocumentHandler ( ) )
2024-10-21 14:52:23 -05:00
api . POST ( "/generate-suggestions" , app . generateSuggestionsHandler )
api . PATCH ( "/update-documents" , app . updateDocumentsHandler )
2024-09-23 07:59:50 -05:00
api . GET ( "/filter-tag" , func ( c * gin . Context ) {
2024-10-20 16:49:42 -05:00
c . JSON ( http . StatusOK , gin . H { "tag" : manualTag } )
2024-09-23 07:59:50 -05:00
} )
2024-10-21 14:52:23 -05:00
// Get all tags
api . GET ( "/tags" , app . getAllTagsHandler )
2024-10-16 07:49:07 -05:00
api . GET ( "/prompts" , getPromptsHandler )
api . POST ( "/prompts" , updatePromptsHandler )
2024-10-28 11:34:41 -05:00
// OCR endpoints
api . POST ( "/documents/:id/ocr" , app . submitOCRJobHandler )
api . GET ( "/jobs/ocr/:job_id" , app . getJobStatusHandler )
api . GET ( "/jobs/ocr" , app . getAllJobsHandler )
// Endpoint to see if user enabled OCR
api . GET ( "/experimental/ocr" , func ( c * gin . Context ) {
enabled := isOcrEnabled ( )
c . JSON ( http . StatusOK , gin . H { "enabled" : enabled } )
} )
2024-09-23 07:59:50 -05:00
}
2024-10-21 14:52:23 -05:00
// Serve static files for the frontend under /assets
2024-09-23 07:59:50 -05:00
router . StaticFS ( "/assets" , gin . Dir ( "./web-app/dist/assets" , true ) )
router . StaticFile ( "/vite.svg" , "./web-app/dist/vite.svg" )
// Catch-all route for serving the frontend
router . NoRoute ( func ( c * gin . Context ) {
c . File ( "./web-app/dist/index.html" )
} )
2024-10-28 11:34:41 -05:00
// Start OCR worker pool
numWorkers := 1 // Number of workers to start
startWorkerPool ( app , numWorkers )
2024-10-31 14:00:43 -05:00
log . Infoln ( "Server started on port :8080" )
2024-09-23 07:59:50 -05:00
if err := router . Run ( ":8080" ) ; err != nil {
log . Fatalf ( "Failed to run server: %v" , err )
}
}
2024-10-31 14:00:43 -05:00
func initLogger ( ) {
switch logLevel {
case "debug" :
log . SetLevel ( logrus . DebugLevel )
case "info" :
log . SetLevel ( logrus . InfoLevel )
case "warn" :
log . SetLevel ( logrus . WarnLevel )
case "error" :
log . SetLevel ( logrus . ErrorLevel )
default :
log . SetLevel ( logrus . InfoLevel )
if logLevel != "" {
log . Fatalf ( "Invalid log level: '%s'." , logLevel )
}
}
log . SetFormatter ( & logrus . TextFormatter {
FullTimestamp : true ,
} )
}
2024-10-28 11:34:41 -05:00
// isOcrEnabled reports whether both a vision LLM model and a vision LLM
// provider have been configured.
func isOcrEnabled() bool {
	hasModel := visionLlmModel != ""
	hasProvider := visionLlmProvider != ""
	return hasModel && hasProvider
}
2024-10-21 14:52:23 -05:00
// validateEnvVars ensures all necessary environment variables are set
func validateEnvVars ( ) {
if paperlessBaseURL == "" {
log . Fatal ( "Please set the PAPERLESS_BASE_URL environment variable." )
}
if paperlessAPIToken == "" {
log . Fatal ( "Please set the PAPERLESS_API_TOKEN environment variable." )
}
if llmProvider == "" {
log . Fatal ( "Please set the LLM_PROVIDER environment variable." )
}
2024-10-31 14:00:43 -05:00
if visionLlmProvider != "" && visionLlmProvider != "openai" && visionLlmProvider != "ollama" {
log . Fatal ( "Please set the LLM_PROVIDER environment variable to 'openai' or 'ollama'." )
}
2024-10-21 14:52:23 -05:00
if llmModel == "" {
log . Fatal ( "Please set the LLM_MODEL environment variable." )
}
2024-10-31 14:00:43 -05:00
if ( llmProvider == "openai" || visionLlmProvider == "openai" ) && openaiAPIKey == "" {
2024-10-21 14:52:23 -05:00
log . Fatal ( "Please set the OPENAI_API_KEY environment variable for OpenAI provider." )
}
}
// processAutoTagDocuments handles the background auto-tagging of documents
2024-10-31 14:00:43 -05:00
func ( app * App ) processAutoTagDocuments ( ) ( int , error ) {
2024-10-20 16:49:42 -05:00
ctx := context . Background ( )
2024-10-21 14:52:23 -05:00
documents , err := app . Client . GetDocumentsByTags ( ctx , [ ] string { autoTag } )
2024-10-20 16:49:42 -05:00
if err != nil {
2024-10-31 14:00:43 -05:00
return 0 , fmt . Errorf ( "error fetching documents with autoTag: %w" , err )
2024-10-20 16:49:42 -05:00
}
2024-10-21 14:52:23 -05:00
if len ( documents ) == 0 {
2024-10-31 14:00:43 -05:00
log . Debugf ( "No documents with tag %s found" , autoTag )
return 0 , nil // No documents to process
2024-10-21 14:52:23 -05:00
}
2024-10-31 14:00:43 -05:00
log . Debugf ( "Found at least %d remaining documents with tag %s" , len ( documents ) , autoTag )
documents = documents [ : 1 ] // Process only one document at a time
2024-10-20 16:49:42 -05:00
suggestionRequest := GenerateSuggestionsRequest {
Documents : documents ,
GenerateTitles : true ,
GenerateTags : true ,
}
2024-10-21 14:52:23 -05:00
suggestions , err := app . generateDocumentSuggestions ( ctx , suggestionRequest )
2024-10-20 16:49:42 -05:00
if err != nil {
2024-10-31 14:00:43 -05:00
return 0 , fmt . Errorf ( "error generating suggestions: %w" , err )
2024-10-20 16:49:42 -05:00
}
2024-10-21 14:52:23 -05:00
err = app . Client . UpdateDocuments ( ctx , suggestions )
2024-10-20 16:49:42 -05:00
if err != nil {
2024-10-31 14:00:43 -05:00
return 0 , fmt . Errorf ( "error updating documents: %w" , err )
2024-10-20 16:49:42 -05:00
}
2024-10-21 16:46:22 -05:00
2024-10-31 14:00:43 -05:00
return len ( documents ) , nil
2024-10-20 16:49:42 -05:00
}
2024-10-21 14:52:23 -05:00
// removeTagFromList returns a new slice containing every element of tags that
// is not equal to tagToRemove, in the original order. The input slice is left
// untouched and the result is never nil, even when nothing remains.
func removeTagFromList(tags []string, tagToRemove string) []string {
	// At most len(tags) elements survive, so size the result once up front.
	filteredTags := make([]string, 0, len(tags))
	for _, tag := range tags {
		if tag != tagToRemove {
			filteredTags = append(filteredTags, tag)
		}
	}
	return filteredTags
}
2024-10-21 14:52:23 -05:00
// getLikelyLanguage returns the language name that prompts should assume for
// document content. The value comes from the LLM_LANGUAGE environment
// variable, with surrounding whitespace removed; when that is unset or blank
// it falls back to "English". The result is normalised to title case, e.g.
// "gERMAN" becomes "German".
func getLikelyLanguage() string {
	likelyLanguage := strings.TrimSpace(os.Getenv("LLM_LANGUAGE"))
	if likelyLanguage == "" {
		likelyLanguage = "English"
	}
	// NOTE: strings.Title is deprecated, but its suggested replacement lives
	// in golang.org/x/text/cases, which this file does not import. Its
	// word-boundary rules are sufficient for plain language names.
	return strings.Title(strings.ToLower(likelyLanguage))
}
2024-10-21 14:52:23 -05:00
// loadTemplates loads the title and tag templates from files or uses default templates
func loadTemplates ( ) {
templateMutex . Lock ( )
defer templateMutex . Unlock ( )
// Ensure prompts directory exists
promptsDir := "prompts"
if err := os . MkdirAll ( promptsDir , os . ModePerm ) ; err != nil {
log . Fatalf ( "Failed to create prompts directory: %v" , err )
}
2024-09-23 07:59:50 -05:00
2024-10-21 14:52:23 -05:00
// Load title template
titleTemplatePath := filepath . Join ( promptsDir , "title_prompt.tmpl" )
titleTemplateContent , err := os . ReadFile ( titleTemplatePath )
if err != nil {
2024-10-31 14:00:43 -05:00
log . Errorf ( "Could not read %s, using default template: %v" , titleTemplatePath , err )
2024-10-21 14:52:23 -05:00
titleTemplateContent = [ ] byte ( defaultTitleTemplate )
if err := os . WriteFile ( titleTemplatePath , titleTemplateContent , os . ModePerm ) ; err != nil {
log . Fatalf ( "Failed to write default title template to disk: %v" , err )
}
}
titleTemplate , err = template . New ( "title" ) . Funcs ( sprig . FuncMap ( ) ) . Parse ( string ( titleTemplateContent ) )
2024-09-23 10:03:14 -05:00
if err != nil {
2024-10-21 14:52:23 -05:00
log . Fatalf ( "Failed to parse title template: %v" , err )
2024-09-23 10:03:14 -05:00
}
2024-10-21 14:52:23 -05:00
// Load tag template
tagTemplatePath := filepath . Join ( promptsDir , "tag_prompt.tmpl" )
tagTemplateContent , err := os . ReadFile ( tagTemplatePath )
if err != nil {
2024-10-31 14:00:43 -05:00
log . Errorf ( "Could not read %s, using default template: %v" , tagTemplatePath , err )
2024-10-21 14:52:23 -05:00
tagTemplateContent = [ ] byte ( defaultTagTemplate )
if err := os . WriteFile ( tagTemplatePath , tagTemplateContent , os . ModePerm ) ; err != nil {
log . Fatalf ( "Failed to write default tag template to disk: %v" , err )
}
}
tagTemplate , err = template . New ( "tag" ) . Funcs ( sprig . FuncMap ( ) ) . Parse ( string ( tagTemplateContent ) )
if err != nil {
log . Fatalf ( "Failed to parse tag template: %v" , err )
}
2024-10-28 11:34:41 -05:00
// Load OCR template
ocrTemplatePath := filepath . Join ( promptsDir , "ocr_prompt.tmpl" )
ocrTemplateContent , err := os . ReadFile ( ocrTemplatePath )
if err != nil {
2024-10-31 14:00:43 -05:00
log . Errorf ( "Could not read %s, using default template: %v" , ocrTemplatePath , err )
2024-10-28 11:34:41 -05:00
ocrTemplateContent = [ ] byte ( defaultOcrPrompt )
if err := os . WriteFile ( ocrTemplatePath , ocrTemplateContent , os . ModePerm ) ; err != nil {
log . Fatalf ( "Failed to write default OCR template to disk: %v" , err )
}
}
ocrTemplate , err = template . New ( "ocr" ) . Funcs ( sprig . FuncMap ( ) ) . Parse ( string ( ocrTemplateContent ) )
if err != nil {
log . Fatalf ( "Failed to parse OCR template: %v" , err )
}
2024-10-21 14:52:23 -05:00
}
2024-09-23 07:59:50 -05:00
2024-10-21 14:52:23 -05:00
// createLLM builds the text LLM client for the configured provider, matched
// case-insensitively. "openai" requires a non-empty API key; "ollama" talks
// to the server named by OLLAMA_HOST, defaulting to http://127.0.0.1:11434.
// Any other provider yields an error.
func createLLM() (llms.Model, error) {
	provider := strings.ToLower(llmProvider)

	if provider == "openai" {
		if openaiAPIKey == "" {
			return nil, fmt.Errorf("OpenAI API key is not set")
		}
		modelOpt := openai.WithModel(llmModel)
		tokenOpt := openai.WithToken(openaiAPIKey)
		return openai.New(modelOpt, tokenOpt)
	}

	if provider == "ollama" {
		serverURL := os.Getenv("OLLAMA_HOST")
		if serverURL == "" {
			serverURL = "http://127.0.0.1:11434"
		}
		modelOpt := ollama.WithModel(llmModel)
		serverOpt := ollama.WithServerURL(serverURL)
		return ollama.New(modelOpt, serverOpt)
	}

	return nil, fmt.Errorf("unsupported LLM provider: %s", llmProvider)
}
2024-09-23 07:59:50 -05:00
2024-10-28 11:34:41 -05:00
func createVisionLLM ( ) ( llms . Model , error ) {
switch strings . ToLower ( visionLlmProvider ) {
case "openai" :
if openaiAPIKey == "" {
return nil , fmt . Errorf ( "OpenAI API key is not set" )
2024-09-23 07:59:50 -05:00
}
2024-10-28 11:34:41 -05:00
return openai . New (
openai . WithModel ( visionLlmModel ) ,
openai . WithToken ( openaiAPIKey ) ,
)
case "ollama" :
host := os . Getenv ( "OLLAMA_HOST" )
if host == "" {
host = "http://127.0.0.1:11434"
2024-09-23 07:59:50 -05:00
}
2024-10-28 11:34:41 -05:00
return ollama . New (
ollama . WithModel ( visionLlmModel ) ,
ollama . WithServerURL ( host ) ,
)
default :
2024-10-31 14:00:43 -05:00
log . Infoln ( "Vision LLM not enabled" )
2024-10-28 11:34:41 -05:00
return nil , nil
2024-09-23 07:59:50 -05:00
}
}