mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 04:48:02 -05:00
feat: add TOKEN_LIMIT environment variable for controlling maximum to… (#161)
* feat: add TOKEN_LIMIT environment variable for controlling maximum tokens in prompts --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
parent
32cc3d2794
commit
90ca87a207
8 changed files with 797 additions and 80 deletions
26
README.md
26
README.md
|
@ -175,6 +175,7 @@ services:
|
|||
| `AUTO_GENERATE_TAGS` | Generate tags automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||
| `AUTO_GENERATE_CORRESPONDENTS` | Generate correspondents automatically if `paperless-gpt-auto` is used. Default: `true`. | No |
|
||||
| `OCR_LIMIT_PAGES` | Limit the number of pages for OCR. Set to `0` for no limit. Default: `5`. | No |
|
||||
| `TOKEN_LIMIT` | Maximum number of tokens allowed in a prompt (template plus document content). Set to `0` to disable the limit. Useful for smaller LLMs. | No |
|
||||
| `CORRESPONDENT_BLACK_LIST` | A comma-separated list of names to exclude from the correspondents suggestions. Example: `John Doe, Jane Smith`.
|
||||
|
||||
### Custom Prompt Templates
|
||||
|
@ -446,6 +447,31 @@ P.O. Box 94515
|
|||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Working with Local LLMs
|
||||
|
||||
When using local LLMs (such as models served through Ollama), you might need to adjust certain settings to optimize performance:
|
||||
|
||||
#### Token Management
|
||||
- Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM
|
||||
- Smaller models might truncate content unexpectedly if given too much text
|
||||
- Start with a conservative limit (e.g., 2000 tokens) and adjust based on your model's capabilities
|
||||
- Set to `0` to disable the limit (use with caution)
|
||||
|
||||
Example configuration for smaller models:
|
||||
```yaml
|
||||
environment:
|
||||
TOKEN_LIMIT: '2000' # Adjust based on your model's context window
|
||||
LLM_PROVIDER: 'ollama'
|
||||
LLM_MODEL: 'llama2' # Or other local model
|
||||
```
|
||||
|
||||
Common issues and solutions:
|
||||
- If you see truncated or incomplete responses, try lowering the `TOKEN_LIMIT`
|
||||
- If processing is too limited, gradually increase the limit while monitoring performance
|
||||
- For models with larger context windows, you can increase the limit or disable it entirely
|
||||
|
||||
## Contributing
|
||||
|
||||
**Pull requests** and **issues** are welcome!
|
||||
|
|
80
app_llm.go
80
app_llm.go
|
@ -23,14 +23,29 @@ func (app *App) getSuggestedCorrespondent(ctx context.Context, content string, s
|
|||
templateMutex.RLock()
|
||||
defer templateMutex.RUnlock()
|
||||
|
||||
var promptBuffer bytes.Buffer
|
||||
err := correspondentTemplate.Execute(&promptBuffer, map[string]interface{}{
|
||||
// Get available tokens for content
|
||||
templateData := map[string]interface{}{
|
||||
"Language": likelyLanguage,
|
||||
"AvailableCorrespondents": availableCorrespondents,
|
||||
"BlackList": correspondentBlackList,
|
||||
"Title": suggestedTitle,
|
||||
"Content": content,
|
||||
})
|
||||
}
|
||||
|
||||
availableTokens, err := getAvailableTokensForContent(correspondentTemplate, templateData)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error calculating available tokens: %v", err)
|
||||
}
|
||||
|
||||
// Truncate content if needed
|
||||
truncatedContent, err := truncateContentByTokens(content, availableTokens)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error truncating content: %v", err)
|
||||
}
|
||||
|
||||
// Execute template with truncated content
|
||||
var promptBuffer bytes.Buffer
|
||||
templateData["Content"] = truncatedContent
|
||||
err = correspondentTemplate.Execute(&promptBuffer, templateData)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error executing correspondent template: %v", err)
|
||||
}
|
||||
|
@ -74,14 +89,31 @@ func (app *App) getSuggestedTags(
|
|||
availableTags = removeTagFromList(availableTags, autoTag)
|
||||
availableTags = removeTagFromList(availableTags, autoOcrTag)
|
||||
|
||||
var promptBuffer bytes.Buffer
|
||||
err := tagTemplate.Execute(&promptBuffer, map[string]interface{}{
|
||||
// Get available tokens for content
|
||||
templateData := map[string]interface{}{
|
||||
"Language": likelyLanguage,
|
||||
"AvailableTags": availableTags,
|
||||
"OriginalTags": originalTags,
|
||||
"Title": suggestedTitle,
|
||||
"Content": content,
|
||||
})
|
||||
}
|
||||
|
||||
availableTokens, err := getAvailableTokensForContent(tagTemplate, templateData)
|
||||
if err != nil {
|
||||
logger.Errorf("Error calculating available tokens: %v", err)
|
||||
return nil, fmt.Errorf("error calculating available tokens: %v", err)
|
||||
}
|
||||
|
||||
// Truncate content if needed
|
||||
truncatedContent, err := truncateContentByTokens(content, availableTokens)
|
||||
if err != nil {
|
||||
logger.Errorf("Error truncating content: %v", err)
|
||||
return nil, fmt.Errorf("error truncating content: %v", err)
|
||||
}
|
||||
|
||||
// Execute template with truncated content
|
||||
var promptBuffer bytes.Buffer
|
||||
templateData["Content"] = truncatedContent
|
||||
err = tagTemplate.Execute(&promptBuffer, templateData)
|
||||
if err != nil {
|
||||
logger.Errorf("Error executing tag template: %v", err)
|
||||
return nil, fmt.Errorf("error executing tag template: %v", err)
|
||||
|
@ -132,7 +164,6 @@ func (app *App) getSuggestedTags(
|
|||
}
|
||||
|
||||
func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte, logger *logrus.Entry) (string, error) {
|
||||
|
||||
templateMutex.RLock()
|
||||
defer templateMutex.RUnlock()
|
||||
likelyLanguage := getLikelyLanguage()
|
||||
|
@ -197,18 +228,36 @@ func (app *App) getSuggestedTitle(ctx context.Context, content string, originalT
|
|||
templateMutex.RLock()
|
||||
defer templateMutex.RUnlock()
|
||||
|
||||
var promptBuffer bytes.Buffer
|
||||
err := titleTemplate.Execute(&promptBuffer, map[string]interface{}{
|
||||
// Get available tokens for content
|
||||
templateData := map[string]interface{}{
|
||||
"Language": likelyLanguage,
|
||||
"Content": content,
|
||||
"Title": originalTitle,
|
||||
})
|
||||
}
|
||||
|
||||
availableTokens, err := getAvailableTokensForContent(titleTemplate, templateData)
|
||||
if err != nil {
|
||||
logger.Errorf("Error calculating available tokens: %v", err)
|
||||
return "", fmt.Errorf("error calculating available tokens: %v", err)
|
||||
}
|
||||
|
||||
// Truncate content if needed
|
||||
truncatedContent, err := truncateContentByTokens(content, availableTokens)
|
||||
if err != nil {
|
||||
logger.Errorf("Error truncating content: %v", err)
|
||||
return "", fmt.Errorf("error truncating content: %v", err)
|
||||
}
|
||||
|
||||
// Execute template with truncated content
|
||||
var promptBuffer bytes.Buffer
|
||||
templateData["Content"] = truncatedContent
|
||||
err = titleTemplate.Execute(&promptBuffer, templateData)
|
||||
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error executing title template: %v", err)
|
||||
}
|
||||
|
||||
prompt := promptBuffer.String()
|
||||
|
||||
logger.Debugf("Title suggestion prompt: %s", prompt)
|
||||
|
||||
completion, err := app.LLM.GenerateContent(ctx, []llms.MessageContent{
|
||||
|
@ -273,10 +322,6 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
|||
docLogger.Printf("Processing Document ID %d...", documentID)
|
||||
|
||||
content := doc.Content
|
||||
if len(content) > 5000 {
|
||||
content = content[:5000]
|
||||
}
|
||||
|
||||
suggestedTitle := doc.Title
|
||||
var suggestedTags []string
|
||||
var suggestedCorrespondent string
|
||||
|
@ -312,7 +357,6 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
|
|||
log.Errorf("Error generating correspondents for document %d: %v", documentID, err)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
|
|
268
app_llm_test.go
Normal file
268
app_llm_test.go
Normal file
|
@ -0,0 +1,268 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
"text/template"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/tmc/langchaingo/llms"
|
||||
"github.com/tmc/langchaingo/textsplitter"
|
||||
)
|
||||
|
||||
// mockLLM is a test double that is plugged into App.LLM by the tests in this
// file. It performs no model call; it only remembers the most recent prompt
// it was handed so that tests can inspect it.
type mockLLM struct {
	// lastPrompt holds the prompt text captured by the latest Call or
	// GenerateContent invocation.
	lastPrompt string
}
|
||||
|
||||
func (m *mockLLM) CreateEmbedding(_ context.Context, texts []string) ([][]float32, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *mockLLM) Call(_ context.Context, prompt string, _ ...llms.CallOption) (string, error) {
|
||||
m.lastPrompt = prompt
|
||||
return "test response", nil
|
||||
}
|
||||
|
||||
func (m *mockLLM) GenerateContent(ctx context.Context, messages []llms.MessageContent, opts ...llms.CallOption) (*llms.ContentResponse, error) {
|
||||
m.lastPrompt = messages[0].Parts[0].(llms.TextContent).Text
|
||||
return &llms.ContentResponse{
|
||||
Choices: []*llms.ContentChoice{
|
||||
{
|
||||
Content: "test response",
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Small prompt templates for the tests in this file. TestPromptTokenLimits
// parses them into the package-level titleTemplate, tagTemplate and
// correspondentTemplate variables.
const (
	// testTitleTemplate renders the Language, Title and Content fields.
	testTitleTemplate = `
Language: {{.Language}}
Title: {{.Title}}
Content: {{.Content}}
`

	// testTagTemplate renders the Language, AvailableTags and Content fields.
	testTagTemplate = `
Language: {{.Language}}
Tags: {{.AvailableTags}}
Content: {{.Content}}
`

	// testCorrespondentTemplate renders only the Language and Content fields.
	testCorrespondentTemplate = `
Language: {{.Language}}
Content: {{.Content}}
`
)
|
||||
|
||||
func TestPromptTokenLimits(t *testing.T) {
|
||||
testLogger := logrus.WithField("test", "test")
|
||||
|
||||
// Initialize test templates
|
||||
var err error
|
||||
titleTemplate, err = template.New("title").Parse(testTitleTemplate)
|
||||
require.NoError(t, err)
|
||||
tagTemplate, err = template.New("tag").Parse(testTagTemplate)
|
||||
require.NoError(t, err)
|
||||
correspondentTemplate, err = template.New("correspondent").Parse(testCorrespondentTemplate)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Create a test app with mock LLM
|
||||
mockLLM := &mockLLM{}
|
||||
app := &App{
|
||||
LLM: mockLLM,
|
||||
}
|
||||
|
||||
// Set up test template
|
||||
testTemplate := template.Must(template.New("test").Parse(`
|
||||
Language: {{.Language}}
|
||||
Content: {{.Content}}
|
||||
`))
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
tokenLimit int
|
||||
content string
|
||||
}{
|
||||
{
|
||||
name: "no limit",
|
||||
tokenLimit: 0,
|
||||
content: "This is the original content that should not be truncated.",
|
||||
},
|
||||
{
|
||||
name: "content within limit",
|
||||
tokenLimit: 100,
|
||||
content: "Short content",
|
||||
},
|
||||
{
|
||||
name: "content exceeds limit",
|
||||
tokenLimit: 50,
|
||||
content: "This is a much longer content that should definitely be truncated to fit within token limits",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set token limit for this test
|
||||
os.Setenv("TOKEN_LIMIT", fmt.Sprintf("%d", tc.tokenLimit))
|
||||
resetTokenLimit()
|
||||
|
||||
// Prepare test data
|
||||
data := map[string]interface{}{
|
||||
"Language": "English",
|
||||
}
|
||||
|
||||
// Calculate available tokens
|
||||
availableTokens, err := getAvailableTokensForContent(testTemplate, data)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Truncate content if needed
|
||||
truncatedContent, err := truncateContentByTokens(tc.content, availableTokens)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test with the app's LLM
|
||||
ctx := context.Background()
|
||||
_, err = app.getSuggestedTitle(ctx, truncatedContent, "Test Title", testLogger)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify truncation
|
||||
if tc.tokenLimit > 0 {
|
||||
// Count tokens in final prompt received by LLM
|
||||
splitter := textsplitter.NewTokenSplitter()
|
||||
tokens, err := splitter.SplitText(mockLLM.lastPrompt)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify prompt is within limits
|
||||
assert.LessOrEqual(t, len(tokens), tc.tokenLimit,
|
||||
"Final prompt should be within token limit")
|
||||
|
||||
if len(tc.content) > len(truncatedContent) {
|
||||
// Content was truncated
|
||||
t.Logf("Content truncated from %d to %d characters",
|
||||
len(tc.content), len(truncatedContent))
|
||||
}
|
||||
} else {
|
||||
// No limit set, content should be unchanged
|
||||
assert.Contains(t, mockLLM.lastPrompt, tc.content,
|
||||
"Original content should be in prompt when no limit is set")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenLimitInCorrespondentGeneration(t *testing.T) {
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Create a test app with mock LLM
|
||||
mockLLM := &mockLLM{}
|
||||
app := &App{
|
||||
LLM: mockLLM,
|
||||
}
|
||||
|
||||
// Test content that would exceed reasonable token limits
|
||||
longContent := "This is a very long content that would normally exceed token limits. " +
|
||||
"It contains multiple sentences and should be truncated appropriately " +
|
||||
"based on the token limit that we set."
|
||||
|
||||
// Set a small token limit
|
||||
os.Setenv("TOKEN_LIMIT", "50")
|
||||
resetTokenLimit()
|
||||
|
||||
// Call getSuggestedCorrespondent
|
||||
ctx := context.Background()
|
||||
availableCorrespondents := []string{"Test Corp", "Example Inc"}
|
||||
correspondentBlackList := []string{"Blocked Corp"}
|
||||
|
||||
_, err := app.getSuggestedCorrespondent(ctx, longContent, "Test Title", availableCorrespondents, correspondentBlackList)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the final prompt size
|
||||
splitter := textsplitter.NewTokenSplitter()
|
||||
tokens, err := splitter.SplitText(mockLLM.lastPrompt)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Final prompt should be within token limit
|
||||
assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
|
||||
}
|
||||
|
||||
func TestTokenLimitInTagGeneration(t *testing.T) {
|
||||
testLogger := logrus.WithField("test", "test")
|
||||
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Create a test app with mock LLM
|
||||
mockLLM := &mockLLM{}
|
||||
app := &App{
|
||||
LLM: mockLLM,
|
||||
}
|
||||
|
||||
// Test content that would exceed reasonable token limits
|
||||
longContent := "This is a very long content that would normally exceed token limits. " +
|
||||
"It contains multiple sentences and should be truncated appropriately."
|
||||
|
||||
// Set a small token limit
|
||||
os.Setenv("TOKEN_LIMIT", "50")
|
||||
resetTokenLimit()
|
||||
|
||||
// Call getSuggestedTags
|
||||
ctx := context.Background()
|
||||
availableTags := []string{"test", "example"}
|
||||
originalTags := []string{"original"}
|
||||
|
||||
_, err := app.getSuggestedTags(ctx, longContent, "Test Title", availableTags, originalTags, testLogger)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the final prompt size
|
||||
splitter := textsplitter.NewTokenSplitter()
|
||||
tokens, err := splitter.SplitText(mockLLM.lastPrompt)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Final prompt should be within token limit
|
||||
assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
|
||||
}
|
||||
|
||||
func TestTokenLimitInTitleGeneration(t *testing.T) {
|
||||
testLogger := logrus.WithField("test", "test")
|
||||
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Create a test app with mock LLM
|
||||
mockLLM := &mockLLM{}
|
||||
app := &App{
|
||||
LLM: mockLLM,
|
||||
}
|
||||
|
||||
// Test content that would exceed reasonable token limits
|
||||
longContent := "This is a very long content that would normally exceed token limits. " +
|
||||
"It contains multiple sentences and should be truncated appropriately."
|
||||
|
||||
// Set a small token limit
|
||||
os.Setenv("TOKEN_LIMIT", "50")
|
||||
resetTokenLimit()
|
||||
|
||||
// Call getSuggestedTitle
|
||||
ctx := context.Background()
|
||||
|
||||
_, err := app.getSuggestedTitle(ctx, longContent, "Original Title", testLogger)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the final prompt size
|
||||
splitter := textsplitter.NewTokenSplitter()
|
||||
tokens, err := splitter.SplitText(mockLLM.lastPrompt)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Final prompt should be within token limit
|
||||
assert.LessOrEqual(t, len(tokens), 50, "Final prompt should be within token limit")
|
||||
}
|
6
go.mod
6
go.mod
|
@ -36,7 +36,6 @@ require (
|
|||
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/imdario/mergo v0.3.13 // indirect
|
||||
github.com/jinzhu/inflection v1.0.0 // indirect
|
||||
github.com/jinzhu/now v1.1.5 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
|
@ -57,6 +56,11 @@ require (
|
|||
github.com/spf13/cast v1.7.0 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 // indirect
|
||||
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 // indirect
|
||||
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect
|
||||
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
|
||||
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
golang.org/x/net v0.25.0 // indirect
|
||||
|
|
84
go.sum
84
go.sum
|
@ -2,12 +2,8 @@ dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
|||
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
||||
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
|
||||
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
|
||||
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
|
||||
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
|
||||
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||
github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
|
||||
github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
|
||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||
|
@ -27,6 +23,8 @@ github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+Gv
|
|||
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
|
||||
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
|
||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||
github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo=
|
||||
|
@ -48,16 +46,10 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG
|
|||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
|
||||
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
|
||||
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
|
||||
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
|
||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
||||
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
|
||||
|
@ -70,6 +62,10 @@ github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa02
|
|||
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
|
||||
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
|
@ -79,12 +75,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
|
|||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
||||
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
|
||||
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
|
||||
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
|
||||
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
|
||||
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
|
||||
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
|
@ -98,29 +90,26 @@ github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAc
|
|||
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
|
||||
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
|
||||
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
|
||||
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
|
||||
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
|
@ -130,73 +119,46 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
|
|||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 h1:K+bMSIx9A7mLES1rtG+qKduLIXq40DAzYHtb0XuCukA=
|
||||
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181/go.mod h1:dzYhVIwWCtzPAa4QP98wfB9+mzt33MSmM8wsKiMi2ow=
|
||||
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 h1:oYrL81N608MLZhma3ruL8qTM4xcpYECGut8KSxRY59g=
|
||||
gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82/go.mod h1:Gn+LZmCrhPECMD3SOKlE+BOHwhOYD9j7WT9NUtkCrC8=
|
||||
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a h1:O85GKETcmnCNAfv4Aym9tepU8OE0NmcZNqPlXcsBKBs=
|
||||
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a/go.mod h1:LaSIs30YPGs1H5jwGgPhLzc8vkNc/k0rDX/fEZqiU/M=
|
||||
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 h1:qqjvoVXdWIcZCLPMlzgA7P9FZWdPGPvP/l3ef8GzV6o=
|
||||
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84/go.mod h1:IJZ+fdMvbW2qW6htJx7sLJ04FEs4Ldl/MDsJtMKywfw=
|
||||
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f h1:Wku8eEdeJqIOFHtrfkYUByc4bCaTeA6fL0UJgfEiFMI=
|
||||
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs=
|
||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI=
|
||||
gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU=
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
|
||||
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
|
||||
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
|
||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
|
||||
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
|
||||
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
||||
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
||||
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gorm.io/driver/sqlite v1.5.6 h1:fO/X46qn5NUEEOZtnjJRWRzZMe8nqJiQ9E+0hi+hKQE=
|
||||
gorm.io/driver/sqlite v1.5.6/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4=
|
||||
gorm.io/driver/sqlite v1.5.7 h1:8NvsrhP0ifM7LX9G4zPB97NwovUakUxc+2V2uuf3Z1I=
|
||||
gorm.io/driver/sqlite v1.5.7/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4=
|
||||
gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8=
|
||||
|
|
12
main.go
12
main.go
|
@ -50,6 +50,7 @@ var (
|
|||
autoGenerateTags = os.Getenv("AUTO_GENERATE_TAGS")
|
||||
autoGenerateCorrespondents = os.Getenv("AUTO_GENERATE_CORRESPONDENTS")
|
||||
limitOcrPages int // Will be read from OCR_LIMIT_PAGES
|
||||
tokenLimit = 0 // Will be read from TOKEN_LIMIT
|
||||
|
||||
// Templates
|
||||
titleTemplate *template.Template
|
||||
|
@ -382,6 +383,17 @@ func validateOrDefaultEnvVars() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize token limit from environment variable
|
||||
if limit := os.Getenv("TOKEN_LIMIT"); limit != "" {
|
||||
if parsed, err := strconv.Atoi(limit); err == nil {
|
||||
if parsed < 0 {
|
||||
log.Fatalf("TOKEN_LIMIT must be non-negative, got: %d", parsed)
|
||||
}
|
||||
tokenLimit = parsed
|
||||
log.Infof("Using token limit: %d", tokenLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// documentLogger creates a logger with document context
|
||||
|
|
99
tokens.go
Normal file
99
tokens.go
Normal file
|
@ -0,0 +1,99 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"text/template"
|
||||
|
||||
"github.com/tmc/langchaingo/llms"
|
||||
)
|
||||
|
||||
// getAvailableTokensForContent calculates how many tokens are available for content
|
||||
// by rendering the template with empty content and counting tokens
|
||||
func getAvailableTokensForContent(tmpl *template.Template, data map[string]interface{}) (int, error) {
|
||||
if tokenLimit <= 0 {
|
||||
return -1, nil // No limit when disabled
|
||||
}
|
||||
|
||||
// Create a copy of data and set "Content" to empty
|
||||
templateData := make(map[string]interface{})
|
||||
for k, v := range data {
|
||||
templateData[k] = v
|
||||
}
|
||||
templateData["Content"] = ""
|
||||
|
||||
// Execute template with empty content
|
||||
var promptBuffer bytes.Buffer
|
||||
if err := tmpl.Execute(&promptBuffer, templateData); err != nil {
|
||||
return 0, fmt.Errorf("error executing template: %v", err)
|
||||
}
|
||||
|
||||
// Count tokens in prompt template
|
||||
promptTokens, err := getTokenCount(promptBuffer.String())
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error counting tokens in prompt: %v", err)
|
||||
}
|
||||
log.Debugf("Prompt template uses %d tokens", promptTokens)
|
||||
|
||||
// Add safety margin for prompt tokens
|
||||
promptTokens += 10
|
||||
|
||||
// Calculate available tokens for content
|
||||
availableTokens := tokenLimit - promptTokens
|
||||
if availableTokens < 0 {
|
||||
return 0, fmt.Errorf("prompt template exceeds token limit")
|
||||
}
|
||||
return availableTokens, nil
|
||||
}
|
||||
|
||||
func getTokenCount(content string) (int, error) {
|
||||
return llms.CountTokens(llmModel, content), nil
|
||||
}
|
||||
|
||||
// truncateContentByTokens truncates the content so that its token count does not exceed availableTokens.
|
||||
// This implementation uses a binary search on runes to find the longest prefix whose token count is within the limit.
|
||||
// If availableTokens is 0 or negative, the original content is returned.
|
||||
func truncateContentByTokens(content string, availableTokens int) (string, error) {
|
||||
if availableTokens < 0 || tokenLimit <= 0 {
|
||||
return content, nil
|
||||
}
|
||||
totalTokens, err := getTokenCount(content)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error counting tokens: %v", err)
|
||||
}
|
||||
if totalTokens <= availableTokens {
|
||||
return content, nil
|
||||
}
|
||||
|
||||
// Convert content to runes for safe slicing.
|
||||
runes := []rune(content)
|
||||
low := 0
|
||||
high := len(runes)
|
||||
validCut := 0
|
||||
|
||||
for low <= high {
|
||||
mid := (low + high) / 2
|
||||
substr := string(runes[:mid])
|
||||
count, err := getTokenCount(substr)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error counting tokens in substring: %v", err)
|
||||
}
|
||||
if count <= availableTokens {
|
||||
validCut = mid
|
||||
low = mid + 1
|
||||
} else {
|
||||
high = mid - 1
|
||||
}
|
||||
}
|
||||
|
||||
truncated := string(runes[:validCut])
|
||||
// Final verification
|
||||
finalTokens, err := getTokenCount(truncated)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error counting tokens in final truncated content: %v", err)
|
||||
}
|
||||
if finalTokens > availableTokens {
|
||||
return "", fmt.Errorf("truncated content still exceeds the available token limit")
|
||||
}
|
||||
return truncated, nil
|
||||
}
|
302
tokens_test.go
Normal file
302
tokens_test.go
Normal file
|
@ -0,0 +1,302 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"testing"
|
||||
"text/template"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/tmc/langchaingo/textsplitter"
|
||||
)
|
||||
|
||||
// resetTokenLimit parses TOKEN_LIMIT from environment and sets the tokenLimit variable
|
||||
func resetTokenLimit() {
|
||||
// Reset tokenLimit
|
||||
tokenLimit = 0
|
||||
// Parse from environment
|
||||
if limit := os.Getenv("TOKEN_LIMIT"); limit != "" {
|
||||
if parsed, err := strconv.Atoi(limit); err == nil {
|
||||
tokenLimit = parsed
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenLimit(t *testing.T) {
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
envValue string
|
||||
wantLimit int
|
||||
}{
|
||||
{
|
||||
name: "empty value",
|
||||
envValue: "",
|
||||
wantLimit: 0,
|
||||
},
|
||||
{
|
||||
name: "zero value",
|
||||
envValue: "0",
|
||||
wantLimit: 0,
|
||||
},
|
||||
{
|
||||
name: "positive value",
|
||||
envValue: "1000",
|
||||
wantLimit: 1000,
|
||||
},
|
||||
{
|
||||
name: "invalid value",
|
||||
envValue: "not-a-number",
|
||||
wantLimit: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set environment variable
|
||||
os.Setenv("TOKEN_LIMIT", tc.envValue)
|
||||
|
||||
// Set tokenLimit based on environment
|
||||
resetTokenLimit()
|
||||
|
||||
assert.Equal(t, tc.wantLimit, tokenLimit)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAvailableTokensForContent(t *testing.T) {
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Test template
|
||||
tmpl := template.Must(template.New("test").Parse("Template with {{.Var1}} and {{.Content}}"))
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
limit int
|
||||
data map[string]interface{}
|
||||
wantCount int
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "disabled token limit",
|
||||
limit: 0,
|
||||
data: map[string]interface{}{"Var1": "test"},
|
||||
wantCount: -1,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "template exceeds limit",
|
||||
limit: 2,
|
||||
data: map[string]interface{}{
|
||||
"Var1": "test",
|
||||
},
|
||||
wantCount: 0,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "available tokens calculation",
|
||||
limit: 100,
|
||||
data: map[string]interface{}{
|
||||
"Var1": "test",
|
||||
},
|
||||
wantCount: 85,
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set token limit
|
||||
os.Setenv("TOKEN_LIMIT", fmt.Sprintf("%d", tc.limit))
|
||||
// Set tokenLimit based on environment
|
||||
resetTokenLimit()
|
||||
|
||||
count, err := getAvailableTokensForContent(tmpl, tc.data)
|
||||
|
||||
if tc.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tc.wantCount, count)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateContentByTokens(t *testing.T) {
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Set a token limit for testing
|
||||
os.Setenv("TOKEN_LIMIT", "100")
|
||||
// Set tokenLimit based on environment
|
||||
resetTokenLimit()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
availableTokens int
|
||||
wantTruncated bool
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "no truncation needed",
|
||||
content: "short content",
|
||||
availableTokens: 20,
|
||||
wantTruncated: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "disabled by token limit",
|
||||
content: "any content",
|
||||
availableTokens: -1,
|
||||
wantTruncated: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "truncation needed",
|
||||
content: "This is a much longer content that will definitely need to be truncated because it exceeds the available tokens",
|
||||
availableTokens: 10,
|
||||
wantTruncated: true,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty content",
|
||||
content: "",
|
||||
availableTokens: 10,
|
||||
wantTruncated: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "exact token count",
|
||||
content: "one two three four five",
|
||||
availableTokens: 5,
|
||||
wantTruncated: false,
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
result, err := truncateContentByTokens(tc.content, tc.availableTokens)
|
||||
|
||||
if tc.wantErr {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
if tc.wantTruncated {
|
||||
assert.True(t, len(result) < len(tc.content), "Content should be truncated")
|
||||
} else {
|
||||
assert.Equal(t, tc.content, result, "Content should not be truncated")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenLimitIntegration(t *testing.T) {
|
||||
// Save current env and restore after test
|
||||
originalLimit := os.Getenv("TOKEN_LIMIT")
|
||||
defer os.Setenv("TOKEN_LIMIT", originalLimit)
|
||||
|
||||
// Create a test template
|
||||
tmpl := template.Must(template.New("test").Parse(`
|
||||
Template with variables:
|
||||
Language: {{.Language}}
|
||||
Title: {{.Title}}
|
||||
Content: {{.Content}}
|
||||
`))
|
||||
|
||||
// Test data
|
||||
data := map[string]interface{}{
|
||||
"Language": "English",
|
||||
"Title": "Test Document",
|
||||
}
|
||||
|
||||
// Test with different token limits
|
||||
tests := []struct {
|
||||
name string
|
||||
limit int
|
||||
content string
|
||||
wantSize int
|
||||
wantError bool
|
||||
}{
|
||||
{
|
||||
name: "no limit",
|
||||
limit: 0,
|
||||
content: "original content",
|
||||
wantSize: len("original content"),
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
name: "sufficient limit",
|
||||
limit: 1000,
|
||||
content: "original content",
|
||||
wantSize: len("original content"),
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
name: "tight limit",
|
||||
limit: 50,
|
||||
content: "This is a long content that should be truncated to fit within the token limit",
|
||||
wantSize: 50,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
name: "very small limit",
|
||||
limit: 3,
|
||||
content: "Content too large for small limit",
|
||||
wantError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set token limit
|
||||
os.Setenv("TOKEN_LIMIT", fmt.Sprintf("%d", tc.limit))
|
||||
// Set tokenLimit based on environment
|
||||
resetTokenLimit()
|
||||
|
||||
// First get available tokens
|
||||
availableTokens, err := getAvailableTokensForContent(tmpl, data)
|
||||
if tc.wantError {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
// Then truncate content
|
||||
truncated, err := truncateContentByTokens(tc.content, availableTokens)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Finally execute template with truncated content
|
||||
data["Content"] = truncated
|
||||
var result string
|
||||
{
|
||||
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, data)
|
||||
require.NoError(t, err)
|
||||
result = buf.String()
|
||||
}
|
||||
|
||||
// Verify final size is within limit if limit is enabled
|
||||
if tc.limit > 0 {
|
||||
splitter := textsplitter.NewTokenSplitter()
|
||||
tokens, err := splitter.SplitText(result)
|
||||
require.NoError(t, err)
|
||||
assert.LessOrEqual(t, len(tokens), tc.limit)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue