mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 12:58:02 -05:00
Refactor getSuggestedTags function to filter out tags not in the available tags list
This commit is contained in:
parent
12e6acd5a9
commit
4ba0b36be7
1 changed files with 31 additions and 14 deletions
45
main.go
45
main.go
|
@ -345,6 +345,9 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
|||
// Prepare a list of tag names
|
||||
availableTagNames := make([]string, 0, len(availableTags))
|
||||
for tagName := range availableTags {
|
||||
if tagName == tagToFilter {
|
||||
continue
|
||||
}
|
||||
availableTagNames = append(availableTagNames, tagName)
|
||||
}
|
||||
|
||||
|
@ -400,22 +403,22 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
|||
}
|
||||
|
||||
func getSuggestedTags(ctx context.Context, llm llms.Model, content string, suggestedTitle string, availableTags []string) ([]string, error) {
|
||||
likelyLanguage := os.Getenv("LLM_LANGUAGE")
|
||||
if likelyLanguage == "" {
|
||||
likelyLanguage = "English"
|
||||
}
|
||||
likelyLanguage := getLikelyLanguage()
|
||||
|
||||
prompt := fmt.Sprintf(`I will provide you with the content and suggested title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in %s.
|
||||
prompt := fmt.Sprintf(`I will provide you with the content and the title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in %s.
|
||||
|
||||
Available Tags:
|
||||
%s
|
||||
|
||||
Suggested Title:
|
||||
Title:
|
||||
%s
|
||||
|
||||
Content:
|
||||
%s
|
||||
`, likelyLanguage, strings.Join(availableTags, ", "), suggestedTitle, content)
|
||||
|
||||
Please concisely select the %s tags from the list above that best describe the document.
|
||||
Be very selective and only choose the most relevant tags since too many tags will make the document less discoverable.
|
||||
`, likelyLanguage, strings.Join(availableTags, ", "), suggestedTitle, content, likelyLanguage)
|
||||
|
||||
completion, err := llm.GenerateContent(ctx, []llms.MessageContent{
|
||||
{
|
||||
|
@ -437,16 +440,30 @@ Content:
|
|||
suggestedTags[i] = strings.TrimSpace(tag)
|
||||
}
|
||||
|
||||
return suggestedTags, nil
|
||||
// Filter out tags that are not in the available tags list
|
||||
filteredTags := []string{}
|
||||
for _, tag := range suggestedTags {
|
||||
for _, availableTag := range availableTags {
|
||||
if strings.EqualFold(tag, availableTag) {
|
||||
filteredTags = append(filteredTags, availableTag)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return filteredTags, nil
|
||||
}
|
||||
|
||||
func getLikelyLanguage() string {
|
||||
likelyLanguage := os.Getenv("LLM_LANGUAGE")
|
||||
if likelyLanguage == "" {
|
||||
likelyLanguage = "English"
|
||||
}
|
||||
return strings.Title(strings.ToLower(likelyLanguage))
|
||||
}
|
||||
|
||||
func getSuggestedTitle(ctx context.Context, llm llms.Model, content string) (string, error) {
|
||||
likelyLanguage, ok := os.LookupEnv("LLM_LANGUAGE")
|
||||
if !ok {
|
||||
likelyLanguage = "English"
|
||||
} else {
|
||||
likelyLanguage = strings.Title(strings.ToLower(likelyLanguage))
|
||||
}
|
||||
likelyLanguage := getLikelyLanguage()
|
||||
|
||||
prompt := fmt.Sprintf(`I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
|
||||
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
|
||||
|
|
Loading…
Reference in a new issue