mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 21:08:00 -05:00
Initial support for auto-tagging
This commit is contained in:
parent
47275e277a
commit
26a160209f
2 changed files with 154 additions and 8 deletions
120
main.go
120
main.go
|
@ -57,6 +57,7 @@ type Document struct {
|
|||
Content string `json:"content"`
|
||||
Tags []int `json:"tags"`
|
||||
SuggestedTitle string `json:"suggested_title,omitempty"`
|
||||
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -130,6 +131,46 @@ func createLLM() (llms.Model, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// getAllTags fetches the tags known to paperless-ngx and returns them as a
// map from tag name to tag ID.
//
// The request goes to "<baseURL>/api/tags/" and is authenticated with
// apiToken via a "Token" Authorization header. The endpoint answers with a
// paginated list: each page may carry a "next" URL pointing at the following
// page. Every page is followed so that tags beyond the first page are
// included in the result instead of being silently dropped.
//
// An error is returned if a request cannot be built or sent, if any page
// answers with a status other than 200 OK (the response body is included in
// the message), or if a page cannot be decoded as JSON. When two tags share a
// name, the one seen last wins.
func getAllTags(ctx context.Context, baseURL, apiToken string) (map[string]int, error) {
	client := &http.Client{}
	tagIDMapping := make(map[string]int)

	// fetchPage downloads a single page, merges its tags into tagIDMapping
	// and returns the URL of the following page ("" when there is none).
	// Using a closure keeps the deferred Body.Close scoped to one page.
	fetchPage := func(pageURL string) (string, error) {
		req, err := http.NewRequestWithContext(ctx, "GET", pageURL, nil)
		if err != nil {
			return "", err
		}
		req.Header.Set("Authorization", fmt.Sprintf("Token %s", apiToken))

		resp, err := client.Do(req)
		if err != nil {
			return "", err
		}
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusOK {
			// Best effort: the body is only used to enrich the error message.
			bodyBytes, _ := io.ReadAll(resp.Body)
			return "", fmt.Errorf("Error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
		}

		// Declared per page so a JSON null "next" decodes to "" rather than
		// keeping the value from a previous page.
		var tagsResponse struct {
			Next    string `json:"next"`
			Results []struct {
				ID   int    `json:"id"`
				Name string `json:"name"`
			} `json:"results"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&tagsResponse); err != nil {
			return "", err
		}

		for _, tag := range tagsResponse.Results {
			tagIDMapping[tag.Name] = tag.ID
		}
		return tagsResponse.Next, nil
	}

	// NOTE(review): "next" is followed exactly as the server reports it. If
	// paperless-ngx sits behind a proxy and reports a different scheme/host
	// than baseURL, the link may need rewriting — confirm against deployment.
	visited := make(map[string]bool)
	pageURL := fmt.Sprintf("%s/api/tags/", baseURL)
	for pageURL != "" && !visited[pageURL] {
		// Remember visited pages so a self-referencing "next" cannot loop forever.
		visited[pageURL] = true

		next, err := fetchPage(pageURL)
		if err != nil {
			return nil, err
		}
		pageURL = next
	}

	return tagIDMapping, nil
}
|
||||
|
||||
// documentsHandler returns documents with the specific tag
|
||||
func documentsHandler(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
|
@ -290,6 +331,18 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
|||
return nil, fmt.Errorf("failed to create LLM client: %v", err)
|
||||
}
|
||||
|
||||
// Fetch all available tags from paperless-ngx
|
||||
availableTags, err := getAllTags(ctx, paperlessBaseURL, paperlessAPIToken)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch available tags: %v", err)
|
||||
}
|
||||
|
||||
// Prepare a list of tag names
|
||||
availableTagNames := make([]string, 0, len(availableTags))
|
||||
for tagName := range availableTags {
|
||||
availableTagNames = append(availableTagNames, tagName)
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
var mu sync.Mutex
|
||||
errors := make([]error, 0)
|
||||
|
@ -315,8 +368,18 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
|||
return
|
||||
}
|
||||
|
||||
suggestedTags, err := getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err))
|
||||
mu.Unlock()
|
||||
log.Printf("Error generating tags for document %d: %v", documentID, err)
|
||||
return
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
doc.SuggestedTitle = suggestedTitle
|
||||
doc.SuggestedTags = suggestedTags
|
||||
mu.Unlock()
|
||||
log.Printf("Document %d processed successfully.", documentID)
|
||||
}(&documents[i])
|
||||
|
@ -331,6 +394,47 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
|||
return documents, nil
|
||||
}
|
||||
|
||||
func getSuggestedTags(ctx context.Context, llm llms.Model, content string, suggestedTitle string, availableTags []string) ([]string, error) {
|
||||
likelyLanguage := os.Getenv("LLM_LANGUAGE")
|
||||
if likelyLanguage == "" {
|
||||
likelyLanguage = "English"
|
||||
}
|
||||
|
||||
prompt := fmt.Sprintf(`I will provide you with the content and suggested title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in %s.
|
||||
|
||||
Available Tags:
|
||||
%s
|
||||
|
||||
Suggested Title:
|
||||
%s
|
||||
|
||||
Content:
|
||||
%s
|
||||
`, likelyLanguage, strings.Join(availableTags, ", "), suggestedTitle, content)
|
||||
|
||||
completion, err := llm.GenerateContent(ctx, []llms.MessageContent{
|
||||
{
|
||||
Parts: []llms.ContentPart{
|
||||
llms.TextContent{
|
||||
Text: prompt,
|
||||
},
|
||||
},
|
||||
Role: llms.ChatMessageTypeHuman,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error getting response from LLM: %v", err)
|
||||
}
|
||||
|
||||
response := strings.TrimSpace(completion.Choices[0].Content)
|
||||
suggestedTags := strings.Split(response, ",")
|
||||
for i, tag := range suggestedTags {
|
||||
suggestedTags[i] = strings.TrimSpace(tag)
|
||||
}
|
||||
|
||||
return suggestedTags, nil
|
||||
}
|
||||
|
||||
func getSuggestedTitle(ctx context.Context, llm llms.Model, content string) (string, error) {
|
||||
likelyLanguage, ok := os.LookupEnv("LLM_LANGUAGE")
|
||||
if !ok {
|
||||
|
@ -366,6 +470,13 @@ Content:
|
|||
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document, paperlessGptTagID int) error {
|
||||
client := &http.Client{}
|
||||
|
||||
// Fetch all available tags
|
||||
availableTags, err := getAllTags(ctx, baseURL, apiToken)
|
||||
if err != nil {
|
||||
log.Printf("Error fetching available tags: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
for _, document := range documents {
|
||||
documentID := document.ID
|
||||
|
||||
|
@ -378,6 +489,15 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
|
|||
}
|
||||
}
|
||||
|
||||
// Map suggested tag names to IDs
|
||||
for _, tagName := range document.SuggestedTags {
|
||||
if tagID, exists := availableTags[tagName]; exists {
|
||||
newTags = append(newTags, tagID)
|
||||
} else {
|
||||
log.Printf("Tag '%s' does not exist in paperless-ngx, skipping.", tagName)
|
||||
}
|
||||
}
|
||||
|
||||
updatedFields["tags"] = newTags
|
||||
|
||||
suggestedTitle := document.SuggestedTitle
|
||||
|
|
|
@ -13,6 +13,7 @@ interface Document {
|
|||
title: string;
|
||||
content: string;
|
||||
suggested_title?: string;
|
||||
suggested_tags?: string[];
|
||||
}
|
||||
|
||||
const DocumentProcessor: React.FC = () => {
|
||||
|
@ -30,7 +31,7 @@ const DocumentProcessor: React.FC = () => {
|
|||
} catch (error) {
|
||||
console.error("Error fetching filter tag:", error);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const fetchDocuments = async () => {
|
||||
try {
|
||||
|
@ -109,8 +110,10 @@ const DocumentProcessor: React.FC = () => {
|
|||
<div className="flex items-center justify-center h-screen">
|
||||
<div className="text-xl font-semibold">
|
||||
No documents found with filter tag{" "}
|
||||
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">{filterTag}</span>
|
||||
{" "}found. Try{" "}
|
||||
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">
|
||||
{filterTag}
|
||||
</span>{" "}
|
||||
found. Try{" "}
|
||||
<button
|
||||
onClick={() => {
|
||||
setDocuments([]);
|
||||
|
@ -199,6 +202,9 @@ const DocumentProcessor: React.FC = () => {
|
|||
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
|
||||
Suggested Title
|
||||
</th>
|
||||
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
|
||||
Suggested Tags
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="bg-white divide-y divide-gray-200">
|
||||
|
@ -227,6 +233,26 @@ const DocumentProcessor: React.FC = () => {
|
|||
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
|
||||
/>
|
||||
</td>
|
||||
<td className="px-4 py-3 text-sm text-gray-900">
|
||||
<input
|
||||
type="text"
|
||||
value={doc.suggested_tags?.join(", ")}
|
||||
onChange={(e) => {
|
||||
const updatedDocuments = documents.map((d) =>
|
||||
d.id === doc.id
|
||||
? {
|
||||
...d,
|
||||
suggested_tags: e.target.value
|
||||
.split(",")
|
||||
.map((tag) => tag.trim()),
|
||||
}
|
||||
: d
|
||||
);
|
||||
setDocuments(updatedDocuments);
|
||||
}}
|
||||
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
|
||||
/>
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
)}
|
||||
|
|
Loading…
Reference in a new issue