mirror of
https://github.com/icereed/paperless-gpt.git
synced 2025-03-12 21:08:00 -05:00
Initial support for auto-tagging
This commit is contained in:
parent
47275e277a
commit
26a160209f
2 changed files with 154 additions and 8 deletions
120
main.go
120
main.go
|
@ -57,6 +57,7 @@ type Document struct {
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
Tags []int `json:"tags"`
|
Tags []int `json:"tags"`
|
||||||
SuggestedTitle string `json:"suggested_title,omitempty"`
|
SuggestedTitle string `json:"suggested_title,omitempty"`
|
||||||
|
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -130,6 +131,46 @@ func createLLM() (llms.Model, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getAllTags fetches every tag known to paperless-ngx and returns a mapping
// from tag name to tag ID.
//
// The tag list endpoint is paginated: each response carries one page of tags
// in "results" and the URL of the following page in "next" (null on the last
// page). All pages are followed, so tags beyond the first page are not
// silently missing from the mapping.
//
// Every request is authenticated with apiToken and bound to ctx. A transport
// error, a non-200 status, or an undecodable body on any page aborts the
// whole fetch and returns a nil map together with the error.
func getAllTags(ctx context.Context, baseURL, apiToken string) (map[string]int, error) {
	tagIDMapping := make(map[string]int)

	// One client for all pages so connections can be reused.
	client := &http.Client{}

	pageURL := fmt.Sprintf("%s/api/tags/", baseURL)
	for pageURL != "" {
		next, err := fetchTagsPage(ctx, client, pageURL, apiToken, tagIDMapping)
		if err != nil {
			return nil, err
		}
		pageURL = next
	}

	return tagIDMapping, nil
}

// fetchTagsPage requests a single page of the tag list from pageURL, adds the
// tags it contains to tags (name -> ID), and returns the URL of the next page,
// or "" when the response has no further page.
//
// It is a separate function so that the deferred Body.Close runs once per
// page rather than piling up until the caller's loop finishes.
func fetchTagsPage(ctx context.Context, client *http.Client, pageURL, apiToken string, tags map[string]int) (string, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", pageURL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", fmt.Sprintf("Token %s", apiToken))

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		bodyBytes, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("Error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
	}

	var page struct {
		// Next stays "" when the JSON value is null or absent.
		Next    string `json:"next"`
		Results []struct {
			ID   int    `json:"id"`
			Name string `json:"name"`
		} `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
		return "", err
	}

	for _, tag := range page.Results {
		tags[tag.Name] = tag.ID
	}

	return page.Next, nil
}
|
||||||
|
|
||||||
// documentsHandler returns documents with the specific tag
|
// documentsHandler returns documents with the specific tag
|
||||||
func documentsHandler(c *gin.Context) {
|
func documentsHandler(c *gin.Context) {
|
||||||
ctx := c.Request.Context()
|
ctx := c.Request.Context()
|
||||||
|
@ -290,6 +331,18 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
||||||
return nil, fmt.Errorf("failed to create LLM client: %v", err)
|
return nil, fmt.Errorf("failed to create LLM client: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fetch all available tags from paperless-ngx
|
||||||
|
availableTags, err := getAllTags(ctx, paperlessBaseURL, paperlessAPIToken)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch available tags: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prepare a list of tag names
|
||||||
|
availableTagNames := make([]string, 0, len(availableTags))
|
||||||
|
for tagName := range availableTags {
|
||||||
|
availableTagNames = append(availableTagNames, tagName)
|
||||||
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var mu sync.Mutex
|
var mu sync.Mutex
|
||||||
errors := make([]error, 0)
|
errors := make([]error, 0)
|
||||||
|
@ -315,8 +368,18 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
suggestedTags, err := getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames)
|
||||||
|
if err != nil {
|
||||||
|
mu.Lock()
|
||||||
|
errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err))
|
||||||
|
mu.Unlock()
|
||||||
|
log.Printf("Error generating tags for document %d: %v", documentID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
doc.SuggestedTitle = suggestedTitle
|
doc.SuggestedTitle = suggestedTitle
|
||||||
|
doc.SuggestedTags = suggestedTags
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
log.Printf("Document %d processed successfully.", documentID)
|
log.Printf("Document %d processed successfully.", documentID)
|
||||||
}(&documents[i])
|
}(&documents[i])
|
||||||
|
@ -331,6 +394,47 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
|
||||||
return documents, nil
|
return documents, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getSuggestedTags(ctx context.Context, llm llms.Model, content string, suggestedTitle string, availableTags []string) ([]string, error) {
|
||||||
|
likelyLanguage := os.Getenv("LLM_LANGUAGE")
|
||||||
|
if likelyLanguage == "" {
|
||||||
|
likelyLanguage = "English"
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt := fmt.Sprintf(`I will provide you with the content and suggested title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in %s.
|
||||||
|
|
||||||
|
Available Tags:
|
||||||
|
%s
|
||||||
|
|
||||||
|
Suggested Title:
|
||||||
|
%s
|
||||||
|
|
||||||
|
Content:
|
||||||
|
%s
|
||||||
|
`, likelyLanguage, strings.Join(availableTags, ", "), suggestedTitle, content)
|
||||||
|
|
||||||
|
completion, err := llm.GenerateContent(ctx, []llms.MessageContent{
|
||||||
|
{
|
||||||
|
Parts: []llms.ContentPart{
|
||||||
|
llms.TextContent{
|
||||||
|
Text: prompt,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Role: llms.ChatMessageTypeHuman,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Error getting response from LLM: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
response := strings.TrimSpace(completion.Choices[0].Content)
|
||||||
|
suggestedTags := strings.Split(response, ",")
|
||||||
|
for i, tag := range suggestedTags {
|
||||||
|
suggestedTags[i] = strings.TrimSpace(tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
return suggestedTags, nil
|
||||||
|
}
|
||||||
|
|
||||||
func getSuggestedTitle(ctx context.Context, llm llms.Model, content string) (string, error) {
|
func getSuggestedTitle(ctx context.Context, llm llms.Model, content string) (string, error) {
|
||||||
likelyLanguage, ok := os.LookupEnv("LLM_LANGUAGE")
|
likelyLanguage, ok := os.LookupEnv("LLM_LANGUAGE")
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -366,6 +470,13 @@ Content:
|
||||||
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document, paperlessGptTagID int) error {
|
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document, paperlessGptTagID int) error {
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
|
|
||||||
|
// Fetch all available tags
|
||||||
|
availableTags, err := getAllTags(ctx, baseURL, apiToken)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Error fetching available tags: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
for _, document := range documents {
|
for _, document := range documents {
|
||||||
documentID := document.ID
|
documentID := document.ID
|
||||||
|
|
||||||
|
@ -378,6 +489,15 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map suggested tag names to IDs
|
||||||
|
for _, tagName := range document.SuggestedTags {
|
||||||
|
if tagID, exists := availableTags[tagName]; exists {
|
||||||
|
newTags = append(newTags, tagID)
|
||||||
|
} else {
|
||||||
|
log.Printf("Tag '%s' does not exist in paperless-ngx, skipping.", tagName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
updatedFields["tags"] = newTags
|
updatedFields["tags"] = newTags
|
||||||
|
|
||||||
suggestedTitle := document.SuggestedTitle
|
suggestedTitle := document.SuggestedTitle
|
||||||
|
|
|
@ -13,6 +13,7 @@ interface Document {
|
||||||
title: string;
|
title: string;
|
||||||
content: string;
|
content: string;
|
||||||
suggested_title?: string;
|
suggested_title?: string;
|
||||||
|
suggested_tags?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
const DocumentProcessor: React.FC = () => {
|
const DocumentProcessor: React.FC = () => {
|
||||||
|
@ -30,7 +31,7 @@ const DocumentProcessor: React.FC = () => {
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error fetching filter tag:", error);
|
console.error("Error fetching filter tag:", error);
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
const fetchDocuments = async () => {
|
const fetchDocuments = async () => {
|
||||||
try {
|
try {
|
||||||
|
@ -109,8 +110,10 @@ const DocumentProcessor: React.FC = () => {
|
||||||
<div className="flex items-center justify-center h-screen">
|
<div className="flex items-center justify-center h-screen">
|
||||||
<div className="text-xl font-semibold">
|
<div className="text-xl font-semibold">
|
||||||
No documents found with filter tag{" "}
|
No documents found with filter tag{" "}
|
||||||
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">{filterTag}</span>
|
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">
|
||||||
{" "}found. Try{" "}
|
{filterTag}
|
||||||
|
</span>{" "}
|
||||||
|
found. Try{" "}
|
||||||
<button
|
<button
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
setDocuments([]);
|
setDocuments([]);
|
||||||
|
@ -199,6 +202,9 @@ const DocumentProcessor: React.FC = () => {
|
||||||
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
|
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
|
||||||
Suggested Title
|
Suggested Title
|
||||||
</th>
|
</th>
|
||||||
|
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
|
||||||
|
Suggested Tags
|
||||||
|
</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody className="bg-white divide-y divide-gray-200">
|
<tbody className="bg-white divide-y divide-gray-200">
|
||||||
|
@ -227,6 +233,26 @@ const DocumentProcessor: React.FC = () => {
|
||||||
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
|
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
|
||||||
/>
|
/>
|
||||||
</td>
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-900">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={doc.suggested_tags?.join(", ")}
|
||||||
|
onChange={(e) => {
|
||||||
|
const updatedDocuments = documents.map((d) =>
|
||||||
|
d.id === doc.id
|
||||||
|
? {
|
||||||
|
...d,
|
||||||
|
suggested_tags: e.target.value
|
||||||
|
.split(",")
|
||||||
|
.map((tag) => tag.trim()),
|
||||||
|
}
|
||||||
|
: d
|
||||||
|
);
|
||||||
|
setDocuments(updatedDocuments);
|
||||||
|
}}
|
||||||
|
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
|
||||||
|
/>
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
)
|
)
|
||||||
)}
|
)}
|
||||||
|
|
Loading…
Reference in a new issue