diff --git a/main.go b/main.go index 7c5a3bb..735c661 100644 --- a/main.go +++ b/main.go @@ -51,13 +51,28 @@ type GetDocumentsApiResponse struct { } `json:"results"` } +// Document is a stripped down version of the document object from paperless-ngx. +// Response payload for /documents endpoint and part of request payload for /generate-suggestions endpoint type Document struct { - ID int `json:"id"` - Title string `json:"title"` - Content string `json:"content"` - Tags []string `json:"tags"` - SuggestedTitle string `json:"suggested_title,omitempty"` - SuggestedTags []string `json:"suggested_tags,omitempty"` + ID int `json:"id"` + Title string `json:"title"` + Content string `json:"content"` + Tags []string `json:"tags"` +} + +// GenerateSuggestionsRequest is the request payload for generating suggestions for /generate-suggestions endpoint +type GenerateSuggestionsRequest struct { + Documents []Document `json:"documents"` + GenerateTitles bool `json:"generate_titles,omitempty"` + GenerateTags bool `json:"generate_tags,omitempty"` +} + +// DocumentSuggestion is the response payload for /generate-suggestions endpoint and the request payload for /update-documents endpoint (as an array) +type DocumentSuggestion struct { + ID int `json:"id"` + OriginalDocument Document `json:"original_document"` + SuggestedTitle string `json:"suggested_title,omitempty"` + SuggestedTags []string `json:"suggested_tags,omitempty"` } var ( @@ -207,14 +222,14 @@ func documentsHandler(c *gin.Context) { func generateSuggestionsHandler(c *gin.Context) { ctx := c.Request.Context() - var documents []Document - if err := c.ShouldBindJSON(&documents); err != nil { + var suggestionRequest GenerateSuggestionsRequest + if err := c.ShouldBindJSON(&suggestionRequest); err != nil { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)}) log.Printf("Invalid request payload: %v", err) return } - results, err := processDocuments(ctx, documents) + results, err := generateDocumentSuggestions(ctx, suggestionRequest) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)}) log.Printf("Error processing documents: %v", err) @@ -227,7 +242,7 @@ func generateSuggestionsHandler(c *gin.Context) { // updateDocumentsHandler updates documents with new titles func updateDocumentsHandler(c *gin.Context) { ctx := c.Request.Context() - var documents []Document + var documents []DocumentSuggestion if err := c.ShouldBindJSON(&documents); err != nil { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)}) log.Printf("Invalid request payload: %v", err) @@ -348,7 +363,7 @@ func getDocumentsByTags(ctx context.Context, baseURL, apiToken string, tags []st return documents, nil } -func processDocuments(ctx context.Context, documents []Document) ([]Document, error) { +func generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest) ([]DocumentSuggestion, error) { llm, err := createLLM() if err != nil { return nil, fmt.Errorf("failed to create LLM client: %v", err) @@ -369,6 +384,9 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er availableTagNames = append(availableTagNames, tagName) } + documents := suggestionRequest.Documents + documentSuggestions := []DocumentSuggestion{} + var wg sync.WaitGroup var mu sync.Mutex errors := make([]error, 0) @@ -385,27 +403,50 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er content = content[:5000] } - suggestedTitle, err := getSuggestedTitle(ctx, llm, content) - if err != nil { - mu.Lock() - errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err)) - mu.Unlock() - log.Printf("Error processing document %d: %v", documentID, err) - return + var suggestedTitle string + var suggestedTags []string + + if suggestionRequest.GenerateTitles { + suggestedTitle, err = getSuggestedTitle(ctx, llm, content) + if err != nil { + mu.Lock() + errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err)) + mu.Unlock() + log.Printf("Error processing document %d: %v", documentID, err) + return + } } - suggestedTags, err := getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames) - if err != nil { - mu.Lock() - errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err)) - mu.Unlock() - log.Printf("Error generating tags for document %d: %v", documentID, err) - return + if suggestionRequest.GenerateTags { + suggestedTags, err = getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames) + if err != nil { + mu.Lock() + errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err)) + mu.Unlock() + log.Printf("Error generating tags for document %d: %v", documentID, err) + return + } } mu.Lock() - doc.SuggestedTitle = suggestedTitle - doc.SuggestedTags = suggestedTags + suggestion := DocumentSuggestion{ + ID: documentID, + OriginalDocument: *doc, + } + // Titles + if suggestionRequest.GenerateTitles { + suggestion.SuggestedTitle = suggestedTitle + } else { + suggestion.SuggestedTitle = doc.Title + } + + // Tags + if suggestionRequest.GenerateTags { + suggestion.SuggestedTags = suggestedTags + } else { + suggestion.SuggestedTags = removeTagFromList(doc.Tags, tagToFilter) + } + documentSuggestions = append(documentSuggestions, suggestion) mu.Unlock() log.Printf("Document %d processed successfully.", documentID) }(&documents[i]) @@ -417,7 +458,17 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er return nil, errors[0] } - return documents, nil + return documentSuggestions, nil +} + +func removeTagFromList(tags []string, tagToRemove string) []string { + filteredTags := []string{} + for _, tag := range tags { + if tag != tagToRemove { + filteredTags = append(filteredTags, tag) + } + } + return filteredTags } func getSuggestedTags(ctx context.Context, llm llms.Model, content string, suggestedTitle string, availableTags []string) ([]string, error) { @@ -507,7 +558,7 @@ Content: return strings.TrimSpace(strings.Trim(completion.Choices[0].Content, "\"")), nil } -func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document) error { +func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []DocumentSuggestion) error { client := &http.Client{} // Fetch all available tags @@ -524,8 +575,13 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents [] newTags := []int{} + tags := document.SuggestedTags + if len(tags) == 0 { + tags = document.OriginalDocument.Tags + } + // Map suggested tag names to IDs - for _, tagName := range document.SuggestedTags { + for _, tagName := range tags { if tagID, exists := availableTags[tagName]; exists { // Skip the tag that we are filtering if tagName == tagToFilter { @@ -537,13 +593,20 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents [] } } - updatedFields["tags"] = newTags - + if len(newTags) > 0 { + updatedFields["tags"] = newTags + } else { + log.Printf("No valid tags found for document %d, skipping.", documentID) + } suggestedTitle := document.SuggestedTitle if len(suggestedTitle) > 128 { suggestedTitle = suggestedTitle[:128] } - updatedFields["title"] = suggestedTitle + if suggestedTitle != "" { + updatedFields["title"] = suggestedTitle + } else { + log.Printf("No valid title found for document %d, skipping.", documentID) + } // Send the update request url := fmt.Sprintf("%s/api/documents/%d/", baseURL, documentID) diff --git a/web-app/src/components/DocumentProcessor.tsx b/web-app/src/components/DocumentProcessor.tsx index a1bea32..70f5704 100644 --- a/web-app/src/components/DocumentProcessor.tsx +++ b/web-app/src/components/DocumentProcessor.tsx @@ -15,44 +15,58 @@ interface Document { title: string; content: string; tags: string[]; - suggested_title?: string; - suggested_tags?: { value: string; label: string }[]; } -type ApiDocument = Omit & { +interface GenerateSuggestionsRequest { + documents: Document[]; + generate_titles?: boolean; + generate_tags?: boolean; +} + +interface DocumentSuggestion { + id: number; + original_document: Document; + suggested_title?: string; suggested_tags?: string[]; -}; +} const DocumentProcessor: React.FC = () => { const [documents, setDocuments] = useState([]); - const [availableTags, setAvailableTags] = useState<{ value: string; label: string }[]>([]); + const [documentSuggestions, setDocumentSuggestions] = useState< + DocumentSuggestion[] + >([]); + const [availableTags, setAvailableTags] = useState< + { value: string; label: string }[] + >([]); const [loading, setLoading] = useState(true); const [processing, setProcessing] = useState(false); const [updating, setUpdating] = useState(false); const [successModalOpen, setSuccessModalOpen] = useState(false); const [filterTag, setFilterTag] = useState(undefined); + const [generateTitles, setGenerateTitles] = useState(true); + const [generateTags, setGenerateTags] = useState(true); useEffect(() => { const fetchData = async () => { try { const [filterTagResponse, documentsResponse, tagsResponse] = await Promise.all([ - axios.get("/api/filter-tag"), - axios.get("/api/documents"), - axios.get("/api/tags"), + axios.get< + { tag: string } | undefined + > + ("/api/filter-tag"), + axios.get< + Document[] + >("/api/documents"), + axios.get<{ + [tag: string]: number; + }>("/api/tags"), ]); setFilterTag(filterTagResponse.data?.tag); - const rawDocuments = documentsResponse.data as ApiDocument[]; - const documents = rawDocuments.map((doc) => ({ - ...doc, - suggested_tags: doc.tags.map((tag) => ({ value: tag, label: tag })), - })); - console.log(documents); - setDocuments(documents); + setDocuments(documentsResponse.data); // Store available tags as objects with value and label - // tagsResponse.data is a map of name to id const tags = Object.entries(tagsResponse.data).map(([name]) => ({ value: name, label: name, @@ -71,16 +85,17 @@ const DocumentProcessor: React.FC = () => { const handleProcessDocuments = async () => { setProcessing(true); try { - const apiDocuments: ApiDocument[] = documents.map((doc) => ({ - ...doc, - suggested_tags: doc.suggested_tags?.map((tag) => tag.value) || [], - })); + const requestPayload: GenerateSuggestionsRequest = { + documents, + generate_titles: generateTitles, + generate_tags: generateTags, + }; - const response = await axios.post("/api/generate-suggestions", apiDocuments); - setDocuments(response.data.map((doc) => ({ - ...doc, - suggested_tags: doc.suggested_tags?.map((tag) => ({ value: tag, label: tag })) || [], - }))); + const response = await axios.post( + "/api/generate-suggestions", + requestPayload + ); + setDocumentSuggestions(response.data); } catch (error) { console.error("Error generating suggestions:", error); } finally { @@ -91,13 +106,9 @@ const DocumentProcessor: React.FC = () => { const handleUpdateDocuments = async () => { setUpdating(true); try { - const apiDocuments: ApiDocument[] = documents.map((doc) => ({ - ...doc, - tags: [], // Remove tags from the API document - suggested_tags: doc.suggested_tags?.map((tag) => tag.value) || [], - })); - await axios.patch("/api/update-documents", apiDocuments); + await axios.patch("/api/update-documents", documentSuggestions); setSuccessModalOpen(true); + resetSuggestions(); } catch (error) { console.error("Error updating documents:", error); } finally { @@ -106,17 +117,12 @@ const DocumentProcessor: React.FC = () => { }; const resetSuggestions = () => { - const resetDocs = documents.map((doc) => ({ - ...doc, - suggested_title: undefined, - suggested_tags: [], - })); - setDocuments(resetDocs); + setDocumentSuggestions([]); }; const fetchDocuments = async () => { try { - const response = await axios.get("/api/documents"); // API endpoint to fetch documents + const response = await axios.get("/api/documents"); setDocuments(response.data); } catch (error) { console.error("Error fetching documents:", error); @@ -171,7 +177,7 @@ const DocumentProcessor: React.FC = () => { )} - {!documents.some((doc) => doc.suggested_title) && ( + {documentSuggestions.length === 0 && (

@@ -196,120 +202,130 @@ const DocumentProcessor: React.FC = () => { {processing ? "Processing..." : "Generate Suggestions"}

-
- - - - - - - - - {documents.map((doc) => ( - - - - - ))} - -
- ID - - Title -
- {doc.id} - - {doc.title} -
+
+ + +
+
+ {documents.map((doc) => ( +
+

{doc.title}

+
+                  {doc.content.length > 100 ? `${doc.content.substring(0, 100)}...` : doc.content}
+                
+
+ {doc.tags.map((tag, index) => ( + + {tag} + + ))} +
+
+
+

{doc.title}

+
+                      {doc.content}
+                    
+
+ {doc.tags.map((tag, index) => ( + + {tag} + + ))} +
+
+
+
+ ))}
)} - {documents.some((doc) => doc.suggested_title) && ( + {documentSuggestions.length > 0 && (

Review and Edit Suggested Titles

-
- - - - - - - - - - - {documents.map( - (doc) => - doc.suggested_title && ( - - - - - - - ) - )} - -
- ID - - Original Title - - Suggested Title - - Suggested Tags -
- {doc.id} - - {doc.title} - - { - const updatedDocuments = documents.map((d) => - d.id === doc.id - ? { ...d, suggested_title: e.target.value } - : d - ); - setDocuments(updatedDocuments); - }} - className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500" - /> - - { - const updatedTags = [...(doc.suggested_tags || []), { value: tag.value as string, label: tag.label }]; - const updatedDocuments = documents.map((d) => - d.id === doc.id - ? { ...d, suggested_tags: updatedTags } - : d - ); - setDocuments(updatedDocuments); - }} - onDelete={(i) => { - const updatedTags = doc.suggested_tags?.filter( - (_, index) => index !== i - ); - const updatedDocuments = documents.map((d) => - d.id === doc.id - ? { ...d, suggested_tags: updatedTags } - : d - ); - setDocuments(updatedDocuments); - }} - allowNew={false} - placeholderText="Add a tag" - /> -
+
+ {documentSuggestions.map((doc) => ( +
+

+ {doc.original_document.title} +

+ { + const updatedSuggestions = documentSuggestions.map((d) => + d.id === doc.id + ? { ...d, suggested_title: e.target.value } + : d + ); + setDocumentSuggestions(updatedSuggestions); + }} + className="w-full border border-gray-300 rounded px-2 py-1 mt-2 focus:outline-none focus:ring-2 focus:ring-blue-500" + /> +
+ ({ + value: tag, + label: tag, + })) || [] + } + suggestions={availableTags} + onAdd={(tag) => { + const tagValue = tag.value as string; + const updatedTags = [ + ...(doc.suggested_tags || []), + tagValue, + ]; + const updatedSuggestions = documentSuggestions.map((d) => + d.id === doc.id + ? { ...d, suggested_tags: updatedTags } + : d + ); + setDocumentSuggestions(updatedSuggestions); + }} + onDelete={(i) => { + const updatedTags = doc.suggested_tags?.filter( + (_, index) => index !== i + ); + const updatedSuggestions = documentSuggestions.map((d) => + d.id === doc.id + ? { ...d, suggested_tags: updatedTags } + : d + ); + setDocumentSuggestions(updatedSuggestions); + }} + allowNew={false} + placeholderText="Add a tag" + /> +
+
+ ))}
-
+