Redesign for optional titles or tags

This commit is contained in:
Dominik Schröter 2024-10-07 13:40:17 +02:00
parent 4776486fab
commit 51bf716854
2 changed files with 257 additions and 178 deletions

89
main.go
View file

@ -51,11 +51,26 @@ type GetDocumentsApiResponse struct {
} `json:"results"`
}
// Document is a stripped-down version of the document object from paperless-ngx.
// It is the element type of the /documents endpoint response and is embedded in
// the request payload of the /generate-suggestions endpoint.
type Document struct {
// ID identifies the document.
ID int `json:"id"`
// Title is the document's current title.
Title string `json:"title"`
// Content is the document's text content.
Content string `json:"content"`
// Tags holds the names (not the numeric IDs) of the tags currently attached
// to the document.
Tags []string `json:"tags"`
}
// GenerateSuggestionsRequest is the request payload of the /generate-suggestions
// endpoint. The two flags select which kinds of suggestions are generated; a
// flag that is absent from the JSON body decodes to false.
type GenerateSuggestionsRequest struct {
// Documents are the documents to generate suggestions for.
Documents []Document `json:"documents"`
// GenerateTitles requests a newly generated title for every document. When
// false, the document's existing title is returned as the suggestion.
GenerateTitles bool `json:"generate_titles,omitempty"`
// GenerateTags requests newly generated tags for every document. When false,
// the document's existing tags (minus the filter tag) are returned instead.
GenerateTags bool `json:"generate_tags,omitempty"`
}
// DocumentSuggestion pairs a document with the title and tags suggested for it.
// A slice of DocumentSuggestion is the response payload of the
// /generate-suggestions endpoint and the request payload of the
// /update-documents endpoint.
type DocumentSuggestion struct {
// ID is the ID of the document this suggestion belongs to.
ID int `json:"id"`
// OriginalDocument is a copy of the document as it was submitted.
OriginalDocument Document `json:"original_document"`
// SuggestedTitle is the proposed title; it is omitted from the JSON when empty.
SuggestedTitle string `json:"suggested_title,omitempty"`
// SuggestedTags are the proposed tag names; omitted from the JSON when empty.
SuggestedTags []string `json:"suggested_tags,omitempty"`
}
@ -207,14 +222,14 @@ func documentsHandler(c *gin.Context) {
func generateSuggestionsHandler(c *gin.Context) {
ctx := c.Request.Context()
var documents []Document
if err := c.ShouldBindJSON(&documents); err != nil {
var suggestionRequest GenerateSuggestionsRequest
if err := c.ShouldBindJSON(&suggestionRequest); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)})
log.Printf("Invalid request payload: %v", err)
return
}
results, err := processDocuments(ctx, documents)
results, err := generateDocumentSuggestions(ctx, suggestionRequest)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Error processing documents: %v", err)})
log.Printf("Error processing documents: %v", err)
@ -227,7 +242,7 @@ func generateSuggestionsHandler(c *gin.Context) {
// updateDocumentsHandler updates documents with the submitted title and tag suggestions
func updateDocumentsHandler(c *gin.Context) {
ctx := c.Request.Context()
var documents []Document
var documents []DocumentSuggestion
if err := c.ShouldBindJSON(&documents); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid request payload: %v", err)})
log.Printf("Invalid request payload: %v", err)
@ -348,7 +363,7 @@ func getDocumentsByTags(ctx context.Context, baseURL, apiToken string, tags []st
return documents, nil
}
func processDocuments(ctx context.Context, documents []Document) ([]Document, error) {
func generateDocumentSuggestions(ctx context.Context, suggestionRequest GenerateSuggestionsRequest) ([]DocumentSuggestion, error) {
llm, err := createLLM()
if err != nil {
return nil, fmt.Errorf("failed to create LLM client: %v", err)
@ -369,6 +384,9 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
availableTagNames = append(availableTagNames, tagName)
}
documents := suggestionRequest.Documents
documentSuggestions := []DocumentSuggestion{}
var wg sync.WaitGroup
var mu sync.Mutex
errors := make([]error, 0)
@ -385,7 +403,11 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
content = content[:5000]
}
suggestedTitle, err := getSuggestedTitle(ctx, llm, content)
var suggestedTitle string
var suggestedTags []string
if suggestionRequest.GenerateTitles {
suggestedTitle, err = getSuggestedTitle(ctx, llm, content)
if err != nil {
mu.Lock()
errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err))
@ -393,8 +415,10 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
log.Printf("Error processing document %d: %v", documentID, err)
return
}
}
suggestedTags, err := getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames)
if suggestionRequest.GenerateTags {
suggestedTags, err = getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames)
if err != nil {
mu.Lock()
errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err))
@ -402,10 +426,27 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
log.Printf("Error generating tags for document %d: %v", documentID, err)
return
}
}
mu.Lock()
doc.SuggestedTitle = suggestedTitle
doc.SuggestedTags = suggestedTags
suggestion := DocumentSuggestion{
ID: documentID,
OriginalDocument: *doc,
}
// Titles
if suggestionRequest.GenerateTitles {
suggestion.SuggestedTitle = suggestedTitle
} else {
suggestion.SuggestedTitle = doc.Title
}
// Tags
if suggestionRequest.GenerateTags {
suggestion.SuggestedTags = suggestedTags
} else {
suggestion.SuggestedTags = removeTagFromList(doc.Tags, tagToFilter)
}
documentSuggestions = append(documentSuggestions, suggestion)
mu.Unlock()
log.Printf("Document %d processed successfully.", documentID)
}(&documents[i])
@ -417,7 +458,17 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
return nil, errors[0]
}
return documents, nil
return documentSuggestions, nil
}
// removeTagFromList returns a new slice containing every entry of tags that is
// not equal to tagToRemove, preserving the original order. The input slice is
// not modified, and the result is always non-nil (an empty slice when nothing
// remains), so it encodes to a JSON array rather than null.
func removeTagFromList(tags []string, tagToRemove string) []string {
	kept := []string{}
	for i := range tags {
		if tags[i] == tagToRemove {
			// Drop every occurrence of the unwanted tag.
			continue
		}
		kept = append(kept, tags[i])
	}
	return kept
}
func getSuggestedTags(ctx context.Context, llm llms.Model, content string, suggestedTitle string, availableTags []string) ([]string, error) {
@ -507,7 +558,7 @@ Content:
return strings.TrimSpace(strings.Trim(completion.Choices[0].Content, "\"")), nil
}
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document) error {
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []DocumentSuggestion) error {
client := &http.Client{}
// Fetch all available tags
@ -524,8 +575,13 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
newTags := []int{}
tags := document.SuggestedTags
if len(tags) == 0 {
tags = document.OriginalDocument.Tags
}
// Map suggested tag names to IDs
for _, tagName := range document.SuggestedTags {
for _, tagName := range tags {
if tagID, exists := availableTags[tagName]; exists {
// Skip the tag that we are filtering
if tagName == tagToFilter {
@ -537,13 +593,20 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
}
}
if len(newTags) > 0 {
updatedFields["tags"] = newTags
} else {
log.Printf("No valid tags found for document %d, skipping.", documentID)
}
suggestedTitle := document.SuggestedTitle
if len(suggestedTitle) > 128 {
suggestedTitle = suggestedTitle[:128]
}
if suggestedTitle != "" {
updatedFields["title"] = suggestedTitle
} else {
log.Printf("No valid title found for document %d, skipping.", documentID)
}
// Send the update request
url := fmt.Sprintf("%s/api/documents/%d/", baseURL, documentID)

View file

@ -15,44 +15,58 @@ interface Document {
title: string;
content: string;
tags: string[];
suggested_title?: string;
suggested_tags?: { value: string; label: string }[];
}
type ApiDocument = Omit<Document, "suggested_tags"> & {
/** Request body posted to /api/generate-suggestions. */
interface GenerateSuggestionsRequest {
// Documents to generate suggestions for.
documents: Document[];
// Whether title suggestions should be generated.
generate_titles?: boolean;
// Whether tag suggestions should be generated.
generate_tags?: boolean;
}
interface DocumentSuggestion {
id: number;
original_document: Document;
suggested_title?: string;
suggested_tags?: string[];
};
}
const DocumentProcessor: React.FC = () => {
const [documents, setDocuments] = useState<Document[]>([]);
const [availableTags, setAvailableTags] = useState<{ value: string; label: string }[]>([]);
const [documentSuggestions, setDocumentSuggestions] = useState<
DocumentSuggestion[]
>([]);
const [availableTags, setAvailableTags] = useState<
{ value: string; label: string }[]
>([]);
const [loading, setLoading] = useState<boolean>(true);
const [processing, setProcessing] = useState<boolean>(false);
const [updating, setUpdating] = useState<boolean>(false);
const [successModalOpen, setSuccessModalOpen] = useState<boolean>(false);
const [filterTag, setFilterTag] = useState<string | undefined>(undefined);
const [generateTitles, setGenerateTitles] = useState<boolean>(true);
const [generateTags, setGenerateTags] = useState<boolean>(true);
useEffect(() => {
const fetchData = async () => {
try {
const [filterTagResponse, documentsResponse, tagsResponse] =
await Promise.all([
axios.get("/api/filter-tag"),
axios.get("/api/documents"),
axios.get("/api/tags"),
axios.get<
{ tag: string } | undefined
>
("/api/filter-tag"),
axios.get<
Document[]
>("/api/documents"),
axios.get<{
[tag: string]: number;
}>("/api/tags"),
]);
setFilterTag(filterTagResponse.data?.tag);
const rawDocuments = documentsResponse.data as ApiDocument[];
const documents = rawDocuments.map((doc) => ({
...doc,
suggested_tags: doc.tags.map((tag) => ({ value: tag, label: tag })),
}));
console.log(documents);
setDocuments(documents);
setDocuments(documentsResponse.data);
// Store available tags as objects with value and label
// tagsResponse.data is a map of name to id
const tags = Object.entries(tagsResponse.data).map(([name]) => ({
value: name,
label: name,
@ -71,16 +85,17 @@ const DocumentProcessor: React.FC = () => {
const handleProcessDocuments = async () => {
setProcessing(true);
try {
const apiDocuments: ApiDocument[] = documents.map((doc) => ({
...doc,
suggested_tags: doc.suggested_tags?.map((tag) => tag.value) || [],
}));
const requestPayload: GenerateSuggestionsRequest = {
documents,
generate_titles: generateTitles,
generate_tags: generateTags,
};
const response = await axios.post<ApiDocument[]>("/api/generate-suggestions", apiDocuments);
setDocuments(response.data.map((doc) => ({
...doc,
suggested_tags: doc.suggested_tags?.map((tag) => ({ value: tag, label: tag })) || [],
})));
const response = await axios.post<DocumentSuggestion[]>(
"/api/generate-suggestions",
requestPayload
);
setDocumentSuggestions(response.data);
} catch (error) {
console.error("Error generating suggestions:", error);
} finally {
@ -91,13 +106,9 @@ const DocumentProcessor: React.FC = () => {
const handleUpdateDocuments = async () => {
setUpdating(true);
try {
const apiDocuments: ApiDocument[] = documents.map((doc) => ({
...doc,
tags: [], // Remove tags from the API document
suggested_tags: doc.suggested_tags?.map((tag) => tag.value) || [],
}));
await axios.patch("/api/update-documents", apiDocuments);
await axios.patch("/api/update-documents", documentSuggestions);
setSuccessModalOpen(true);
resetSuggestions();
} catch (error) {
console.error("Error updating documents:", error);
} finally {
@ -106,17 +117,12 @@ const DocumentProcessor: React.FC = () => {
};
const resetSuggestions = () => {
const resetDocs = documents.map((doc) => ({
...doc,
suggested_title: undefined,
suggested_tags: [],
}));
setDocuments(resetDocs);
setDocumentSuggestions([]);
};
const fetchDocuments = async () => {
try {
const response = await axios.get("/api/documents"); // API endpoint to fetch documents
const response = await axios.get("/api/documents");
setDocuments(response.data);
} catch (error) {
console.error("Error fetching documents:", error);
@ -171,7 +177,7 @@ const DocumentProcessor: React.FC = () => {
</div>
)}
{!documents.some((doc) => doc.suggested_title) && (
{documentSuggestions.length === 0 && (
<div className="space-y-6">
<div className="flex justify-between items-center">
<h2 className="text-2xl font-semibold text-gray-700">
@ -196,120 +202,130 @@ const DocumentProcessor: React.FC = () => {
{processing ? "Processing..." : "Generate Suggestions"}
</button>
</div>
<div className="bg-white shadow rounded-md overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="px-6 py-3 text-left text-sm font-medium text-gray-500">
ID
</th>
<th className="px-6 py-3 text-left text-sm font-medium text-gray-500">
Title
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
<div className="flex space-x-4 mt-4">
<label className="flex items-center space-x-2">
<input
type="checkbox"
checked={generateTitles}
onChange={(e) => setGenerateTitles(e.target.checked)}
/>
<span>Generate Titles</span>
</label>
<label className="flex items-center space-x-2">
<input
type="checkbox"
checked={generateTags}
onChange={(e) => setGenerateTags(e.target.checked)}
/>
<span>Generate Tags</span>
</label>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4 mt-6">
{documents.map((doc) => (
<tr key={doc.id}>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
{doc.id}
</td>
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
{doc.title}
</td>
</tr>
<div key={doc.id} className="bg-white shadow shadow-blue-500/50 rounded-md p-4 relative group overflow-hidden">
<h3 className="text-lg font-semibold text-gray-800">{doc.title}</h3>
<pre className="text-sm text-gray-600 mt-2 truncate">
{doc.content.length > 100 ? `${doc.content.substring(0, 100)}...` : doc.content}
</pre>
<div className="mt-4">
{doc.tags.map((tag, index) => (
<span
key={index}
className="bg-blue-100 text-blue-800 text-xs font-medium mr-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300"
>
{tag}
</span>
))}
</div>
<div className="absolute inset-0 bg-black bg-opacity-50 opacity-0 group-hover:opacity-100 transition-opacity duration-300 flex items-center justify-center p-4 rounded-md overflow-hidden">
<div className="text-sm text-white p-2 bg-gray-800 rounded-md w-full max-h-full overflow-y-auto">
<h3 className="text-lg font-semibold text-white">{doc.title}</h3>
<pre className="mt-2 whitespace-pre-wrap">
{doc.content}
</pre>
<div className="mt-4">
{doc.tags.map((tag, index) => (
<span
key={index}
className="bg-blue-100 text-blue-800 text-xs font-medium mr-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300"
>
{tag}
</span>
))}
</div>
</div>
</div>
</div>
))}
</tbody>
</table>
</div>
</div>
)}
{documents.some((doc) => doc.suggested_title) && (
{documentSuggestions.length > 0 && (
<div className="space-y-6">
<h2 className="text-2xl font-semibold text-gray-700">
Review and Edit Suggested Titles
</h2>
<div className="bg-white shadow rounded-md overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
ID
</th>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Original Title
</th>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Suggested Title
</th>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Suggested Tags
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{documents.map(
(doc) =>
doc.suggested_title && (
<tr key={doc.id}>
<td className="px-4 py-3 text-sm text-gray-500">
{doc.id}
</td>
<td className="px-4 py-3 text-sm text-gray-900">
{doc.title}
</td>
<td className="px-4 py-3 text-sm text-gray-900">
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
{documentSuggestions.map((doc) => (
<div key={doc.id} className="bg-white shadow shadow-blue-500/50 rounded-md p-4">
<h3 className="text-lg font-semibold text-gray-800">
{doc.original_document.title}
</h3>
<input
type="text"
value={doc.suggested_title}
value={doc.suggested_title || ""}
onChange={(e) => {
const updatedDocuments = documents.map((d) =>
const updatedSuggestions = documentSuggestions.map((d) =>
d.id === doc.id
? { ...d, suggested_title: e.target.value }
: d
);
setDocuments(updatedDocuments);
setDocumentSuggestions(updatedSuggestions);
}}
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
className="w-full border border-gray-300 rounded px-2 py-1 mt-2 focus:outline-none focus:ring-2 focus:ring-blue-500"
/>
</td>
<td className="px-4 py-3 text-sm text-gray-900">
<div className="mt-4">
<ReactTags
selected={doc.suggested_tags || []}
selected={
doc.suggested_tags?.map((tag) => ({
value: tag,
label: tag,
})) || []
}
suggestions={availableTags}
onAdd={(tag) => {
const updatedTags = [...(doc.suggested_tags || []), { value: tag.value as string, label: tag.label }];
const updatedDocuments = documents.map((d) =>
const tagValue = tag.value as string;
const updatedTags = [
...(doc.suggested_tags || []),
tagValue,
];
const updatedSuggestions = documentSuggestions.map((d) =>
d.id === doc.id
? { ...d, suggested_tags: updatedTags }
: d
);
setDocuments(updatedDocuments);
setDocumentSuggestions(updatedSuggestions);
}}
onDelete={(i) => {
const updatedTags = doc.suggested_tags?.filter(
(_, index) => index !== i
);
const updatedDocuments = documents.map((d) =>
const updatedSuggestions = documentSuggestions.map((d) =>
d.id === doc.id
? { ...d, suggested_tags: updatedTags }
: d
);
setDocuments(updatedDocuments);
setDocumentSuggestions(updatedSuggestions);
}}
allowNew={false}
placeholderText="Add a tag"
/>
</td>
</tr>
)
)}
</tbody>
</table>
</div>
<div className="flex justify-end space-x-4">
</div>
))}
</div>
<div className="flex justify-end space-x-4 mt-6">
<button
onClick={resetSuggestions}
className="bg-gray-200 text-gray-700 px-4 py-2 rounded hover:bg-gray-300 focus:outline-none"