diff --git a/go.mod b/go.mod index 0600ba0..6825cd4 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/gin-gonic/gin v1.10.0 github.com/stretchr/testify v1.9.0 github.com/tmc/langchaingo v0.1.12 + golang.org/x/sync v0.7.0 ) require ( diff --git a/go.sum b/go.sum index 14b1880..d4dfaab 100644 --- a/go.sum +++ b/go.sum @@ -117,6 +117,8 @@ golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/paperless.go b/paperless.go index dc38109..6c5433d 100644 --- a/paperless.go +++ b/paperless.go @@ -12,8 +12,10 @@ import ( "os" "path/filepath" "strings" + "sync" "github.com/gen2brain/go-fitz" + "golang.org/x/sync/errgroup" ) // PaperlessClient struct to interact with the Paperless-NGX API @@ -307,37 +309,52 @@ func (c *PaperlessClient) DownloadDocumentAsImages(ctx context.Context, document } defer doc.Close() + var mu sync.Mutex + var g errgroup.Group + for n := 0; n < doc.NumPage(); n++ { - img, err := doc.Image(n) - if err != nil { - return nil, err - } + n := n // capture loop variable + g.Go(func() error { + img, err := doc.Image(n) + if err != nil { + return err + } - imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n)) - f, err := os.Create(imagePath) - if err != nil { - return nil, err - } + imagePath := filepath.Join(docDir, fmt.Sprintf("page%03d.jpg", n)) + f, err := os.Create(imagePath) + if err != nil { + return err + } - err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality}) - if err != nil { - return nil, err - } - f.Close() + err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality}) + if err != nil { + f.Close() + return err + } + f.Close() - // Verify the JPEG file - file, err := os.Open(imagePath) - if err != nil { - return nil, err - } - defer file.Close() + // Verify the JPEG file + file, err := os.Open(imagePath) + if err != nil { + return err + } + defer file.Close() - _, err = jpeg.Decode(file) - if err != nil { - return nil, fmt.Errorf("invalid JPEG file: %s", imagePath) - } + _, err = jpeg.Decode(file) + if err != nil { + return fmt.Errorf("invalid JPEG file: %s", imagePath) + } - imagePaths = append(imagePaths, imagePath) + mu.Lock() + imagePaths = append(imagePaths, imagePath) + mu.Unlock() + + return nil + }) + } + + if err := g.Wait(); err != nil { + return nil, err } return imagePaths, nil