diff --git a/README.md b/README.md
index c829818..6b00991 100644
--- a/README.md
+++ b/README.md
@@ -75,8 +75,8 @@ services:
       OPENAI_API_KEY: 'your_openai_api_key' # Required if using OpenAI
       LLM_LANGUAGE: 'English' # Optional, default is 'English'
       OLLAMA_HOST: 'http://host.docker.internal:11434' # If using Ollama
-      VISION_LLM_PROVIDER: 'ollama' # Optional, for OCR
-      VISION_LLM_MODEL: 'minicpm-v' # Optional, for OCR
+      VISION_LLM_PROVIDER: 'ollama' # Optional (for OCR) - ollama or openai
+      VISION_LLM_MODEL: 'minicpm-v' # Optional (for OCR) - e.g. minicpm-v for ollama, gpt-4o for openai
       LOG_LEVEL: 'info' # Optional or 'debug', 'warn', 'error'
     volumes:
       - ./prompts:/app/prompts # Mount the prompts directory
diff --git a/app_llm.go b/app_llm.go
index b7228f6..a4a4c6b 100644
--- a/app_llm.go
+++ b/app_llm.go
@@ -3,6 +3,7 @@ package main
 import (
 	"bytes"
 	"context"
+	"encoding/base64"
 	"fmt"
 	"strings"
 	"sync"
@@ -81,14 +82,26 @@ func (app *App) doOCRViaLLM(ctx context.Context, jpegBytes []byte) (string, erro
 	prompt := promptBuffer.String()
 
+	// If the provider is not OpenAI, send the image as a binary part; otherwise use an ImageURL part with base64 data-URI encoding per https://platform.openai.com/docs/guides/vision
+	var parts []llms.ContentPart
+	if strings.ToLower(visionLlmProvider) != "openai" {
+		parts = []llms.ContentPart{
+			llms.BinaryPart("image/jpeg", jpegBytes),
+			llms.TextPart(prompt),
+		}
+	} else {
+		base64Image := base64.StdEncoding.EncodeToString(jpegBytes)
+		parts = []llms.ContentPart{
+			llms.ImageURLPart(fmt.Sprintf("data:image/jpeg;base64,%s", base64Image)),
+			llms.TextPart(prompt),
+		}
+	}
+
 	// Convert the image to text
 	completion, err := app.VisionLLM.GenerateContent(ctx, []llms.MessageContent{
 		{
-			Parts: []llms.ContentPart{
-				llms.BinaryPart("image/jpeg", jpegBytes),
-				llms.TextPart(prompt),
-			},
-			Role: llms.ChatMessageTypeHuman,
+			Parts: parts,
+			Role:  llms.ChatMessageTypeHuman,
 		},
 	})
 	if err != nil {
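For reference, the provider-dependent branch above can be read as a small standalone helper. This is a minimal sketch, assuming the `github.com/tmc/langchaingo/llms` package that the diff's `llms.*` calls come from; the `buildVisionParts` function name is hypothetical and not part of the change itself.

```go
package main

import (
	"encoding/base64"
	"fmt"
	"strings"

	"github.com/tmc/langchaingo/llms"
)

// buildVisionParts illustrates the branch in the diff: OpenAI's vision API
// expects the image as a base64 data URI in an image_url part, while other
// providers (e.g. Ollama) accept the raw JPEG bytes as a binary part.
func buildVisionParts(provider, prompt string, jpegBytes []byte) []llms.ContentPart {
	if strings.ToLower(provider) == "openai" {
		dataURI := fmt.Sprintf("data:image/jpeg;base64,%s",
			base64.StdEncoding.EncodeToString(jpegBytes))
		return []llms.ContentPart{
			llms.ImageURLPart(dataURI),
			llms.TextPart(prompt),
		}
	}
	return []llms.ContentPart{
		llms.BinaryPart("image/jpeg", jpegBytes),
		llms.TextPart(prompt),
	}
}
```

The resulting slice would then be passed as the `Parts` field of an `llms.MessageContent` with `Role: llms.ChatMessageTypeHuman`, exactly as `doOCRViaLLM` does after this change.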