From 6fd6060dd76633b6b816293191c69169d010ce65 Mon Sep 17 00:00:00 2001
From: Harald Hoyer <harald@hoyer.xyz>
Date: Fri, 22 May 2026 08:34:42 +0200
Subject: [PATCH] fix(rag): send explicit encoding_format to avoid llama.cpp
 null error

When encoding_format is unset, LiteLLM forwards it to the backend as JSON
null, and llama.cpp's embeddings endpoint rejects it with an HTTP 500
("type must be string, but is null"). Pin encoding_format="float" so the
gateway always relays a string.
---
 packages/rag/default.nix | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/rag/default.nix b/packages/rag/default.nix
index 423c53a..925a8cb 100644
--- a/packages/rag/default.nix
+++ b/packages/rag/default.nix
@@ -34,7 +34,9 @@ writers.writePython3Bin "rag"
 
 
     def embed(texts):
-        resp = client.embeddings.create(model=EMBED_MODEL, input=texts)
+        # encoding_format is explicit: llama.cpp rejects a null value, and
+        # LiteLLM forwards an unset one as JSON null.
+        resp = client.embeddings.create(model=EMBED_MODEL, input=texts, encoding_format="float")
         return [d.embedding for d in resp.data]