From 6fd6060dd76633b6b816293191c69169d010ce65 Mon Sep 17 00:00:00 2001
From: Harald Hoyer
Date: Fri, 22 May 2026 08:34:42 +0200
Subject: [PATCH] fix(rag): send explicit encoding_format to avoid llama.cpp null error

When encoding_format is unset, LiteLLM forwards it to the backend as JSON
null, and llama.cpp's embeddings endpoint rejects it with a 500 ("type
must be string, but is null"). Pin encoding_format="float" so the gateway
always relays a string.
---
 packages/rag/default.nix | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/rag/default.nix b/packages/rag/default.nix
index 423c53a..925a8cb 100644
--- a/packages/rag/default.nix
+++ b/packages/rag/default.nix
@@ -34,7 +34,9 @@ writers.writePython3Bin "rag"
 
 
 def embed(texts):
-    resp = client.embeddings.create(model=EMBED_MODEL, input=texts)
+    # encoding_format is explicit: llama.cpp rejects a null value, and
+    # LiteLLM forwards an unset one as JSON null.
+    resp = client.embeddings.create(model=EMBED_MODEL, input=texts, encoding_format="float")
     return [d.embedding for d in resp.data]
 
 