From 6c5ce8742c186ebaffa29c6834f209aba5201f45 Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Wed, 20 May 2026 14:23:20 +0200 Subject: [PATCH] fix(halo): only one model --- systems/x86_64-linux/halo/llama-server.nix | 2 +- systems/x86_64-linux/halo/models.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix index 8264f3a..4cca9f8 100644 --- a/systems/x86_64-linux/halo/llama-server.nix +++ b/systems/x86_64-linux/halo/llama-server.nix @@ -29,7 +29,7 @@ "--host 0.0.0.0" "--port 8000" "--models-preset ${./models.ini}" - "--models-max 2" + "--models-max 1" ]; Restart = "on-failure"; RestartSec = 10; diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini index ddb3509..6632557 100644 --- a/systems/x86_64-linux/halo/models.ini +++ b/systems/x86_64-linux/halo/models.ini @@ -11,7 +11,6 @@ cache-type-k = bf16 cache-type-v = bf16 mmap = false no-context-shift = true -chat-template-kwargs = {"preserve_thinking": true} fit = on c = 131072 @@ -27,3 +26,4 @@ min-p = 0.0 parallel = 2 c = 524288 load-on-startup = true +chat-template-kwargs = {"preserve_thinking": true}