From 87dfe74daa076c0b91f6982ff97b332f8bfc319a Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Fri, 22 May 2026 10:37:00 +0200 Subject: [PATCH] chore(halo): max models --- systems/x86_64-linux/halo/llama-server.nix | 2 +- systems/x86_64-linux/halo/models.ini | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix index 8264f3a..22775d0 100644 --- a/systems/x86_64-linux/halo/llama-server.nix +++ b/systems/x86_64-linux/halo/llama-server.nix @@ -29,7 +29,7 @@ "--host 0.0.0.0" "--port 8000" "--models-preset ${./models.ini}" - "--models-max 2" + "--models-max 3" ]; Restart = "on-failure"; RestartSec = 10; diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini index 5fca2f8..be3387e 100644 --- a/systems/x86_64-linux/halo/models.ini +++ b/systems/x86_64-linux/halo/models.ini @@ -54,5 +54,6 @@ min-p = 0.0 presence-penalty = 1.5 parallel = 1 c = 131072 +load-on-startup = true chat-template-kwargs = {"preserve_thinking": true}