diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix index 8264f3a..22775d0 100644 --- a/systems/x86_64-linux/halo/llama-server.nix +++ b/systems/x86_64-linux/halo/llama-server.nix @@ -29,7 +29,7 @@ "--host 0.0.0.0" "--port 8000" "--models-preset ${./models.ini}" - "--models-max 2" + "--models-max 3" ]; Restart = "on-failure"; RestartSec = 10; diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini index 5fca2f8..be3387e 100644 --- a/systems/x86_64-linux/halo/models.ini +++ b/systems/x86_64-linux/halo/models.ini @@ -54,5 +54,6 @@ min-p = 0.0 presence-penalty = 1.5 parallel = 1 c = 131072 +load-on-startup = true chat-template-kwargs = {"preserve_thinking": true}