diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix index 9ea5870..a488c8c 100644 --- a/systems/x86_64-linux/halo/llama-server.nix +++ b/systems/x86_64-linux/halo/llama-server.nix @@ -29,7 +29,7 @@ ExecStart = lib.concatStringsSep " " [ "${pkgs.llama-cpp-rocm}/bin/llama-server" "--flash-attn on" - "--parallel 2" + "--parallel 1" "--jinja" "--host 0.0.0.0" "--port 8000" @@ -43,7 +43,7 @@ "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00" "--no-context-shift" ''--chat-template-kwargs '{"preserve_thinking": true}' '' - "-c 524288" + "-c 262144" "--fit on" "--slot-save-path %C/llama-server/kv-slots" "--spec-type mtp --spec-draft-n-max 3"