diff --git a/systems/x86_64-linux/halo/llama-server-27B-MTP.nix b/systems/x86_64-linux/halo/llama-server-27B-MTP.nix index d32041c..94e7efc 100644 --- a/systems/x86_64-linux/halo/llama-server-27B-MTP.nix +++ b/systems/x86_64-linux/halo/llama-server-27B-MTP.nix @@ -29,7 +29,7 @@ ExecStart = lib.concatStringsSep " " [ "${pkgs.llama-cpp-rocm}/bin/llama-server" "--flash-attn on" - "--parallel 1" + "--parallel 2" "--jinja" "--host 0.0.0.0" "--port 8000" @@ -43,7 +43,7 @@ "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00" "--no-context-shift" ''--chat-template-kwargs '{"preserve_thinking": true}' '' - "-c 262144" + "-c 524288" "--fit on" "--slot-save-path %C/llama-server/kv-slots-27B-MTP" "--spec-type draft-mtp --spec-draft-n-max 6"