feat(halo): MTP --parallel 2, -c 524288

This commit is contained in:
Harald Hoyer 2026-05-19 23:48:01 +02:00
parent f07af7f5da
commit b4063fda66

View file

@@ -29,7 +29,7 @@
ExecStart = lib.concatStringsSep " " [ ExecStart = lib.concatStringsSep " " [
"${pkgs.llama-cpp-rocm}/bin/llama-server" "${pkgs.llama-cpp-rocm}/bin/llama-server"
"--flash-attn on" "--flash-attn on"
"--parallel 1" "--parallel 2"
"--jinja" "--jinja"
"--host 0.0.0.0" "--host 0.0.0.0"
"--port 8000" "--port 8000"
@@ -43,7 +43,7 @@
"--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00" "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
"--no-context-shift" "--no-context-shift"
''--chat-template-kwargs '{"preserve_thinking": true}' '' ''--chat-template-kwargs '{"preserve_thinking": true}' ''
"-c 262144" "-c 524288"
"--fit on" "--fit on"
"--slot-save-path %C/llama-server/kv-slots-27B-MTP" "--slot-save-path %C/llama-server/kv-slots-27B-MTP"
"--spec-type draft-mtp --spec-draft-n-max 6" "--spec-type draft-mtp --spec-draft-n-max 6"