feat(halo): MTP --parallel 2
This commit is contained in:
parent
f07af7f5da
commit
b4063fda66
1 changed file with 2 additions and 2 deletions
|
|
@@ -29,7 +29,7 @@
|
|||
ExecStart = lib.concatStringsSep " " [
|
||||
"${pkgs.llama-cpp-rocm}/bin/llama-server"
|
||||
"--flash-attn on"
|
||||
"--parallel 1"
|
||||
"--parallel 2"
|
||||
"--jinja"
|
||||
"--host 0.0.0.0"
|
||||
"--port 8000"
|
||||
|
|
@@ -43,7 +43,7 @@
|
|||
"--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
|
||||
"--no-context-shift"
|
||||
''--chat-template-kwargs '{"preserve_thinking": true}' ''
|
||||
"-c 262144"
|
||||
"-c 524288"
|
||||
"--fit on"
|
||||
"--slot-save-path %C/llama-server/kv-slots-27B-MTP"
|
||||
"--spec-type draft-mtp --spec-draft-n-max 6"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue