feat(halo): use am17an/Qwen3.6-27B-MTP-GGUF:Q8_0 with MTP spec
This commit is contained in:
parent
9c465ac9fa
commit
7ebd97629d
1 changed file with 2 additions and 1 deletion
|
|
@@ -35,7 +35,7 @@
|
|||
"--port 8000"
|
||||
"--no-mmap"
|
||||
"--n-gpu-layers 99"
|
||||
"-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
|
||||
"-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
|
||||
"--alias qwen3.6-27b"
|
||||
"--threads 8"
|
||||
"--ubatch-size 256"
|
||||
|
|
@@ -46,6 +46,7 @@
|
|||
"-c 524288"
|
||||
"--fit on"
|
||||
"--slot-save-path %C/llama-server/kv-slots"
|
||||
"--spec-type mtp --spec-draft-n-max 3"
|
||||
];
|
||||
Restart = "on-failure";
|
||||
RestartSec = 10;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue