feat(halo): use am17an/Qwen3.6-27B-MTP-GGUF:Q8_0 with MTP spec
This commit is contained in:
parent
9c465ac9fa
commit
7ebd97629d
1 changed file with 2 additions and 1 deletion
|
|
@@ -35,7 +35,7 @@
|
||||||
"--port 8000"
|
"--port 8000"
|
||||||
"--no-mmap"
|
"--no-mmap"
|
||||||
"--n-gpu-layers 99"
|
"--n-gpu-layers 99"
|
||||||
"-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
|
"-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
|
||||||
"--alias qwen3.6-27b"
|
"--alias qwen3.6-27b"
|
||||||
"--threads 8"
|
"--threads 8"
|
||||||
"--ubatch-size 256"
|
"--ubatch-size 256"
|
||||||
|
|
@@ -46,6 +46,7 @@
|
||||||
"-c 524288"
|
"-c 524288"
|
||||||
"--fit on"
|
"--fit on"
|
||||||
"--slot-save-path %C/llama-server/kv-slots"
|
"--slot-save-path %C/llama-server/kv-slots"
|
||||||
|
"--spec-type mtp --spec-draft-n-max 3"
|
||||||
];
|
];
|
||||||
Restart = "on-failure";
|
Restart = "on-failure";
|
||||||
RestartSec = 10;
|
RestartSec = 10;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue