fix(halo): q6_k with mtp 2
This commit is contained in:
parent
baaab26eb7
commit
38d2d4f4ae
2 changed files with 6 additions and 6 deletions
|
|
@@ -35,7 +35,7 @@
|
|||
"--port 8000"
|
||||
"--no-mmap"
|
||||
"--n-gpu-layers 99"
|
||||
"-hf unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL"
|
||||
"-hf unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K"
|
||||
"--alias halo-8000"
|
||||
"--threads 8"
|
||||
"--ubatch-size 256"
|
||||
|
|
@@ -46,7 +46,7 @@
|
|||
"-c 262144"
|
||||
"--fit on"
|
||||
"--slot-save-path %C/llama-server/kv-slots-27B-MTP"
|
||||
"--spec-type mtp --spec-draft-n-max 3"
|
||||
"--spec-type draft-mtp --spec-draft-n-max 2"
|
||||
];
|
||||
Restart = "on-failure";
|
||||
RestartSec = 10;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue