fix(halo): q6_k with mtp 2

This commit is contained in:
Harald Hoyer 2026-05-15 07:47:43 +02:00
parent baaab26eb7
commit 38d2d4f4ae
2 changed files with 6 additions and 6 deletions

View file

@@ -35,7 +35,7 @@
 "--port 8000"
 "--no-mmap"
 "--n-gpu-layers 99"
-"-hf unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL"
+"-hf unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K"
 "--alias halo-8000"
 "--threads 8"
 "--ubatch-size 256"
@@ -46,7 +46,7 @@
 "-c 262144"
 "--fit on"
 "--slot-save-path %C/llama-server/kv-slots-27B-MTP"
-"--spec-type mtp --spec-draft-n-max 3"
+"--spec-type draft-mtp --spec-draft-n-max 2"
 ];
 Restart = "on-failure";
 RestartSec = 10;