feat(halo): use am17an/Qwen3.6-27B-MTP-GGUF:Q8_0 with MTP speculative decoding

This commit is contained in:
Harald Hoyer 2026-05-06 14:01:31 +02:00
parent 9c465ac9fa
commit 7ebd97629d

View file

@@ -35,7 +35,7 @@
"--port 8000" "--port 8000"
"--no-mmap" "--no-mmap"
"--n-gpu-layers 99" "--n-gpu-layers 99"
"-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL" "-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
"--alias qwen3.6-27b" "--alias qwen3.6-27b"
"--threads 8" "--threads 8"
"--ubatch-size 256" "--ubatch-size 256"
@@ -46,6 +46,7 @@
"-c 524288" "-c 524288"
"--fit on" "--fit on"
"--slot-save-path %C/llama-server/kv-slots" "--slot-save-path %C/llama-server/kv-slots"
"--spec-type mtp --spec-draft-n-max 3"
]; ];
Restart = "on-failure"; Restart = "on-failure";
RestartSec = 10; RestartSec = 10;