fix(halo): switch 27B-MTP to Q6_K quant with MTP draft-n-max 2

2026-05-15 07:47:43 +02:00 · 2026-05-15 07:47:43 +02:00 · 38d2d4f4ae
commit 38d2d4f4ae
parent baaab26eb7
2 changed files with 6 additions and 6 deletions
--- a/systems/x86_64-linux/halo/llama-server-27B-MTP.nix
+++ b/systems/x86_64-linux/halo/llama-server-27B-MTP.nix
@@ -35,7 +35,7 @@
        "--port 8000"
        "--no-mmap"
        "--n-gpu-layers 99"
-        "-hf unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL"
+        "-hf unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K"
        "--alias halo-8000"
        "--threads 8"
        "--ubatch-size 256"
@@ -46,7 +46,7 @@
        "-c 262144"
        "--fit on"
        "--slot-save-path %C/llama-server/kv-slots-27B-MTP"
-        "--spec-type mtp --spec-draft-n-max 3"
+        "--spec-type draft-mtp --spec-draft-n-max 2"
      ];
      Restart = "on-failure";
      RestartSec = 10;