feat(halo): use am17an/Qwen3.6-27B-MTP-GGUF:Q8_0 with MTP speculative decoding

2026-05-06 14:01:31 +02:00 · 2026-05-06 14:01:31 +02:00 · 7ebd97629d
commit 7ebd97629d
parent 9c465ac9fa
1 changed file with 2 additions and 1 deletion
--- a/systems/x86_64-linux/halo/llama-server.nix
+++ b/systems/x86_64-linux/halo/llama-server.nix
@@ -35,7 +35,7 @@
        "--port 8000"
        "--no-mmap"
        "--n-gpu-layers 99"
-        "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
+        "-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
        "--alias qwen3.6-27b"
        "--threads 8"
        "--ubatch-size 256"
@@ -46,6 +46,7 @@
        "-c 524288"
        "--fit on"
        "--slot-save-path %C/llama-server/kv-slots"
+        "--spec-type mtp --spec-draft-n-max 3"
      ];
      Restart = "on-failure";
      RestartSec = 10;