From 7ebd97629d95668a2e9ef3aad54ba1ffaf68f34c Mon Sep 17 00:00:00 2001
From: Harald Hoyer <harald@hoyer.xyz>
Date: Wed, 6 May 2026 14:01:31 +0200
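Subject: [PATCH] feat(halo): use am17an/Qwen3.6-27B-MTP-GGUF:Q8_0 with MTP
 spec

Switch the halo llama-server model from
unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL to am17an/Qwen3.6-27B-MTP-GGUF:Q8_0
and pass `--spec-type mtp --spec-draft-n-max 3` so the server uses MTP
speculative decoding ("MTP spec") with the draft length capped at 3.

The `--alias qwen3.6-27b` argument is unchanged.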

---
 systems/x86_64-linux/halo/llama-server.nix | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix
index 84f1831..ffe6f0f 100644
--- a/systems/x86_64-linux/halo/llama-server.nix
+++ b/systems/x86_64-linux/halo/llama-server.nix
@@ -35,7 +35,7 @@
         "--port 8000"
         "--no-mmap"
         "--n-gpu-layers 99"
-        "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
+        "-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
         "--alias qwen3.6-27b"
         "--threads 8"
         "--ubatch-size 256"
@@ -46,6 +46,7 @@
         "-c 524288"
         "--fit on"
         "--slot-save-path %C/llama-server/kv-slots"
+        "--spec-type mtp --spec-draft-n-max 3"
       ];
       Restart = "on-failure";
       RestartSec = 10;