From 7ebd97629d95668a2e9ef3aad54ba1ffaf68f34c Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Wed, 6 May 2026 14:01:31 +0200 Subject: [PATCH] feat(halo): use am17an/Qwen3.6-27B-MTP-GGUF:Q8_0 with MTP spec --- systems/x86_64-linux/halo/llama-server.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix index 84f1831..ffe6f0f 100644 --- a/systems/x86_64-linux/halo/llama-server.nix +++ b/systems/x86_64-linux/halo/llama-server.nix @@ -35,7 +35,7 @@ "--port 8000" "--no-mmap" "--n-gpu-layers 99" - "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL" + "-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0" "--alias qwen3.6-27b" "--threads 8" "--ubatch-size 256" @@ -46,6 +46,7 @@ "-c 524288" "--fit on" "--slot-save-path %C/llama-server/kv-slots" + "--spec-type mtp --spec-draft-n-max 3" ]; Restart = "on-failure"; RestartSec = 10;