From 807a3d0d8e4c1634a3e1d0fdf2502b0b63990c2d Mon Sep 17 00:00:00 2001
From: Harald Hoyer
Date: Wed, 20 May 2026 01:18:52 +0200
Subject: [PATCH] fix(halo): context

---
Review notes (this area is not part of the commit message):
  * Removes `parallel` and `c` from the shared [*] section and sets them
    per model section instead.
  * [Qwen3.6-35B-A3B]: parallel = 1, c = 262144, and the `hf` quantization
    tag changes from UD-Q6_K to UD-Q8_K_XL.
  * [Qwen3.6-27B]: parallel = 2, c = 524288 (the values that were
    previously set in [*]).
  * NOTE(review): this patch was restored from a copy whose line breaks had
    been collapsed. The second hunk header (-16,15 +15,18) requires one more
    blank context line than the visible text accounts for; it is restored
    here as the last line of the hunk (a trailing blank line at the end of
    models.ini). It could instead be a second blank line in front of one of
    the model section headers -- confirm against the real file before
    applying.

 systems/x86_64-linux/halo/models.ini | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini
index 81de281..9d23606 100644
--- a/systems/x86_64-linux/halo/models.ini
+++ b/systems/x86_64-linux/halo/models.ini
@@ -2,7 +2,6 @@ version = 1
 
 [*]
 flash-attn = on
-parallel = 2
 jinja = true
 n-gpu-layers = 99
 threads = 8
@@ -16,15 +15,18 @@ min-p = 0.0
 mmap = false
 no-context-shift = true
 chat-template-kwargs = {"preserve_thinking": true}
-c = 524288
 fit = on
 spec-type = draft-mtp
 
 [Qwen3.6-35B-A3B]
-hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K
+hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q8_K_XL
 spec-draft-n-max = 3
+parallel = 1
+c = 262144
 
 [Qwen3.6-27B]
 hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
 spec-draft-n-max = 6
+parallel = 2
+c = 524288
 