From 3a070413e4a57f54da4bd5bca80d77581bb43156 Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Thu, 21 May 2026 23:11:00 +0200 Subject: [PATCH] chore(halo): upgrade coder model to Q8 quant and bump spec draft Switch the coder model from Q6_K to the UD-Q8_K_XL quant for better output quality, and raise spec-draft-n-max from 4 to 5 to allow longer speculative draft sequences. --- systems/x86_64-linux/halo/models.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini index f6121b9..5b1bbac 100644 --- a/systems/x86_64-linux/halo/models.ini +++ b/systems/x86_64-linux/halo/models.ini @@ -15,9 +15,9 @@ fit = on c = 131072 [coder] -hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K +hf = unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL spec-type = ngram-simple,draft-mtp -spec-draft-n-max = 4 +spec-draft-n-max = 5 threads-batch = 16 temp = 0.6 top-p = 0.95