chore(halo): upgrade coder model to Q8 quant and bump spec-draft-n-max

Switch the coder model from Q6_K to the UD-Q8_K_XL quant for better output quality, and raise spec-draft-n-max from 4 to 5 to allow longer speculative draft sequences.
2026-05-21 23:11:00 +02:00 · 2026-05-21 23:11:00 +02:00 · 3a070413e4
commit 3a070413e4
parent 689389ebf8
1 changed file with 2 additions and 2 deletions
--- a/systems/x86_64-linux/halo/models.ini
+++ b/systems/x86_64-linux/halo/models.ini
@@ -15,9 +15,9 @@ fit                 = on
 c                   = 131072

 [coder]
-hf                  = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
+hf                  = unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL
 spec-type           = ngram-simple,draft-mtp
-spec-draft-n-max    = 4
+spec-draft-n-max    = 5
 threads-batch       = 16
 temp                = 0.6
 top-p               = 0.95