chore(halo): upgrade coder model to Q8 quant and bump spec draft

Switch the coder model from Q6_K to the UD-Q8_K_XL quant for better
output quality, and raise spec-draft-n-max from 4 to 5 to allow longer
speculative draft sequences.
This commit is contained in:
Harald Hoyer 2026-05-21 23:11:00 +02:00
parent 689389ebf8
commit 3a070413e4

View file

@@ -15,9 +15,9 @@ fit = on
 c = 131072
 [coder]
-hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
+hf = unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL
 spec-type = ngram-simple,draft-mtp
-spec-draft-n-max = 4
+spec-draft-n-max = 5
 threads-batch = 16
 temp = 0.6
 top-p = 0.95