nixcfg/systems/x86_64-linux/halo/models.ini
Harald Hoyer 3a070413e4 chore(halo): upgrade coder model to Q8 quant and bump spec draft
Switch the coder model from Q6_K to the UD-Q8_K_XL quant for better
output quality, and raise spec-draft-n-max from 4 to 5 to allow longer
speculative draft sequences.
2026-05-21 23:11:00 +02:00

29 lines
732 B
INI

; models.ini for the "halo" system (nixcfg/systems/x86_64-linux/halo).
; NOTE(review): the option names used in this file look like llama.cpp
; llama-server arguments, which would make this a model preset file.
; Confirm which program reads it before relying on any dialect detail.
; Format version marker, declared before the first section header.
version = 1
; Wildcard section. It sets "parallel" and "c", both of which the [coder]
; section sets again with different values.
; NOTE(review): presumably these keys are defaults shared by every model
; section, and a key repeated in a named section takes precedence there.
; Confirm against the consuming parser.
[*]
flash-attn = on
; 1 here; [coder] sets 4.
parallel = 1
jinja = true
; NOTE(review): 99 is presumably "more layers than any model has", i.e.
; offload everything to the GPU. Confirm.
n-gpu-layers = 99
threads = 8
ubatch-size = 256
; K and V cache types are set to the same value (bf16).
cache-type-k = bf16
cache-type-v = bf16
mmap = false
; The key itself is the negated form ("no-..."), so true switches the
; feature off. NOTE(review): verify the parser accepts negated key names.
no-context-shift = true
fit = on
; 131072 = 128 * 1024. [coder] sets 1048576.
c = 131072
; Model section "coder". It repeats "parallel" and "c" from the [*] section
; with larger values.
[coder]
; Value has the form <owner>/<repo>:<quant tag>. The UD-Q8_K_XL quant was
; chosen over Q6_K for better output quality.
hf = unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL
; Comma-separated list with two entries.
spec-type = ngram-simple,draft-mtp
; 5 (previously 4) allows longer speculative draft sequences.
spec-draft-n-max = 5
; Twice the "threads" value (8) set in [*].
threads-batch = 16
; NOTE(review): temp/top-p/top-k/min-p are presumably the model publisher's
; recommended sampling values. Confirm before changing them.
temp = 0.6
top-p = 0.95
top-k = 20
min-p = 0.0
; 4 here versus 1 in [*].
parallel = 4
; 1048576 = 1024 * 1024 = 4 * 262144.
; NOTE(review): if the consumer divides the context evenly across the 4
; parallel sequences, each one gets 262144. Confirm how it is split.
c = 1048576
load-on-startup = true
; The value is a JSON object literal and must stay valid JSON.
; NOTE(review): presumably forwarded to the chat template (jinja = true is
; set in [*]). Confirm that "preserve_thinking" is a key the template reads.
chat-template-kwargs = {"preserve_thinking": true}