; nixcfg/systems/x86_64-linux/halo/models.ini

; Format version of this preset file; global key, set before any section.
version = 1

; Global defaults shared by every preset in this file.
; A named section that sets the same key takes precedence over the value here.
[*]
; GPU offload and CPU threads
n-gpu-layers = 99
threads = 8

; Slots and micro-batch size
parallel = 1
ubatch-size = 256

; Attention and KV cache element types (keys / values)
flash-attn = on
cache-type-k = bf16
cache-type-v = bf16

; Context: c is the short key for context size (131072 = 128 * 1024)
c = 131072
no-context-shift = true
fit = on

; Model loading and chat templating
mmap = false
jinja = true

; Preset "coder": model pulled from a Hugging Face repo, loaded at startup.
; Keys set here take precedence over the same keys in [*].
[coder]
hf = unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL
load-on-startup = true

; Overrides of the [*] defaults (parallel = 1, c = 131072).
; 1048576 = 4 x 262144; presumably sized against the 4 slots - confirm.
parallel = 4
c = 1048576
threads-batch = 16

; Speculative decoding
spec-type = ngram-simple,draft-mtp
spec-draft-n-max = 5
spec-draft-p-min = 0.74

; Sampling
temp = 0.6
top-p = 0.95
top-k = 20
min-p = 0.0

; JSON object of extra arguments for the chat template.
chat-template-kwargs = {"preserve_thinking": true}