; nixcfg/systems/x86_64-linux/halo/models.ini

; Top-level key set outside any section.
; NOTE(review): presumably the preset-format version expected by the consuming
; server - confirm against its documentation before changing.
version = 1

; Wildcard section.
; NOTE(review): presumably these keys are defaults shared by every model
; section in this file, with per-model keys taking precedence (both model
; sections below set their own `parallel`) - confirm against the parser.
[*]
flash-attn          = on
parallel            = 1
jinja               = true
n-gpu-layers        = 99
threads             = 8
ubatch-size         = 256
; K and V cache types are set to the same value.
cache-type-k        = bf16
cache-type-v        = bf16
; Sampling settings.
temp                = 0.6
top-p               = 0.95
top-k               = 20
min-p               = 0.0
; NOTE(review): two negation styles are used here: `mmap = false` sets a
; positive key to false, while `no-context-shift = true` sets a `no-`-prefixed
; key to true - TODO confirm the parser accepts both forms.
mmap                = false
no-context-shift    = true
; The value is a JSON object; keep it valid JSON on a single line.
chat-template-kwargs = {"preserve_thinking": true}
fit                 = on
; Each model section sets its own `spec-draft-n-max` next to this spec-type.
spec-type           = draft-mtp

; Settings for the model entry named "Qwen3.6-35B-A3B".
; NOTE(review): assumes keys set here take precedence over the [*] section -
; confirm against the parser.
[Qwen3.6-35B-A3B]
; `hf` has the form <owner>/<repo>:<tag>; presumably a Hugging Face repo plus
; quantization tag - verify the tag exists in that repo.
hf                  = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q8_K_XL
spec-draft-n-max    = 2
; Repeats the value already set in [*].
parallel            = 1
; 131072 = 128 * 1024. NOTE(review): presumably the context size in tokens - confirm.
c                   = 131072
load-on-startup     = true

; Settings for the model entry named "Qwen3.6-27B".
; NOTE(review): assumes keys set here take precedence over the [*] section -
; confirm against the parser.
[Qwen3.6-27B]
; `hf` has the form <owner>/<repo>:<tag>; presumably a Hugging Face repo plus
; quantization tag - verify the tag exists in that repo.
hf                  = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
spec-draft-n-max    = 6
; Differs from the value set in [*] (1).
parallel            = 2
; 524288 = 512 * 1024; divided by parallel = 2 that is 262144 each.
; NOTE(review): presumably a total context size shared across parallel slots -
; TODO confirm how the server divides it.
c                   = 524288
; NOTE(review): the [Qwen3.6-35B-A3B] section also sets load-on-startup = true -
; verify that both models fit in memory at the same time.
load-on-startup     = true