; Model presets: one [*] section of shared settings followed by one section
; per model.
; NOTE(review): the keys look like llama.cpp `llama-server` options (model
; presets file) - confirm against the tool that actually loads this file.
; Keep every header and every key = value pair on its own line; use
; full-line comments only, since text after a value may be read as part of it.

; Format version of this file.
version = 1

; Settings shared by the model sections below.
; NOTE(review): presumably a key repeated in a model section overrides the
; value given here - confirm with the consuming parser.
[*]
flash-attn = on
parallel = 1
jinja = true
n-gpu-layers = 99
threads = 8
ubatch-size = 256
cache-type-k = bf16
cache-type-v = bf16
temp = 0.6
top-p = 0.95
top-k = 20
min-p = 0.0
mmap = false
no-context-shift = true
; The value is a JSON object and must stay on a single line.
chat-template-kwargs = {"preserve_thinking": true}
fit = on
spec-type = draft-mtp

[Qwen3.6-35B-A3B]
hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q8_K_XL
spec-draft-n-max = 2
; Repeats the value already set in [*].
parallel = 1
; 131072 = 128 * 1024
c = 131072
load-on-startup = true

[Qwen3.6-27B]
hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
spec-draft-n-max = 6
; Differs from the parallel = 1 set in [*].
parallel = 2
; 524288 = 512 * 1024
; NOTE(review): presumably this total is shared between the 2 parallel
; slots - confirm this is the intended per-slot size.
c = 524288
load-on-startup = true