fix(halo): context

This commit is contained in:
Harald Hoyer 2026-05-20 01:18:52 +02:00
parent 0edf975c30
commit 807a3d0d8e

View file

@@ -2,7 +2,6 @@ version = 1
[*] [*]
flash-attn = on flash-attn = on
parallel = 2
jinja = true jinja = true
n-gpu-layers = 99 n-gpu-layers = 99
threads = 8 threads = 8
@@ -16,15 +15,18 @@ min-p = 0.0
mmap = false mmap = false
no-context-shift = true no-context-shift = true
chat-template-kwargs = {"preserve_thinking": true} chat-template-kwargs = {"preserve_thinking": true}
c = 524288
fit = on fit = on
spec-type = draft-mtp spec-type = draft-mtp
[Qwen3.6-35B-A3B] [Qwen3.6-35B-A3B]
hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q8_K_XL
spec-draft-n-max = 3 spec-draft-n-max = 3
parallel = 1
c = 262144
[Qwen3.6-27B] [Qwen3.6-27B]
hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
spec-draft-n-max = 6 spec-draft-n-max = 6
parallel = 2
c = 524288