diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini index 81de281..9d23606 100644 --- a/systems/x86_64-linux/halo/models.ini +++ b/systems/x86_64-linux/halo/models.ini @@ -2,7 +2,6 @@ version = 1 [*] flash-attn = on -parallel = 2 jinja = true n-gpu-layers = 99 threads = 8 @@ -16,15 +15,18 @@ min-p = 0.0 mmap = false no-context-shift = true chat-template-kwargs = {"preserve_thinking": true} -c = 524288 fit = on spec-type = draft-mtp [Qwen3.6-35B-A3B] -hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K +hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q8_K_XL spec-draft-n-max = 3 +parallel = 1 +c = 262144 [Qwen3.6-27B] hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K spec-draft-n-max = 6 +parallel = 2 +c = 524288