diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini
index 83b160b..ddb3509 100644
--- a/systems/x86_64-linux/halo/models.ini
+++ b/systems/x86_64-linux/halo/models.ini
@@ -6,30 +6,24 @@ parallel            = 1
 jinja               = true
 n-gpu-layers        = 99
 threads             = 8
-ubatch-size         = 256
+ubatch-size         = 512
 cache-type-k        = bf16
 cache-type-v        = bf16
-temp                = 0.6
-top-p               = 0.95
-top-k               = 20
-min-p               = 0.0
 mmap                = false
 no-context-shift    = true
 chat-template-kwargs = {"preserve_thinking": true}
 fit                 = on
-spec-type           = draft-mtp
-
-[Qwen3.6-35B-A3B]
-hf                  = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q8_K_XL
-spec-draft-n-max    = 2
-parallel            = 1
 c                   = 131072
-load-on-startup     = true
 
 [Qwen3.6-27B]
 hf                  = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
+spec-type           = draft-mtp
 spec-draft-n-max    = 6
+threads-batch       = 16
+temp                = 0.6
+top-p               = 0.95
+top-k               = 20
+min-p               = 0.0
 parallel            = 2
 c                   = 524288
 load-on-startup     = true
-