chore(halo): set spec-draft-p-min for coder model

Add a confidence threshold of 0.74 (spec-draft-p-min) so that speculative
drafting stops early once the draft model's probability for its predicted
token drops below that threshold, favoring shorter draft sequences with a
higher acceptance rate.
This commit is contained in:
Harald Hoyer 2026-05-21 23:15:09 +02:00
parent 3a070413e4
commit ccd8750899

View file

@@ -18,6 +18,7 @@ c = 131072
hf = unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL
spec-type = ngram-simple,draft-mtp
spec-draft-n-max = 5
spec-draft-p-min = 0.74
threads-batch = 16
temp = 0.6
top-p = 0.95