chore(halo): add fast model

This commit is contained in:
Harald Hoyer 2026-05-22 09:10:30 +02:00
parent 6fd6060dd7
commit fc3e40686a

View file

@ -41,3 +41,18 @@ parallel = 2
c = 524288 c = 524288
load-on-startup = true load-on-startup = true
chat-template-kwargs = {"preserve_thinking": true} chat-template-kwargs = {"preserve_thinking": true}
; Preset section "fast" (the "fast model" named in the commit title).
; NOTE(review): the keys look like llama.cpp server command-line options written
; without leading dashes - confirm against the program that reads this file.
[fast]
; Model reference in "owner/repo:variant" form.
; Presumably a Hugging Face repository plus quantization tag - verify.
hf = byteshape/Qwen3.6-35B-A3B-MTP-GGUF:IQ4_XS
; Comma-separated list with two entries.
; Presumably selects speculative-decoding strategies - TODO confirm semantics and
; whether the order of the entries matters.
spec-type = ngram-simple,draft-mtp
; Presumably the upper bound on drafted tokens per speculation step - TODO confirm.
spec-draft-n-max = 3
; Presumably the thread count used for batch processing - TODO confirm.
threads-batch = 16
; The next five keys are presumably sampling parameters - verify against the consumer.
temp = 0.6
top-p = 0.95
top-k = 20
min-p = 0.0
presence-penalty = 1.5
; The diff context suggests the preceding section uses parallel = 2; this one uses 1.
parallel = 1
; 131072 = 128 * 1024. The preceding section sets c = 524288 (512 * 1024).
; Presumably the context size in tokens - TODO confirm.
c = 131072
; Single-line JSON object; identical to the value used in the preceding section.
; NOTE(review): the preceding section also sets load-on-startup = true, which is
; not set here - confirm the omission is intended.
chat-template-kwargs = {"preserve_thinking": true}