chore(halo): add fast model
This commit is contained in:
parent
6fd6060dd7
commit
fc3e40686a
1 changed file with 15 additions and 0 deletions
|
|
@@ -41,3 +41,18 @@ parallel = 2
|
||||||
c = 524288
|
c = 524288
|
||||||
load-on-startup = true
|
load-on-startup = true
|
||||||
chat-template-kwargs = {"preserve_thinking": true}
|
chat-template-kwargs = {"preserve_thinking": true}
|
||||||
|
|
||||||
|
[fast]
|
||||||
|
hf = byteshape/Qwen3.6-35B-A3B-MTP-GGUF:IQ4_XS
|
||||||
|
spec-type = ngram-simple,draft-mtp
|
||||||
|
spec-draft-n-max = 3
|
||||||
|
threads-batch = 16
|
||||||
|
temp = 0.6
|
||||||
|
top-p = 0.95
|
||||||
|
top-k = 20
|
||||||
|
min-p = 0.0
|
||||||
|
presence-penalty = 1.5
|
||||||
|
parallel = 1
|
||||||
|
c = 131072
|
||||||
|
chat-template-kwargs = {"preserve_thinking": true}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue