feat(halo): use qwen-35b-a3b

This commit is contained in:
Harald Hoyer 2026-05-07 16:58:03 +02:00
parent 267c05b107
commit bef528e26a
3 changed files with 62 additions and 3 deletions

View file

@@ -35,8 +35,8 @@
"--port 8000"
"--no-mmap"
"--n-gpu-layers 99"
"-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
"--alias qwen3.6-27b"
"-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"
"--alias qwen3.6-35b-a3b"
"--threads 8"
"--ubatch-size 256"
"-ctk bf16 -ctv bf16"