diff --git a/config/opencode/config.json b/config/opencode/config.json
index 296d75a..b65dc32 100644
--- a/config/opencode/config.json
+++ b/config/opencode/config.json
@@ -9,7 +9,7 @@
  "baseURL": "http://halo.fritz.box:8000/v1"
  },
  "models": {
- "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL": { "name" : "qwen3.6-35B-A3B" }
+ "unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL": { "name" : "qwen3.6-27B" }
  }
  },
  "halo-8001": {
diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix
index 9e655f1..84f1831 100644
--- a/systems/x86_64-linux/halo/llama-server.nix
+++ b/systems/x86_64-linux/halo/llama-server.nix
@@ -35,11 +35,11 @@
  "--port 8000"
  "--no-mmap"
  "--n-gpu-layers 99"
- "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"
- "--alias qwen3.6-35b-a3b"
+ "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
+ "--alias qwen3.6-27b"
  "--threads 8"
  "--ubatch-size 256"
- "-ctk q8_0 -ctv q8_0"
+ "-ctk bf16 -ctv bf16"
  "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
  "--no-context-shift"
  ''--chat-template-kwargs '{"preserve_thinking": true}' ''