# NixOS module defining the `llama-server` systemd unit.
#
# The unit launches `bin/llama-server` from `pkgs.llama-cpp-rocm` with a fixed
# set of command-line flags (model reference passed via `-hf`, listening on
# 0.0.0.0:8000) and runs it under a dynamic user with a few sandboxing options.
{ pkgs, lib, ... }:

let
  # Directory name used for both StateDirectory= and CacheDirectory=.
  dirName = "llama-server";

  # %S and %C are systemd specifiers for the state and cache directory roots;
  # systemd expands them when it loads the unit, not Nix.
  stateDir = "%S/${dirName}";
  cacheDir = "%C/${dirName}";

  # Passed to --slot-save-path and created by ExecStartPre before each start.
  slotDir = "${cacheDir}/kv-slots";

  # Everything after the binary path on the command line. The list is joined
  # with single spaces, so one token per string yields the same final string
  # as grouping a flag with its value.
  serverFlags = [
    "--flash-attn" "on"
    "--parallel" "2"
    "--jinja"

    # Listener.
    "--host" "0.0.0.0"
    "--port" "8000"

    "--no-mmap"
    "--n-gpu-layers" "99"

    # Model reference and the alias given to the server.
    "-hf" "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"
    "--alias" "qwen3.6-35b-a3b"

    "--threads" "8"
    "--ubatch-size" "256"
    "-ctk" "q8_0"
    "-ctv" "q8_0"

    "--temp" "0.6"
    "--top-p" "0.95"
    "--top-k" "20"
    "--min-p" "0.00"

    "--no-context-shift"

    # The JSON is single-quoted so systemd passes it as one argument. The
    # space before the closing '' stops the last single quote from forming
    # ''' (an escape sequence in Nix indented strings); it stays in the value
    # as a trailing space.
    ''--chat-template-kwargs '{"preserve_thinking": true}' ''

    "-c" "524288"
    "--fit" "on"
    "--slot-save-path" slotDir
  ];
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (Qwen3.6-35B-A3B, ROCm)";

    # Ordered after, and pulling in, network-online.target; started as part
    # of multi-user.target.
    wants = [ "network-online.target" ];
    after = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];

    environment = {
      # Both variables point at the unit's state directory.
      HOME = stateDir;
      HF_HOME = stateDir;

      # mkDefault gives this value default priority, so another module can
      # set a different one without lib.mkForce.
      HSA_OVERRIDE_GFX_VERSION = lib.mkDefault "11.0.0";
    };

    serviceConfig = {
      Type = "simple";

      # Identity: transient user plus two extra groups.
      # NOTE(review): video/render are presumably needed for GPU device
      # access -- confirm against the target host.
      DynamicUser = true;
      SupplementaryGroups = [ "video" "render" ];

      # Directories managed by systemd for this unit.
      StateDirectory = dirName;
      CacheDirectory = dirName;
      WorkingDirectory = stateDir;

      # Make sure the slot directory exists, then start the server.
      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p ${slotDir}";
      ExecStart = lib.concatStringsSep " " (
        [ "${pkgs.llama-cpp-rocm}/bin/llama-server" ] ++ serverFlags
      );

      # Restart only after a failure, waiting 10 seconds in between.
      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing.
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
}