# NixOS module: runs llama.cpp's `llama-server` (ROCm build) as a systemd
# service under a dynamically allocated user.
{ pkgs, lib, ... }:

let
  # systemd expands %S / %C to the state / cache directory roots at unit
  # load time; the sub-directories themselves are declared through
  # StateDirectory / CacheDirectory below.
  stateDir = "%S/llama-server";
  cacheDir = "%C/llama-server";

  # Directory handed to --slot-save-path; created by ExecStartPre.
  kvSlotDir = "${cacheDir}/kv-slots-coder-next";

  serverBin = "${pkgs.llama-cpp-rocm}/bin/llama-server";

  # Flags are grouped by topic for readability only. The groups are
  # concatenated in order and joined with single spaces, so the resulting
  # command line is exactly the flat flag sequence listed here.
  runtimeFlags = [
    "--flash-attn"
    "on"
    "--parallel"
    "1"
    "--jinja"
  ];

  listenFlags = [
    "--host"
    "0.0.0.0"
    "--port"
    "8000"
  ];

  resourceFlags = [
    "--no-mmap"
    "--n-gpu-layers"
    "99"
    "--threads"
    "8"
    "--ubatch-size"
    "256"
    "-ctk"
    "bf16"
    "-ctv"
    "bf16"
    "--fit"
    "on"
    "--no-context-shift"
  ];

  modelFlags = [
    "-hf"
    "unsloth/Qwen3-Coder-Next-GGUF:UD-Q8_K_XL"
    "--alias"
    "qwen3-coder-next"
  ];

  samplingFlags = [
    "--temp"
    "1.0"
    "--top-p"
    "0.95"
    "--min-p"
    "0.01"
    "--top-k"
    "40"
  ];

  slotFlags = [
    "--slot-save-path"
    kvSlotDir
  ];

  commandLine = lib.concatStringsSep " " (
    [ serverBin ]
    ++ runtimeFlags
    ++ listenFlags
    ++ resourceFlags
    ++ modelFlags
    ++ samplingFlags
    ++ slotFlags
  );
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (Qwen3-Coder-Next, ROCm)";

    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    after = [ "network-online.target" ];

    # Both variables point at the service's state directory.
    environment = {
      HOME = stateDir;
      HF_HOME = stateDir;
    };

    serviceConfig = {
      Type = "simple";

      # Identity and device access.
      DynamicUser = true;
      # NOTE(review): presumably these groups grant access to the GPU
      # device nodes needed by ROCm; confirm against the host's udev rules.
      SupplementaryGroups = [
        "video"
        "render"
      ];

      # Service-owned directories.
      StateDirectory = "llama-server";
      CacheDirectory = "llama-server";
      WorkingDirectory = stateDir;

      # Ensure the slot-save directory exists before the server starts.
      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p ${kvSlotDir}";
      ExecStart = commandLine;

      # Restart policy.
      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing.
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
}