# NixOS module defining a systemd unit that runs llama.cpp's `llama-server`
# binary taken from `pkgs.llama-cpp-rocm`.
{ pkgs, lib, ... }:

let
  # Directory name reused for StateDirectory, CacheDirectory and every path
  # derived from them below.
  dirName = "llama-server";

  # These keep the literal `%S` / `%C` systemd specifiers in the generated
  # unit; Nix does not interpret them.
  stateDir = "%S/${dirName}";
  slotDir = "%C/${dirName}/kv-slots";

  serverBin = "${pkgs.llama-cpp-rocm}/bin/llama-server";

  # Command-line arguments, one flag (with its value) per element. The order
  # is significant only in that it fixes the exact text of ExecStart.
  serverArgs = [
    "--flash-attn on"
    "--parallel 1"
    "--jinja"

    # Listen address and port.
    "--host 0.0.0.0"
    "--port 8000"

    "--no-mmap"
    "--n-gpu-layers 99"

    # Model reference passed to -hf, and the alias given to the server.
    "-hf am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
    "--alias qwen3.6-27b"

    "--threads 8"
    "--ubatch-size 256"

    # -ctk / -ctv are both set to bf16.
    "-ctk bf16"
    "-ctv bf16"

    # Sampling flags passed to the server.
    "--temp 0.6"
    "--top-p 0.95"
    "--top-k 20"
    "--min-p 0.00"

    "--no-context-shift"

    # The JSON value is wrapped in single quotes so it stays one argument on
    # the systemd command line. The trailing space inside this element is
    # deliberate: it keeps the rendered ExecStart text unchanged.
    "--chat-template-kwargs '{\"preserve_thinking\": true}' "

    "-c 262144"
    "--fit on"

    # Same directory that ExecStartPre creates.
    "--slot-save-path ${slotDir}"

    "--spec-type mtp"
    "--spec-draft-n-max 3"
  ];
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (Qwen3.6-27B-MTP, ROCm)";

    # Ordered after, and pulling in, network-online.target; started as part of
    # multi-user.target.
    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    after = [ "network-online.target" ];

    # HOME and HF_HOME both point at the service's state directory.
    environment = {
      HOME = stateDir;
      HF_HOME = stateDir;
    };

    serviceConfig = {
      Type = "simple";

      # Identity and group membership.
      DynamicUser = true;
      SupplementaryGroups = [
        "video"
        "render"
      ];

      # Service-owned directories; the process starts in the state directory.
      StateDirectory = dirName;
      CacheDirectory = dirName;
      WorkingDirectory = stateDir;

      # Make sure the directory handed to --slot-save-path exists before the
      # server is launched.
      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p ${slotDir}";
      ExecStart = lib.concatStringsSep " " ([ serverBin ] ++ serverArgs);

      # Restart only on failure, waiting 10 seconds between attempts.
      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing options.
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
}