feat(halo): add llama-server systemd unit for Qwen3.6-35B-A3B

Runs llama.cpp's ROCm build under DynamicUser, with the HF model cache
in StateDirectory (survives systemctl clean) and KV slot saves in
CacheDirectory. Listens on :8000.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Harald Hoyer 2026-05-05 10:02:51 +02:00
parent 603e435db8
commit b11e5c8356
2 changed files with 61 additions and 0 deletions

View file

@@ -0,0 +1,60 @@
{
  pkgs,
  lib,
  ...
}:
{
  # llama.cpp inference server (ROCm build) serving Qwen3.6-35B-A3B on :8000.
  # Runs as a DynamicUser; the Hugging Face model cache lives in the state
  # directory (/var/lib/llama-server) and KV slot saves in the cache directory.
  systemd.services.llama-server = {
    description = "llama.cpp server (Qwen3.6-35B-A3B, ROCm)";
    # First start downloads the model via -hf, so wait for real connectivity.
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];

    environment = {
      # DynamicUser has no persistent home; point HOME and HF_HOME at the
      # state directory (%S = /var/lib) so the HF download cache survives
      # restarts and `systemctl clean --what=cache`.
      HOME = "%S/llama-server";
      HF_HOME = "%S/llama-server";
      # ROCm GFX version override; mkDefault lets per-host config replace it.
      HSA_OVERRIDE_GFX_VERSION = lib.mkDefault "11.0.0";
    };

    serviceConfig = {
      Type = "simple";
      DynamicUser = true;
      # GPU device-node access for the transient user.
      SupplementaryGroups = [
        "video"
        "render"
      ];
      StateDirectory = "llama-server";
      # CacheDirectory= accepts subdirectory paths: listing the kv-slots
      # subdirectory here makes systemd create it with the right ownership,
      # replacing the previous ExecStartPre `mkdir -p` (and its coreutils
      # dependency).
      CacheDirectory = [
        "llama-server"
        "llama-server/kv-slots"
      ];
      WorkingDirectory = "%S/llama-server";
      ExecStart = lib.concatStringsSep " " [
        "${pkgs.llama-cpp-rocm}/bin/llama-server"
        "--flash-attn on"
        "--parallel 2"
        "--jinja"
        "--host 0.0.0.0"
        "--port 8000"
        "--no-mmap"
        "--n-gpu-layers 99"
        "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"
        "--alias qwen3.6-35b-a3b"
        "--threads 8"
        "--ubatch-size 256"
        "-ctk q8_0 -ctv q8_0"
        "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
        "--no-context-shift"
        ''--chat-template-kwargs '{"preserve_thinking": true}' ''
        "-c 524288"
        "--fit on"
        "--slot-save-path %C/llama-server/kv-slots"
      ];
      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing: writable paths are limited to the State/Cache directories
      # declared above.
      PrivateTmp = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      NoNewPrivileges = true;
    };
  };
}