feat(halo): add different llama servers

This commit is contained in:
Harald Hoyer 2026-05-07 14:34:58 +02:00
parent b548126fb8
commit d47bb6e15b
4 changed files with 122 additions and 6 deletions

View file

@@ -0,0 +1,57 @@
# NixOS module: runs llama.cpp's `llama-server` (ROCm build) as a systemd
# service that serves the Qwen3-Coder-Next GGUF model on port 8000.
{
  pkgs,
  lib,
  ...
}:
let
  # %S and %C are systemd specifiers for the state and cache directory roots;
  # they are expanded by systemd at unit load time, not by Nix.
  stateDir = "%S/llama-server";
  slotDir = "%C/llama-server/kv-slots-coder-next";

  serverBin = "${pkgs.llama-cpp-rocm}/bin/llama-server";

  # Command-line flags, one per element, in the order they are passed.
  # They are joined with single spaces to form ExecStart.
  serverArgs = [
    "--flash-attn on"
    "--parallel 1"
    "--jinja"
    # Listens on all interfaces.
    "--host 0.0.0.0"
    "--port 8000"
    "--no-mmap"
    "--n-gpu-layers 99"
    "--threads 8"
    "--ubatch-size 256"
    "-ctk bf16"
    "-ctv bf16"
    "--fit on"
    "--no-context-shift"
    # Model reference and the name it is exposed under.
    "-hf unsloth/Qwen3-Coder-Next-GGUF:UD-Q8_K_XL"
    "--alias qwen3-coder-next"
    # Sampling parameters.
    "--temp 1.0"
    "--top-p 0.95"
    "--min-p 0.01"
    "--top-k 40"
    # Same directory that ExecStartPre creates.
    "--slot-save-path ${slotDir}"
  ];
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (Qwen3-Coder-Next, ROCm)";

    # Start at boot, ordered after the network is reported online.
    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    after = [ "network-online.target" ];

    # Both HOME and the Hugging Face home point at the service's state directory.
    environment = {
      HOME = stateDir;
      HF_HOME = stateDir;
    };

    serviceConfig = {
      Type = "simple";

      # Identity and device access.
      DynamicUser = true;
      # NOTE(review): presumably needed for GPU device nodes — confirm on the target host.
      SupplementaryGroups = [
        "video"
        "render"
      ];

      # Directories managed by systemd for this unit.
      StateDirectory = "llama-server";
      CacheDirectory = "llama-server";
      WorkingDirectory = stateDir;

      # Make sure the slot-save directory exists before the server starts.
      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p ${slotDir}";
      ExecStart = lib.concatStringsSep " " ([ serverBin ] ++ serverArgs);

      # Restart policy.
      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing.
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
}