nixcfg/systems/x86_64-linux/halo/llama-server.nix
Harald Hoyer 0edf975c30 feat(halo): serve multiple llama models via models.ini preset
Replace the per-model llama-server units with a single service that
uses llama-server's --models-preset (models.ini) and --models-max 2,
so the 35B-A3B and 27B models are loaded on demand from one config.

Drop the now-redundant 27B / 27B-MTP / coder-next variant files and
the unused CacheDirectory + slot-save-path KV-slot handling.
2026-05-20 00:23:50 +02:00

43 lines
972 B
Nix

# NixOS module fragment: declares a single systemd unit that runs llama.cpp's
# `llama-server` binary with the model preset file stored next to this module.
{
  pkgs,
  lib,
  ...
}:
let
  # Shared by HOME, HF_HOME and WorkingDirectory. `%S` is a systemd specifier,
  # so it is expanded by systemd rather than by Nix.
  stateDir = "%S/llama-server";

  # Used for both the ordering (`after`) and the dependency (`wants`).
  networkOnline = [ "network-online.target" ];

  # Command-line words; they are joined with single spaces into ExecStart.
  serverCommand = [
    "${pkgs.llama-cpp-rocm}/bin/llama-server"
    "--host 0.0.0.0"
    "--port 8000"
    # Interpolating a path makes Nix substitute the file's store path here.
    "--models-preset ${./models.ini}"
    "--models-max 2"
  ];
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (multi-model preset, ROCm)";

    wantedBy = [ "multi-user.target" ];
    wants = networkOnline;
    after = networkOnline;

    environment.HOME = stateDir;
    environment.HF_HOME = stateDir;

    serviceConfig = {
      # Process start-up and restart policy.
      Type = "simple";
      ExecStart = lib.concatStringsSep " " serverCommand;
      Restart = "on-failure";
      RestartSec = 10;

      # Identity and writable state.
      DynamicUser = true;
      # NOTE(review): presumably these groups grant access to the GPU device
      # nodes needed by the ROCm build — confirm against the host's udev rules.
      SupplementaryGroups = [ "video" "render" ];
      StateDirectory = "llama-server";
      WorkingDirectory = stateDir;

      # Sandboxing.
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
}