Replace the per-model llama-server units with a single service that uses llama-server's --models-preset (models.ini) and --models-max 2, so the 35B-A3B and 27B models are loaded on demand from one config. Drop the now-redundant 27B / 27B-MTP / coder-next variant files and the unused CacheDirectory + slot-save-path KV-slot handling.
43 lines
972 B
Nix
43 lines
972 B
Nix
# NixOS module: a single llama.cpp `llama-server` systemd service that serves
# several models from one preset file instead of one unit per model.
{
  pkgs,
  lib,
  ...
}:
let
  # `%S` is the systemd specifier for the state directory root; combined with
  # `StateDirectory = "llama-server"` below, this is the service's own
  # persistent directory. Defined once so HOME, HF_HOME and WorkingDirectory
  # cannot drift apart.
  stateDir = "%S/llama-server";
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (multi-model preset, ROCm)";
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];

    # Point HOME and the Hugging Face cache root at the state directory.
    # NOTE(review): presumably so downloads/caches land somewhere writable
    # under DynamicUser + ProtectHome; confirm which variable llama-server
    # actually consults.
    environment = {
      HOME = stateDir;
      HF_HOME = stateDir;
    };

    serviceConfig = {
      Type = "simple";

      # Run as a transient, systemd-allocated user rather than a static one.
      DynamicUser = true;
      # NOTE(review): presumably required for access to the GPU device nodes
      # used by the ROCm build; verify against the host's device permissions.
      SupplementaryGroups = [
        "video"
        "render"
      ];

      StateDirectory = "llama-server";
      WorkingDirectory = stateDir;

      # Arguments are joined with single spaces into one ExecStart line.
      # `${./models.ini}` interpolates a path, so the preset file is copied
      # into the Nix store and the unit references that store path.
      ExecStart = lib.concatStringsSep " " [
        "${pkgs.llama-cpp-rocm}/bin/llama-server"
        # NOTE(review): 0.0.0.0 listens on every interface; confirm that is
        # intended and that access is restricted elsewhere (firewall/proxy).
        "--host 0.0.0.0"
        "--port 8000"
        "--models-preset ${./models.ini}"
        # NOTE(review): presumably caps the number of simultaneously loaded
        # models at two; confirm against the llama-server documentation.
        "--models-max 2"
      ];

      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing.
      PrivateTmp = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      NoNewPrivileges = true;
    };
  };
}