# Nix source file (60 lines, 1.5 KiB)
# NixOS module fragment: declares a systemd unit that launches the
# `llama-server` binary from the `llama-cpp-rocm` package.
{
  pkgs,
  lib,
  ...
}:

let
  # Directory name shared by StateDirectory= and CacheDirectory=.
  dirName = "llama-server";

  # systemd expands %S to the state directory root and %C to the cache
  # directory root when it loads the unit.
  stateDir = "%S/${dirName}";
  kvSlotDir = "%C/${dirName}/kv-slots";

  # Full command line, one flag (plus its value, if any) per line.
  # The elements are joined with single spaces to form ExecStart.
  serverArgs = [
    "${pkgs.llama-cpp-rocm}/bin/llama-server"
    "--flash-attn" "on"
    "--parallel" "1"
    "--jinja"
    "--host" "0.0.0.0"
    "--port" "8000"
    "--no-mmap"
    "--n-gpu-layers" "99"
    "-hf" "am17an/Qwen3.6-27B-MTP-GGUF:Q8_0"
    "--alias" "qwen3.6-27b"
    "--threads" "8"
    "--ubatch-size" "256"
    "-ctk" "bf16"
    "-ctv" "bf16"
    "--temp" "0.6"
    "--top-p" "0.95"
    "--top-k" "20"
    "--min-p" "0.00"
    "--no-context-shift"
    # The JSON value is wrapped in single quotes so systemd passes it as one
    # argument. The trailing space inside the string is intentional: it keeps
    # the rendered command line identical to the previous definition.
    "--chat-template-kwargs" "'{\"preserve_thinking\": true}' "
    "-c" "262144"
    "--fit" "on"
    "--slot-save-path" kvSlotDir
    "--spec-type" "mtp"
    "--spec-draft-n-max" "3"
  ];
in
{
  systemd.services.llama-server = {
    description = "llama.cpp server (Qwen3.6-27B-MTP, ROCm)";

    # Start at boot, ordered after the network is reported online.
    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    after = [ "network-online.target" ];

    # Both HOME and the Hugging Face home point at the unit's state directory.
    environment = {
      HOME = stateDir;
      HF_HOME = stateDir;
    };

    serviceConfig = {
      Type = "simple";

      # Identity: a systemd-allocated dynamic user with two extra groups.
      # NOTE(review): "video"/"render" are presumably needed for GPU device
      # access under ROCm — confirm against the target host.
      DynamicUser = true;
      SupplementaryGroups = [ "video" "render" ];

      # systemd-managed directories, both named after the unit.
      StateDirectory = dirName;
      CacheDirectory = dirName;
      WorkingDirectory = stateDir;

      # Ensure the slot directory passed via --slot-save-path exists
      # before the server starts.
      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p ${kvSlotDir}";
      ExecStart = lib.concatStringsSep " " serverArgs;

      # Restart only on failure, waiting 10 seconds between attempts.
      Restart = "on-failure";
      RestartSec = 10;

      # Sandboxing options.
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
}