nixcfg/systems/x86_64-linux/sgx/litellm.nix
Harald Hoyer 116d04665d fix(sgx): move LiteLLM off port 4000 to avoid uptime-kuma clash
Uptime Kuma already binds 4000, so the gateway never got the port and
requests hit the wrong service. Move LiteLLM to 4001 and update the rag
CLI default endpoint to match.
2026-05-22 07:08:26 +02:00

53 lines
1.7 KiB
Nix

{ config, ... }:
{
# OpenAI-compatible gateway in front of halo's llama-server, exposed as a
# shared endpoint across the Tailnet (per-key routing, logging, future cloud
# fallback) so clients hit sgx:4001 instead of hardcoding halo's address.
services.litellm =
  let
    # Connection details shared by every model routed to halo's llama-server.
    # llama-server requires no key, so the api_key value is ignored.
    haloUpstream = {
      api_base = "http://halo:8000/v1";
      api_key = "none";
    };
  in
  {
    enable = true;

    # Listen on all interfaces and open the firewall so the gateway is
    # reachable across the LAN.
    host = "0.0.0.0";
    openFirewall = true;
    # Other ports noted for this host: 8080 Open WebUI, 8081 searx,
    # 4000 uptime-kuma.
    port = 4001;

    # sops-managed environment file handed to the service.
    environmentFile = config.sops.secrets."litellm/env".path;

    settings = {
      # The master key is taken from the LITELLM_MASTER_KEY environment
      # variable rather than being written into the generated config.
      general_settings.master_key = "os.environ/LITELLM_MASTER_KEY";
      litellm_settings.drop_params = true;

      model_list = [
        {
          # The `[coder]` preset that halo exposes from
          # systems/.../halo/models.ini. llama-server speaks the OpenAI API,
          # hence the openai/* model prefix.
          model_name = "coder";
          litellm_params = haloUpstream // { model = "openai/coder"; };
        }
        {
          # Multilingual embeddings from halo's router (the `[bge-m3]`
          # preset); makes /v1/embeddings available on this gateway for the
          # rag CLI.
          model_name = "bge-m3";
          litellm_params = haloUpstream // { model = "openai/bge-m3"; };
        }
      ];
    };
  };
# The decrypted secret has to carry the env line: LITELLM_MASTER_KEY=sk-...
# systemd reads it (as root) before dropping to litellm's DynamicUser.
sops.secrets."litellm/env".sopsFile = ../../../.secrets/sgx/litellm.yaml;
# Units listed here are restarted by sops-nix when the secret changes.
sops.secrets."litellm/env".restartUnits = [ "litellm.service" ];
}