feat(litellm): add LiteLLM gateway on sgx fronting halo's llama-server
Exposes an OpenAI-compatible endpoint on sgx:4000 (LAN-reachable) that routes the `coder` model to halo's llama-server, so clients get a stable gateway with per-key auth instead of hardcoding halo's address. Master key is sourced from a sops-encrypted env file.
This commit is contained in:
parent
ccd8750899
commit
fdefdf31b2
3 changed files with 79 additions and 0 deletions
35
.secrets/sgx/litellm.yaml
Normal file
35
.secrets/sgx/litellm.yaml
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
litellm:
|
||||||
|
env: ENC[AES256_GCM,data:422srY8SJ0sXOTX22BcNiOfFGutR6lJ2XjM/B7Gf2dqt92HtCG+IYSQPydLwSL7SN0zOrBni2E6Qk23NpaJfG855k68a9A==,iv:7VXIJjAoISxw+iOA1M/uU3FlUylgwAuu0LYYh68NdH0=,tag:L5RkDHVU/OUov2C61vZOdQ==,type:str]
|
||||||
|
sops:
|
||||||
|
age:
|
||||||
|
- recipient: age149fqcw5jze00vd7jauylrp4j5xyv7amlu57jjfuzghkqtzlnxajs704uz3
|
||||||
|
enc: |
|
||||||
|
-----BEGIN AGE ENCRYPTED FILE-----
|
||||||
|
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAvZElNdGVTdDRBMlVMSnRE
|
||||||
|
anZLRzBJVCthVWdwZnlRTEN5MzExaVFabWljCnBzbTlIUXJuYVhBcmhqQ21aWDh4
|
||||||
|
NDRKZk0vZldIWXVmekU1MEQwd3ZaNWsKLS0tIHV1dGVUeW1RWDQ2cFFVQ3NXaEIv
|
||||||
|
d2NFSDZkZ2tEYWM5UTNDL2YvdXZsVGMKaNh9j1uG/lQfrManPKSIvzNstgdDw9nh
|
||||||
|
2ftjSTuxQgKk70E8vs1jTqi3aXvyH/08jrdJfWMSkaFwvvjG2ZFiIA==
|
||||||
|
-----END AGE ENCRYPTED FILE-----
|
||||||
|
- recipient: age1dwcz3fmp29ju4svy0t0wz4ylhpwlqa8xpw4l7t4gmgqr0ev37qrsfn840l
|
||||||
|
enc: |
|
||||||
|
-----BEGIN AGE ENCRYPTED FILE-----
|
||||||
|
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA4K3YrR2FzdnJqci93eWJq
|
||||||
|
WnIycStpZTc0OUsrN09KdHg0bkc1NGNmcWhVCldzNWMxZktBVTBXUEJLYWRHMzIy
|
||||||
|
RXNTcjlDb2RMKzJTUytRTW1KRnB6OVkKLS0tIElwWmpyS25JRjZEZ0JVSlZ5WmpE
|
||||||
|
Z0UwZ200L2hEMk9hSjNId3pKNW93WnMKFSasBjoGXV4kkxs0v3e8BbFDXcC0Rc1U
|
||||||
|
n8eoU+Kzbg7luZXKrryfiFYWiqSqZGbZI8/7HmdToaPh8mKg+IAWEw==
|
||||||
|
-----END AGE ENCRYPTED FILE-----
|
||||||
|
- recipient: age1cpm9xhgue7sjvq7zyeeaxwr96c93sfzxxxj76sxsq7s7kgnygvcq5jxren
|
||||||
|
enc: |
|
||||||
|
-----BEGIN AGE ENCRYPTED FILE-----
|
||||||
|
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBIVUgySWtPTHBYendvUHE3
|
||||||
|
OU52ckVVOXoxTmUwMEN5aTdBUzQ0OG0zM3ljCmdzYUdKbUMzOWt2ZlpObXJJZUZh
|
||||||
|
ODllbnFPbFhZd0EzUlpObFU5Q2pqVkkKLS0tIGJoLy9HejVhaVBxT1lPb25LVWxT
|
||||||
|
TTBqZGluMW9zdnlVay8wU3ZRSm53YXcKdBkDgWN4yf1S9VT7JKyHeuMXuGc5DxW2
|
||||||
|
JyalmP7K+7Ux0kIDbLku1VjHtrHwIHdc7r/DeoRw+4yur961BjBtdA==
|
||||||
|
-----END AGE ENCRYPTED FILE-----
|
||||||
|
lastmodified: "2026-05-21T21:03:08Z"
|
||||||
|
mac: ENC[AES256_GCM,data:WNHW/Jya3OjZJiOLi1gjlNJAqpWegYbKoLBUT//guJXaN9XNQo0Kyjz6RqeOl9+MjIF3caldhGWffMfsRVaEupbgbdFYs4OZjUK7yIw/nRFtMiWZQdKc+21gE6BVXBHZEexx6uiDt7kJ8v14dQcEB8CrssBo3g1B8tZrTVjvnUY=,iv:lZwe6JX+xUiM5ke7ji+H/er/byixIzL9WxjgbjEjixU=,tag:WJ0B2rzNOm/Qr76rBXlFhw==,type:str]
|
||||||
|
unencrypted_suffix: _unencrypted
|
||||||
|
version: 3.12.1
|
||||||
|
|
@ -11,6 +11,7 @@
|
||||||
./mail.nix
|
./mail.nix
|
||||||
./wyoming.nix
|
./wyoming.nix
|
||||||
./searx.nix
|
./searx.nix
|
||||||
|
./litellm.nix
|
||||||
./uptime-kuma.nix
|
./uptime-kuma.nix
|
||||||
./firefly.nix
|
./firefly.nix
|
||||||
./opencode.nix
|
./opencode.nix
|
||||||
|
|
|
||||||
43
systems/x86_64-linux/sgx/litellm.nix
Normal file
43
systems/x86_64-linux/sgx/litellm.nix
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
{ config, ... }:
|
||||||
|
{
|
||||||
|
# OpenAI-compatible gateway in front of halo's llama-server, exposed as a
|
||||||
|
# shared endpoint across the LAN (per-key routing, logging, future cloud
|
||||||
|
# fallback) so clients hit sgx:4000 instead of hardcoding halo's address.
|
||||||
|
services.litellm = {
|
||||||
|
enable = true;
|
||||||
|
host = "0.0.0.0";
|
||||||
|
port = 4000; # 8080 is Open WebUI, 8081 is searx
|
||||||
|
openFirewall = true; # reachable across the LAN
|
||||||
|
environmentFile = config.sops.secrets."litellm/env".path;
|
||||||
|
|
||||||
|
settings = {
|
||||||
|
model_list = [
|
||||||
|
{
|
||||||
|
# halo exposes the `[coder]` preset from systems/.../halo/models.ini.
|
||||||
|
# llama-server speaks the OpenAI API, so route it as an openai/* model.
|
||||||
|
model_name = "coder";
|
||||||
|
litellm_params = {
|
||||||
|
model = "openai/coder";
|
||||||
|
api_base = "http://halo:8000/v1";
|
||||||
|
api_key = "none"; # llama-server requires no key; value is ignored
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
general_settings = {
|
||||||
|
master_key = "os.environ/LITELLM_MASTER_KEY";
|
||||||
|
};
|
||||||
|
|
||||||
|
litellm_settings = {
|
||||||
|
drop_params = true;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Decrypted file must contain the env line: LITELLM_MASTER_KEY=sk-...
|
||||||
|
# Read by systemd (as root) before dropping to litellm's DynamicUser.
|
||||||
|
sops.secrets."litellm/env" = {
|
||||||
|
sopsFile = ../../../.secrets/sgx/litellm.yaml;
|
||||||
|
restartUnits = [ "litellm.service" ];
|
||||||
|
};
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue