feat(litellm): add LiteLLM gateway on sgx fronting halo's llama-server
Exposes an OpenAI-compatible endpoint on sgx:4000 (LAN-reachable) that routes the `coder` model to halo's llama-server, so clients get a stable gateway with per-key auth instead of hardcoding halo's address. Master key is sourced from a sops-encrypted env file.
This commit is contained in:
parent
ccd8750899
commit
fdefdf31b2
3 changed files with 79 additions and 0 deletions
43
systems/x86_64-linux/sgx/litellm.nix
Normal file
43
systems/x86_64-linux/sgx/litellm.nix
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
{ config, ... }:
|
||||
{
|
||||
# OpenAI-compatible gateway in front of halo's llama-server, exposed as a
|
||||
# shared endpoint across the LAN and Tailnet (per-key routing, logging, future cloud
|
||||
# fallback) so clients hit sgx:4000 instead of hardcoding halo's address.
|
||||
services.litellm = {
|
||||
enable = true;
|
||||
host = "0.0.0.0";
|
||||
port = 4000; # chosen to avoid 8080 (Open WebUI) and 8081 (searx)
|
||||
openFirewall = true; # reachable across the LAN
|
||||
environmentFile = config.sops.secrets."litellm/env".path;
|
||||
|
||||
settings = {
|
||||
model_list = [
|
||||
{
|
||||
# halo exposes the `[coder]` preset from systems/.../halo/models.ini.
|
||||
# llama-server speaks the OpenAI API, so route it as an openai/* model.
|
||||
model_name = "coder";
|
||||
litellm_params = {
|
||||
model = "openai/coder";
|
||||
api_base = "http://halo:8000/v1";
|
||||
api_key = "none"; # llama-server requires no key; value is ignored
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
general_settings = {
|
||||
master_key = "os.environ/LITELLM_MASTER_KEY"; # LiteLLM env-var reference; the value comes from environmentFile, keeping the key out of the Nix store
|
||||
};
|
||||
|
||||
litellm_settings = {
|
||||
drop_params = true; # drop request params the backend does not support instead of failing the call
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# The decrypted secret is the environmentFile used above, so it must contain
|
||||
# the env line: LITELLM_MASTER_KEY=sk-...
|
||||
# Read by systemd (as root) before dropping to litellm's DynamicUser.
|
||||
sops.secrets."litellm/env" = {
|
||||
sopsFile = ../../../.secrets/sgx/litellm.yaml;
|
||||
restartUnits = [ "litellm.service" ];
|
||||
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue