From fdefdf31b2b25bd5cb2b538eab2be396a077e7e3 Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Thu, 21 May 2026 23:08:15 +0200 Subject: [PATCH] feat(litellm): add LiteLLM gateway on sgx fronting halo's llama-server Exposes an OpenAI-compatible endpoint on sgx:4000 (LAN-reachable) that routes the `coder` model to halo's llama-server, so clients get a stable gateway with per-key auth instead of hardcoding halo's address. Master key is sourced from a sops-encrypted env file. --- .secrets/sgx/litellm.yaml | 35 ++++++++++++++++++++++ systems/x86_64-linux/sgx/default.nix | 1 + systems/x86_64-linux/sgx/litellm.nix | 43 ++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 .secrets/sgx/litellm.yaml create mode 100644 systems/x86_64-linux/sgx/litellm.nix diff --git a/.secrets/sgx/litellm.yaml b/.secrets/sgx/litellm.yaml new file mode 100644 index 0000000..1475ecc --- /dev/null +++ b/.secrets/sgx/litellm.yaml @@ -0,0 +1,35 @@ +litellm: + env: ENC[AES256_GCM,data:422srY8SJ0sXOTX22BcNiOfFGutR6lJ2XjM/B7Gf2dqt92HtCG+IYSQPydLwSL7SN0zOrBni2E6Qk23NpaJfG855k68a9A==,iv:7VXIJjAoISxw+iOA1M/uU3FlUylgwAuu0LYYh68NdH0=,tag:L5RkDHVU/OUov2C61vZOdQ==,type:str] +sops: + age: + - recipient: age149fqcw5jze00vd7jauylrp4j5xyv7amlu57jjfuzghkqtzlnxajs704uz3 + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAvZElNdGVTdDRBMlVMSnRE + anZLRzBJVCthVWdwZnlRTEN5MzExaVFabWljCnBzbTlIUXJuYVhBcmhqQ21aWDh4 + NDRKZk0vZldIWXVmekU1MEQwd3ZaNWsKLS0tIHV1dGVUeW1RWDQ2cFFVQ3NXaEIv + d2NFSDZkZ2tEYWM5UTNDL2YvdXZsVGMKaNh9j1uG/lQfrManPKSIvzNstgdDw9nh + 2ftjSTuxQgKk70E8vs1jTqi3aXvyH/08jrdJfWMSkaFwvvjG2ZFiIA== + -----END AGE ENCRYPTED FILE----- + - recipient: age1dwcz3fmp29ju4svy0t0wz4ylhpwlqa8xpw4l7t4gmgqr0ev37qrsfn840l + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA4K3YrR2FzdnJqci93eWJq + WnIycStpZTc0OUsrN09KdHg0bkc1NGNmcWhVCldzNWMxZktBVTBXUEJLYWRHMzIy + 
RXNTcjlDb2RMKzJTUytRTW1KRnB6OVkKLS0tIElwWmpyS25JRjZEZ0JVSlZ5WmpE + Z0UwZ200L2hEMk9hSjNId3pKNW93WnMKFSasBjoGXV4kkxs0v3e8BbFDXcC0Rc1U + n8eoU+Kzbg7luZXKrryfiFYWiqSqZGbZI8/7HmdToaPh8mKg+IAWEw== + -----END AGE ENCRYPTED FILE----- + - recipient: age1cpm9xhgue7sjvq7zyeeaxwr96c93sfzxxxj76sxsq7s7kgnygvcq5jxren + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBIVUgySWtPTHBYendvUHE3 + OU52ckVVOXoxTmUwMEN5aTdBUzQ0OG0zM3ljCmdzYUdKbUMzOWt2ZlpObXJJZUZh + ODllbnFPbFhZd0EzUlpObFU5Q2pqVkkKLS0tIGJoLy9HejVhaVBxT1lPb25LVWxT + TTBqZGluMW9zdnlVay8wU3ZRSm53YXcKdBkDgWN4yf1S9VT7JKyHeuMXuGc5DxW2 + JyalmP7K+7Ux0kIDbLku1VjHtrHwIHdc7r/DeoRw+4yur961BjBtdA== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2026-05-21T21:03:08Z" + mac: ENC[AES256_GCM,data:WNHW/Jya3OjZJiOLi1gjlNJAqpWegYbKoLBUT//guJXaN9XNQo0Kyjz6RqeOl9+MjIF3caldhGWffMfsRVaEupbgbdFYs4OZjUK7yIw/nRFtMiWZQdKc+21gE6BVXBHZEexx6uiDt7kJ8v14dQcEB8CrssBo3g1B8tZrTVjvnUY=,iv:lZwe6JX+xUiM5ke7ji+H/er/byixIzL9WxjgbjEjixU=,tag:WJ0B2rzNOm/Qr76rBXlFhw==,type:str] + unencrypted_suffix: _unencrypted + version: 3.12.1 diff --git a/systems/x86_64-linux/sgx/default.nix b/systems/x86_64-linux/sgx/default.nix index 2e7e6e7..c4290a0 100644 --- a/systems/x86_64-linux/sgx/default.nix +++ b/systems/x86_64-linux/sgx/default.nix @@ -11,6 +11,7 @@ ./mail.nix ./wyoming.nix ./searx.nix + ./litellm.nix ./uptime-kuma.nix ./firefly.nix ./opencode.nix diff --git a/systems/x86_64-linux/sgx/litellm.nix b/systems/x86_64-linux/sgx/litellm.nix new file mode 100644 index 0000000..89095fe --- /dev/null +++ b/systems/x86_64-linux/sgx/litellm.nix @@ -0,0 +1,43 @@ +{ config, ... }: +{ + # OpenAI-compatible gateway in front of halo's llama-server, exposed as a + # shared endpoint across the Tailnet (per-key routing, logging, future cloud + # fallback) so clients hit sgx:4000 instead of hardcoding halo's address. 
+  services.litellm = {
+    enable = true;
+
+    # Bind every interface. Port 4000 is used because 8080 is Open WebUI and
+    # 8081 is searx.
+    host = "0.0.0.0";
+    port = 4000;
+    openFirewall = true; # lets other machines on the LAN connect
+
+    # The env file supplies LITELLM_MASTER_KEY; the setting below refers to it
+    # by name only (`os.environ/` prefix) rather than embedding the value.
+    environmentFile = config.sops.secrets."litellm/env".path;
+    settings.general_settings.master_key = "os.environ/LITELLM_MASTER_KEY";
+
+    settings.litellm_settings.drop_params = true;
+
+    # `coder` is halo's `[coder]` preset from systems/.../halo/models.ini.
+    # llama-server speaks the OpenAI API, so it is routed as an openai/* model.
+    settings.model_list = [
+      {
+        model_name = "coder";
+        litellm_params = {
+          api_base = "http://halo:8000/v1";
+          api_key = "none"; # value is ignored; llama-server requires no key
+          model = "openai/coder";
+        };
+      }
+    ];
+  };
+
+  # Secret backing the environment file above. Once decrypted it must contain
+  # the env line: LITELLM_MASTER_KEY=sk-...
+  # systemd reads it (as root) before dropping to litellm's DynamicUser.
+  sops.secrets."litellm/env" = {
+    restartUnits = [ "litellm.service" ];
+    sopsFile = ../../../.secrets/sgx/litellm.yaml;
+  };
+}