# LiteLLM proxy: an OpenAI-compatible gateway that sits in front of the
# llama-server running on halo. It gives every machine on the Tailnet a single
# shared endpoint (sgx:4000) instead of each client hardcoding halo's address,
# and is the place to hang per-key routing, logging and a future cloud fallback.
{ config, ... }:

let
  # Listening port for the gateway. 8080 is already used by Open WebUI and
  # 8081 by searx.
  gatewayPort = 4000;

  # Name of the sops-nix secret holding the proxy's environment file.
  # The decrypted file must contain the env line: LITELLM_MASTER_KEY=sk-...
  envSecret = "litellm/env";

  # halo exposes the `[coder]` preset from systems/.../halo/models.ini.
  # llama-server speaks the OpenAI API, so it is routed as an openai/* model.
  coderModel = {
    model_name = "coder";
    litellm_params = {
      model = "openai/coder";
      api_base = "http://halo:8000/v1";
      # llama-server requires no key; the value is ignored.
      api_key = "none";
    };
  };
in
{
  # systemd reads the decrypted file (as root) before dropping privileges to
  # litellm's DynamicUser; the unit is restarted whenever the secret changes.
  sops.secrets.${envSecret} = {
    sopsFile = ../../../.secrets/sgx/litellm.yaml;
    restartUnits = [ "litellm.service" ];
  };

  services.litellm = {
    enable = true;

    # Bind on all interfaces and open the port in the firewall so the gateway
    # is reachable across the LAN.
    host = "0.0.0.0";
    port = gatewayPort;
    openFirewall = true;

    # Supplies LITELLM_MASTER_KEY to the service environment.
    environmentFile = config.sops.secrets.${envSecret}.path;

    settings = {
      model_list = [ coderModel ];

      # "os.environ/<NAME>" makes LiteLLM take the value from the environment
      # variable rather than from this world-readable config.
      general_settings.master_key = "os.environ/LITELLM_MASTER_KEY";

      litellm_settings.drop_params = true;
    };
  };
}