# nixcfg/systems/x86_64-linux/sgx/litellm.nix

{ config, ... }:
let
  # Port the gateway listens on. Already taken on this host: 8080 Open WebUI,
  # 8081 searx, 4000 uptime-kuma.
  gatewayPort = 4001;

  # Name of the sops secret that carries the gateway's environment file.
  envSecretName = "litellm/env";

  # Connection details shared by every model below: all of them are served by
  # halo's llama-server, which speaks the OpenAI API (hence the `openai/*`
  # model routes).
  haloBackend = {
    api_base = "http://halo:8000/v1";
    api_key = "none"; # llama-server requires no key; value is ignored
  };
in
{
  # Decrypted file must contain the env line: LITELLM_MASTER_KEY=sk-...
  # systemd reads it (as root) before dropping to litellm's DynamicUser.
  sops.secrets.${envSecretName} = {
    sopsFile = ../../../.secrets/sgx/litellm.yaml;
    # Restart the gateway whenever the secret changes.
    restartUnits = [ "litellm.service" ];
  };

  # OpenAI-compatible gateway in front of halo's llama-server. It gives every
  # client one shared endpoint (per-key routing, logging, future cloud
  # fallback): clients talk to sgx:4001 instead of hardcoding halo's address.
  services.litellm = {
    enable = true;

    # Listen on all interfaces and open the port in the firewall so other
    # hosts can reach the gateway.
    # NOTE(review): the original notes mention both "Tailnet" and "LAN" as the
    # intended audience; this opens the port on every interface — confirm.
    host = "0.0.0.0";
    port = gatewayPort;
    openFirewall = true;

    # Supplies LITELLM_MASTER_KEY to the service.
    environmentFile = config.sops.secrets.${envSecretName}.path;

    settings.model_list = [
      # halo exposes the `[coder]` preset from systems/.../halo/models.ini.
      {
        model_name = "coder";
        litellm_params = haloBackend // {
          model = "openai/coder";
        };
      }

      # Multilingual embeddings, also served by halo's router (the `[bge-m3]`
      # preset). Exposes /v1/embeddings on this gateway for the rag CLI.
      {
        model_name = "bge-m3";
        litellm_params = haloBackend // {
          model = "openai/bge-m3";
        };
      }
    ];

    # `os.environ/<NAME>` is LiteLLM's syntax for reading a value from the
    # process environment; the key itself comes from the environment file.
    settings.general_settings.master_key = "os.environ/LITELLM_MASTER_KEY";

    # LiteLLM option: drop request parameters the target provider does not
    # support rather than failing the request.
    settings.litellm_settings.drop_params = true;
  };
}