feat(sgx): add CLI RAG stack (Qdrant + embeddings gateway + rag tool)
Stand up document retrieval as shared, client-agnostic primitives rather than locking it inside Open WebUI:

- Qdrant as the LAN-reachable vector store
- LiteLLM gains a bge-m3 route so sgx:4000 also serves /v1/embeddings
- a thin `rag` CLI (ingest/query, optional coder synthesis) usable from any machine and from scripts

Embeddings and synthesis run on halo via the gateway; the CLI is configured entirely through RAG_* env vars.
This commit is contained in:
parent
ab729a0720
commit
95668b71a7
4 changed files with 180 additions and 0 deletions
|
|
@ -12,6 +12,7 @@
|
|||
./wyoming.nix
|
||||
./searx.nix
|
||||
./litellm.nix
|
||||
./qdrant.nix
|
||||
./uptime-kuma.nix
|
||||
./firefly.nix
|
||||
./opencode.nix
|
||||
|
|
@ -25,6 +26,7 @@
|
|||
environment.systemPackages = with pkgs; [
|
||||
claude-code
|
||||
opencode
|
||||
metacfg.rag
|
||||
];
|
||||
|
||||
services.tailscale.enable = true;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,16 @@
|
|||
api_key = "none"; # llama-server requires no key; value is ignored
|
||||
};
|
||||
}
|
||||
{
|
||||
# Multilingual embeddings, also served by halo's router (the `[bge-m3]`
|
||||
# preset). Exposes /v1/embeddings on this gateway for the rag CLI.
|
||||
model_name = "bge-m3";
|
||||
litellm_params = {
|
||||
model = "openai/bge-m3";
|
||||
api_base = "http://halo:8000/v1";
|
||||
api_key = "none";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
general_settings = {
|
||||
|
|
|
|||
9
systems/x86_64-linux/sgx/qdrant.nix
Normal file
9
systems/x86_64-linux/sgx/qdrant.nix
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# NixOS module: enables the Qdrant service and opens its REST port in the firewall.
_: {
|
||||
# Shared vector store for RAG, queried from any LAN machine by the rag CLI.
|
||||
services.qdrant = {
|
||||
enable = true;
|
||||
# Listen on all interfaces instead of loopback so other machines can connect.
# NOTE(review): no API key is configured in this block, so access control
# appears to rely on the network/firewall alone — confirm that is intended.
settings.service.host = "0.0.0.0"; # default 127.0.0.1; LAN-reachable
|
||||
};
|
||||
|
||||
# Only the HTTP/REST port is allowed through the firewall here.
# NOTE(review): clients that want gRPC would need an additional port opened — confirm none do.
networking.firewall.allowedTCPPorts = [ 6333 ]; # HTTP/REST API
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue