Stand up document retrieval as shared, client-agnostic primitives rather than locking it inside Open WebUI:

- Qdrant as the LAN-reachable vector store
- LiteLLM gains a bge-m3 route so sgx:4000 also serves /v1/embeddings
- a thin `rag` CLI (ingest/query, optional coder synthesis) usable from any machine and from scripts

Embeddings and synthesis run on halo via the gateway; the CLI is configured entirely through RAG_* env vars.
9 lines
272 B
Nix
9 lines
272 B
Nix
# Qdrant serves as the shared vector store for RAG; the rag CLI queries it
# from any machine on the LAN.
_: {
  # Open the Qdrant HTTP/REST API port in the firewall.
  networking.firewall.allowedTCPPorts = [ 6333 ];

  services.qdrant.enable = true;

  # Listen on all interfaces instead of the default 127.0.0.1 so the service
  # is reachable from the LAN.
  services.qdrant.settings.service.host = "0.0.0.0";
}
|