LiteLLM 1.75.5 in nixpkgs 25.11 crashes on /v1/embeddings with "RuntimeError: Stream consumed" (the proxy reads the request body twice). Source litellm from nixos-unstable (1.83.14) where this is resolved, so the gateway can proxy embeddings to halo's bge-m3 for any client.
56 lines
1.7 KiB
Nix
56 lines
1.7 KiB
Nix
{ channels, ... }:
|
|
final: prev: {
|
|
# Packages taken verbatim from channels.unstable rather than from the package
# set this overlay extends (`prev`). Order is irrelevant; kept alphabetical.
inherit (channels.unstable)
  claude-code
  gemini-cli
  litellm # 25.11's 1.75.5 has a /v1/embeddings "Stream consumed" bug
  mistral-vibe
  opencode
  qwen-code
  tailscale
  # Currently disabled (not inherited from unstable):
  # llama-cpp-rocm
  # nodejs_20
  # open-webui
  # vscode
  ;
|
|
|
|
# ROCm build of llama.cpp, tuned for Strix Halo
# (Ryzen AI Max+ 395 / Radeon 8060S, gfx1151).
llama-cpp-rocm =
  let
    # llama-cpp from the unstable channel with ROCm switched on and the GPU
    # target list narrowed to gfx1151.
    rocmLlamaCpp = channels.unstable.llama-cpp.override {
      rocmGpuTargets = [ "gfx1151" ];
      rocmSupport = true;
    };

    # Upstream source pinned at tag b9264.
    pinnedSrc = final.fetchFromGitHub {
      owner = "ggml-org";
      repo = "llama.cpp";
      tag = "b9264";
      hash = "sha256-LA4SgE20Dvz1g3degdIx4CYfYhVNEIQM5Q/5rDT/icg=";
      # .git is kept during the fetch only so postFetch can write the short
      # commit id to $out/COMMIT; every .git entry is removed right after.
      leaveDotGit = true;
      postFetch = ''
        git -C "$out" rev-parse --short HEAD > $out/COMMIT
        find "$out" -name .git -print0 | xargs -0 rm -rf
      '';
    };
  in
  # The previous attributes are not needed; everything below is replaced outright.
  rocmLlamaCpp.overrideAttrs (_: {
    version = "9264";
    src = pinnedSrc;
    # NOTE(review): presumably these cover the npm dependencies of the UI
    # under tools/ui and must be refreshed together with `src` — confirm.
    npmRoot = "tools/ui";
    npmDepsHash = "sha256-Iyg8FpcTKf2UYHuK7mA3cTAqVaLcQPcS0YCa5Qf01Gc=";
  });
|
|
|
|
# Geekbench 6 from the unstable channel, repointed at the 6.7.0 release
# tarballs. Uses the `finalAttrs: prevAttrs:` form of overrideAttrs instead of
# `rec`, so the download URL always follows the final `version` attribute
# (a later override of `version` is reflected in `src` as well).
geekbench_6 = channels.unstable.geekbench_6.overrideAttrs (
  finalAttrs: _prevAttrs:
  let
    # Platform key used both for the lookup and for the error message, so the
    # reported system is always the one that was actually looked up.
    inherit (prev.stdenv.hostPlatform) system;

    # Per-platform tarball location and hash for this release.
    sources = {
      "x86_64-linux" = {
        url = "https://cdn.geekbench.com/Geekbench-${finalAttrs.version}-Linux.tar.gz";
        hash = "sha256-Snt3179Re/zwxop1pvzWF39TXXi8ZUBlNWB+v7+YE38=";
      };
      "aarch64-linux" = {
        url = "https://cdn.geekbench.com/Geekbench-${finalAttrs.version}-LinuxARMPreview.tar.gz";
        hash = "sha256-GCAOKYyijaQPVBgAixoZRPHIdiUfV8mPeeflE7aX8Ac=";
      };
    };
  in
  {
    version = "6.7.0";
    # Evaluation fails with a clear message on any platform without an entry.
    src = prev.fetchurl (sources.${system} or (throw "unsupported system ${system}"));
  }
);
|
|
}
|