From f62e8ac4708eb1c45b62aa53267704e4d080700c Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Wed, 6 May 2026 09:13:54 +0200 Subject: [PATCH] perf(llama-cpp-rocm): tune for Strix Halo (gfx1151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restrict rocmGpuTargets to gfx1151 (Radeon 8060S, RDNA 3.5) — smaller closure, faster compile, no wasted device kernels. - Enable GGML_HIP_ROCWMMA_FATTN: rocWMMA-backed flash attention is a major win on RDNA3+ for the GPU-offloaded attention path. - Enable GGML_HIP_GRAPHS to lower per-token launch overhead. - Add rocwmma to buildInputs to satisfy the WMMA path. llama-server on halo runs with -ngl 99 --flash-attn on, so these flags target the hot path. CPU-side AVX-512 was skipped intentionally — Zen 5 has it, but with full GPU offload the CPU paths barely run. --- overlays/unstable/default.nix | 39 ++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/overlays/unstable/default.nix b/overlays/unstable/default.nix index 9ec328e..4ba1183 100644 --- a/overlays/unstable/default.nix +++ b/overlays/unstable/default.nix @@ -12,18 +12,33 @@ final: prev: { # nodejs_20 ; - llama-cpp-rocm = channels.unstable.llama-cpp-rocm.overrideAttrs (_: { - src = prev.fetchFromGitHub { - owner = "am17an"; - repo = "llama.cpp"; - rev = "267f8afe857b7bd1a49e4fde9138ab0f7be36625"; - hash = "sha256-VYvRjnNZpPE60wjpBVO1FbURMRRMg71sM5kBxiEkElk="; - postFetch = '' - echo -n "267f8af" > $out/COMMIT - ''; - }; - npmDepsHash = "sha256-k62LIbyY2DXvs7XXbX0lNPiYxuYzeJUyQtS4eA+68f8="; - }); + # Tuned for Strix Halo (Ryzen AI Max+ 395 / Radeon 8060S, gfx1151). 
+  llama-cpp-rocm =
+    (channels.unstable.llama-cpp.override { # base llama-cpp package with ROCm switched on through override arguments
+      rocmSupport = true;
+      rocmGpuTargets = [ "gfx1151" ]; # only gfx1151 (Radeon 8060S, RDNA 3.5): smaller closure, faster compile
+    }).overrideAttrs
+      (prevAttrs: {
+        src = prev.fetchFromGitHub { # source pinned to a single revision of the am17an/llama.cpp repository
+          owner = "am17an";
+          repo = "llama.cpp";
+          rev = "267f8afe857b7bd1a49e4fde9138ab0f7be36625";
+          hash = "sha256-VYvRjnNZpPE60wjpBVO1FbURMRRMg71sM5kBxiEkElk=";
+          postFetch = ''
+            echo -n "267f8af" > $out/COMMIT
+          ''; # postFetch writes the abbreviated revision into $out/COMMIT
+        };
+        npmDepsHash = "sha256-k62LIbyY2DXvs7XXbX0lNPiYxuYzeJUyQtS4eA+68f8="; # NOTE(review): presumably has to track the npm dependencies of the pinned src — confirm when bumping rev
+
+        buildInputs = (prevAttrs.buildInputs or [ ]) ++ [ # extend, rather than replace, the inherited inputs
+          channels.unstable.rocmPackages.rocwmma # needed by the rocWMMA flash-attention path enabled below
+        ];
+
+        cmakeFlags = (prevAttrs.cmakeFlags or [ ]) ++ [ # appended after the inherited flags
+          "-DGGML_HIP_ROCWMMA_FATTN=ON" # rocWMMA-backed flash attention
+          "-DGGML_HIP_GRAPHS=ON" # HIP graphs, intended to lower per-token launch overhead
+        ];
+      });
 
   /*
   gnome-remote-desktop = channels.unstable.gnome-remote-desktop.overrideAttrs (prevAttrs: {