diff --git a/systems/x86_64-linux/halo/default.nix b/systems/x86_64-linux/halo/default.nix index 1934cd8..c25e491 100644 --- a/systems/x86_64-linux/halo/default.nix +++ b/systems/x86_64-linux/halo/default.nix @@ -10,7 +10,7 @@ with lib.metacfg; ./hardware-configuration.nix #./xremap.nix ./wyoming.nix - ./llama-server.nix + ./llama-server-27B.nix ]; boot.lanzaboote.pkiBundle = "/var/lib/sbctl"; diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix index b6ad09c..340b775 100644 --- a/systems/x86_64-linux/halo/llama-server.nix +++ b/systems/x86_64-linux/halo/llama-server.nix @@ -29,7 +29,7 @@ ExecStart = lib.concatStringsSep " " [ "${pkgs.llama-cpp-rocm}/bin/llama-server" "--flash-attn on" - "--parallel 2" + "--parallel 1" "--jinja" "--host 0.0.0.0" "--port 8000" @@ -43,7 +43,7 @@ "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00" "--no-context-shift" ''--chat-template-kwargs '{"preserve_thinking": true}' '' - "-c 524288" + "-c 262144" "--fit on" "--slot-save-path %C/llama-server/kv-slots" ];