fix(halo): q6_k with mtp 2

This commit is contained in:
Harald Hoyer 2026-05-15 07:47:43 +02:00
parent baaab26eb7
commit 38d2d4f4ae
2 changed files with 6 additions and 6 deletions

View file

@@ -22,13 +22,13 @@ final: prev: {
src = prev.fetchFromGitHub { src = prev.fetchFromGitHub {
owner = "am17an"; owner = "am17an";
repo = "llama.cpp"; repo = "llama.cpp";
rev = "ebe4fca4b59ef8871bb07c34d148bc37fe57fadd"; rev = "08b147428e7db0760acda2b4e0bd49f5b2ffe945";
hash = "sha256-0ogXU/70e7TXtnhN8po0hT+GMEz2wWgob3ah4nBbmuw="; hash = "sha256-g74JX/ucsnNI1dWSrrzTUVVbZSg+ShIJfKLbjcD0+ac=";
postFetch = '' postFetch = ''
echo -n "ebe4fca4b59ef8871bb07c34d148bc37fe57fadd" > $out/COMMIT echo -n "08b147428e7db0760acda2b4e0bd49f5b2ffe945" > $out/COMMIT
''; '';
}; };
npmDepsHash = "sha256-cV3noOyKmst9vfxyvkCNhihPgwfVGhmPPT4UMloeWZM="; npmDepsHash = "sha256-WaEePrEZ7O/7deP2KJhe0AwiSKYA8HOqETmMHUkmBe0=";
}); });
/* /*

View file

@@ -35,7 +35,7 @@
"--port 8000" "--port 8000"
"--no-mmap" "--no-mmap"
"--n-gpu-layers 99" "--n-gpu-layers 99"
"-hf unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL" "-hf unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K"
"--alias halo-8000" "--alias halo-8000"
"--threads 8" "--threads 8"
"--ubatch-size 256" "--ubatch-size 256"
@@ -46,7 +46,7 @@
"-c 262144" "-c 262144"
"--fit on" "--fit on"
"--slot-save-path %C/llama-server/kv-slots-27B-MTP" "--slot-save-path %C/llama-server/kv-slots-27B-MTP"
"--spec-type mtp --spec-draft-n-max 3" "--spec-type draft-mtp --spec-draft-n-max 2"
]; ];
Restart = "on-failure"; Restart = "on-failure";
RestartSec = 10; RestartSec = 10;