fix(halo): q6_k with mtp 2
This commit is contained in:
parent
baaab26eb7
commit
38d2d4f4ae
2 changed files with 6 additions and 6 deletions
|
|
@@ -22,13 +22,13 @@ final: prev: {
|
|||
src = prev.fetchFromGitHub {
|
||||
owner = "am17an";
|
||||
repo = "llama.cpp";
|
||||
rev = "ebe4fca4b59ef8871bb07c34d148bc37fe57fadd";
|
||||
hash = "sha256-0ogXU/70e7TXtnhN8po0hT+GMEz2wWgob3ah4nBbmuw=";
|
||||
rev = "08b147428e7db0760acda2b4e0bd49f5b2ffe945";
|
||||
hash = "sha256-g74JX/ucsnNI1dWSrrzTUVVbZSg+ShIJfKLbjcD0+ac=";
|
||||
postFetch = ''
|
||||
echo -n "ebe4fca4b59ef8871bb07c34d148bc37fe57fadd" > $out/COMMIT
|
||||
echo -n "08b147428e7db0760acda2b4e0bd49f5b2ffe945" > $out/COMMIT
|
||||
'';
|
||||
};
|
||||
npmDepsHash = "sha256-cV3noOyKmst9vfxyvkCNhihPgwfVGhmPPT4UMloeWZM=";
|
||||
npmDepsHash = "sha256-WaEePrEZ7O/7deP2KJhe0AwiSKYA8HOqETmMHUkmBe0=";
|
||||
});
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@@ -35,7 +35,7 @@
|
|||
"--port 8000"
|
||||
"--no-mmap"
|
||||
"--n-gpu-layers 99"
|
||||
"-hf unsloth/Qwen3.6-27B-MTP-GGUF:UD-Q8_K_XL"
|
||||
"-hf unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K"
|
||||
"--alias halo-8000"
|
||||
"--threads 8"
|
||||
"--ubatch-size 256"
|
||||
|
|
@@ -46,7 +46,7 @@
|
|||
"-c 262144"
|
||||
"--fit on"
|
||||
"--slot-save-path %C/llama-server/kv-slots-27B-MTP"
|
||||
"--spec-type mtp --spec-draft-n-max 3"
|
||||
"--spec-type draft-mtp --spec-draft-n-max 2"
|
||||
];
|
||||
Restart = "on-failure";
|
||||
RestartSec = 10;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue