diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini
index 6632557..d2ec173 100644
--- a/systems/x86_64-linux/halo/models.ini
+++ b/systems/x86_64-linux/halo/models.ini
@@ -6,7 +6,7 @@ parallel = 1
 jinja = true
 n-gpu-layers = 99
 threads = 8
-ubatch-size = 512
+ubatch-size = 256
 cache-type-k = bf16
 cache-type-v = bf16
 mmap = false