diff --git a/systems/x86_64-linux/halo/default.nix b/systems/x86_64-linux/halo/default.nix
index 41b5ebd..1934cd8 100644
--- a/systems/x86_64-linux/halo/default.nix
+++ b/systems/x86_64-linux/halo/default.nix
@@ -10,8 +10,7 @@ with lib.metacfg;
     ./hardware-configuration.nix
     #./xremap.nix
     ./wyoming.nix
-    #./llama-server-coder-next.nix
-    ./llama-server-27B-MTP.nix
+    ./llama-server.nix
   ];
 
   boot.lanzaboote.pkiBundle = "/var/lib/sbctl";
diff --git a/systems/x86_64-linux/halo/llama-server-27B-MTP.nix b/systems/x86_64-linux/halo/llama-server-27B-MTP.nix
deleted file mode 100644
index 94e7efc..0000000
--- a/systems/x86_64-linux/halo/llama-server-27B-MTP.nix
+++ /dev/null
@@ -1,61 +0,0 @@
-{
-  pkgs,
-  lib,
-  ...
-}:
-{
-  systemd.services.llama-server = {
-    description = "llama.cpp server (Qwen3.6-27B-MTP, ROCm)";
-    after = [ "network-online.target" ];
-    wants = [ "network-online.target" ];
-    wantedBy = [ "multi-user.target" ];
-
-    environment = {
-      HOME = "%S/llama-server";
-      HF_HOME = "%S/llama-server";
-    };
-
-    serviceConfig = {
-      Type = "simple";
-      DynamicUser = true;
-      SupplementaryGroups = [
-        "video"
-        "render"
-      ];
-      StateDirectory = "llama-server";
-      CacheDirectory = "llama-server";
-      WorkingDirectory = "%S/llama-server";
-      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p %C/llama-server/kv-slots-27B-MTP";
-      ExecStart = lib.concatStringsSep " " [
-        "${pkgs.llama-cpp-rocm}/bin/llama-server"
-        "--flash-attn on"
-        "--parallel 2"
-        "--jinja"
-        "--host 0.0.0.0"
-        "--port 8000"
-        "--no-mmap"
-        "--n-gpu-layers 99"
-        "-hf unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K"
-        "--alias halo-8000"
-        "--threads 8"
-        "--ubatch-size 256"
-        "-ctk bf16 -ctv bf16"
-        "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
-        "--no-context-shift"
-        ''--chat-template-kwargs '{"preserve_thinking": true}' ''
-        "-c 524288"
-        "--fit on"
-        "--slot-save-path %C/llama-server/kv-slots-27B-MTP"
-        "--spec-type draft-mtp --spec-draft-n-max 6"
-        #"--spec-type ngram-mod --spec-ngram-mod-n-match 24 --spec-ngram-mod-n-min 48 --spec-ngram-mod-n-max 64"
-      ];
-      Restart = "on-failure";
-      RestartSec = 10;
-
-      PrivateTmp = true;
-      ProtectSystem = "strict";
-      ProtectHome = true;
-      NoNewPrivileges = true;
-    };
-  };
-}
diff --git a/systems/x86_64-linux/halo/llama-server-27B.nix b/systems/x86_64-linux/halo/llama-server-27B.nix
deleted file mode 100644
index 94f0cdc..0000000
--- a/systems/x86_64-linux/halo/llama-server-27B.nix
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  pkgs,
-  lib,
-  ...
-}:
-{
-  systemd.services.llama-server = {
-    description = "llama.cpp server (Qwen3.6-27B, ROCm)";
-    after = [ "network-online.target" ];
-    wants = [ "network-online.target" ];
-    wantedBy = [ "multi-user.target" ];
-
-    environment = {
-      HOME = "%S/llama-server";
-      HF_HOME = "%S/llama-server";
-    };
-
-    serviceConfig = {
-      Type = "simple";
-      DynamicUser = true;
-      SupplementaryGroups = [
-        "video"
-        "render"
-      ];
-      StateDirectory = "llama-server";
-      CacheDirectory = "llama-server";
-      WorkingDirectory = "%S/llama-server";
-      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p %C/llama-server/kv-slots-27B";
-      ExecStart = lib.concatStringsSep " " [
-        "${pkgs.llama-cpp-rocm}/bin/llama-server"
-        "--flash-attn on"
-        "--parallel 2"
-        "--jinja"
-        "--host 0.0.0.0"
-        "--port 8000"
-        "--no-mmap"
-        "--n-gpu-layers 99"
-        "-hf unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL"
-        "--alias halo-8000"
-        "--threads 8"
-        "--ubatch-size 256"
-        "-ctk bf16 -ctv bf16"
-        "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
-        "--no-context-shift"
-        ''--chat-template-kwargs '{"preserve_thinking": true}' ''
-        "-c 524288"
-        "--fit on"
-        "--slot-save-path %C/llama-server/kv-slots-27B"
-        "--cache-ram 0"
-      ];
-      Restart = "on-failure";
-      RestartSec = 10;
-
-      PrivateTmp = true;
-      ProtectSystem = "strict";
-      ProtectHome = true;
-      NoNewPrivileges = true;
-    };
-  };
-}
diff --git a/systems/x86_64-linux/halo/llama-server-coder-next.nix b/systems/x86_64-linux/halo/llama-server-coder-next.nix
deleted file mode 100644
index 7f34a3c..0000000
--- a/systems/x86_64-linux/halo/llama-server-coder-next.nix
+++ /dev/null
@@ -1,57 +0,0 @@
-{
-  pkgs,
-  lib,
-  ...
-}:
-{
-  systemd.services.llama-server = {
-    description = "llama.cpp server (Qwen3-Coder-Next, ROCm)";
-    after = [ "network-online.target" ];
-    wants = [ "network-online.target" ];
-    wantedBy = [ "multi-user.target" ];
-
-    environment = {
-      HOME = "%S/llama-server";
-      HF_HOME = "%S/llama-server";
-    };
-
-    serviceConfig = {
-      Type = "simple";
-      DynamicUser = true;
-      SupplementaryGroups = [
-        "video"
-        "render"
-      ];
-      StateDirectory = "llama-server";
-      CacheDirectory = "llama-server";
-      WorkingDirectory = "%S/llama-server";
-      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p %C/llama-server/kv-slots-coder-next";
-      ExecStart = lib.concatStringsSep " " [
-        "${pkgs.llama-cpp-rocm}/bin/llama-server"
-        "--flash-attn on"
-        "--parallel 1"
-        "--jinja"
-        "--host 0.0.0.0"
-        "--port 8000"
-        "--no-mmap"
-        "--n-gpu-layers 99"
-        "--threads 8"
-        "--ubatch-size 256"
-        "-ctk bf16 -ctv bf16"
-        "--fit on"
-        "--no-context-shift"
-        "-hf unsloth/Qwen3-Coder-Next-GGUF:UD-Q8_K_XL"
-        "--alias halo-8000"
-        "--temp 1.0 --top-p 0.95 --min-p 0.01 --top-k 40"
-        "--slot-save-path %C/llama-server/kv-slots-coder-next"
-      ];
-      Restart = "on-failure";
-      RestartSec = 10;
-
-      PrivateTmp = true;
-      ProtectSystem = "strict";
-      ProtectHome = true;
-      NoNewPrivileges = true;
-    };
-  };
-}
diff --git a/systems/x86_64-linux/halo/llama-server.nix b/systems/x86_64-linux/halo/llama-server.nix
index 683286b..8264f3a 100644
--- a/systems/x86_64-linux/halo/llama-server.nix
+++ b/systems/x86_64-linux/halo/llama-server.nix
@@ -5,7 +5,7 @@
 }:
 {
   systemd.services.llama-server = {
-    description = "llama.cpp server (Qwen3.6-35B-A3B, ROCm)";
+    description = "llama.cpp server (multi-model preset, ROCm)";
     after = [ "network-online.target" ];
     wants = [ "network-online.target" ];
     wantedBy = [ "multi-user.target" ];
@@ -23,29 +23,13 @@
         "render"
       ];
       StateDirectory = "llama-server";
-      CacheDirectory = "llama-server";
       WorkingDirectory = "%S/llama-server";
-      ExecStartPre = "${pkgs.coreutils}/bin/mkdir -p %C/llama-server/kv-slots";
       ExecStart = lib.concatStringsSep " " [
         "${pkgs.llama-cpp-rocm}/bin/llama-server"
-        "--flash-attn on"
-        "--parallel 1"
-        "--jinja"
         "--host 0.0.0.0"
         "--port 8000"
-        "--no-mmap"
-        "--n-gpu-layers 99"
-        "-hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"
-        "--alias halo-8000"
-        "--threads 8"
-        "--ubatch-size 256"
-        "-ctk bf16 -ctv bf16"
-        "--temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00"
-        "--no-context-shift"
-        ''--chat-template-kwargs '{"preserve_thinking": true}' ''
-        "-c 262144"
-        "--fit on"
-        "--slot-save-path %C/llama-server/kv-slots"
+        "--models-preset ${./models.ini}" # path interpolation: models.ini is copied to the Nix store and its store path is passed; per-model flags now live in that file
+        "--models-max 2" # NOTE(review): presumably caps how many models are loaded at once; models.ini defines two - confirm against llama-server docs
       ];
       Restart = "on-failure";
       RestartSec = 10;
diff --git a/systems/x86_64-linux/halo/models.ini b/systems/x86_64-linux/halo/models.ini
new file mode 100644
index 0000000..81de281
--- /dev/null
+++ b/systems/x86_64-linux/halo/models.ini
@@ -0,0 +1,30 @@
+version = 1
+; [*] carries the shared defaults; NOTE(review): presumably every model section below inherits them and may override per key - confirm in the llama-server preset docs
+[*]
+flash-attn          = on
+parallel            = 2
+jinja               = true
+n-gpu-layers        = 99
+threads             = 8
+ubatch-size         = 256
+cache-type-k        = bf16
+cache-type-v        = bf16
+temp                = 0.6
+top-p               = 0.95
+top-k               = 20
+min-p               = 0.0
+mmap                = false
+no-context-shift    = true
+chat-template-kwargs = {"preserve_thinking": true}
+c                   = 524288
+fit                 = on
+spec-type           = draft-mtp
+; 35B-A3B preset (MTP GGUF); NOTE(review): confirm the UD-Q6_K tag exists in this repo - the configs this replaces used UD-Q8_K_XL and plain Q6_K
+[Qwen3.6-35B-A3B]
+hf                  = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K
+spec-draft-n-max    = 3
+; 27B preset: same HF repo/quant and spec-draft-n-max as the former llama-server-27B-MTP.nix service
+[Qwen3.6-27B]
+hf                  = unsloth/Qwen3.6-27B-MTP-GGUF:Q6_K
+spec-draft-n-max    = 6
+