From 42c52bd87f8cf39474accce7f027eb7c030596f3 Mon Sep 17 00:00:00 2001
From: Harald Hoyer
Date: Wed, 13 May 2026 16:38:58 +0200
Subject: [PATCH] refactor(mx): drive opencode bot via direct chat-completions
 API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bot no longer shells out to `opencode run`. Instead it POSTs
directly to the OpenAI-compatible /chat/completions endpoint exposed by
llama-server on halo.hoyer.tail:8000. This removes the per-request
Bun/sqlite cold-start overhead, drops the pkgs.opencode runtime
dependency, and eliminates the ExecStartPre dance that materialized
config.json into the service's $HOME.

Conversation history is now stored as a proper OpenAI `messages` list
with system/user/assistant roles, instead of the XML blob that was
inlined into a single `opencode run` argument.

The interactive opencode setup (config/opencode/config.json) is
unchanged; only the bot stops depending on it.

The module gains a `modelBaseUrl` option; `model` is now the bare model
name (`halo-8000`) without the provider/ prefix that the opencode CLI
required.
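
For reference, the new call path boils down to roughly this sketch
(illustrative only: the user "alice", the message text and the blocking
httpx call are stand-ins; the real implementation is the async
call_model() in bot.py):

    import httpx

    # What the bot now sends to llama-server's OpenAI-compatible API.
    payload = {
        "model": "halo-8000",
        "messages": [
            {"role": "system", "content": "Du bist ein KI-Assistent ..."},
            {"role": "user", "content": "[alice] Hallo!"},
        ],
        "stream": False,
    }
    resp = httpx.post(
        "http://halo.hoyer.tail:8000/v1/chat/completions",
        json=payload,
        timeout=120,
    )
    # Standard chat-completions envelope: the first choice carries the reply.
    reply = resp.json()["choices"][0]["message"]["content"]
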
---
 .../mx/nextcloud-opencode-bot/bot.py      | 125 +++++++++---------
 .../mx/nextcloud-opencode-bot/default.nix |   4 +-
 .../mx/nextcloud-opencode-bot/module.nix  |  44 ++----
 3 files changed, 72 insertions(+), 101 deletions(-)

diff --git a/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py b/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py
index b300a8f..a2190db 100644
--- a/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py
+++ b/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py
@@ -2,11 +2,11 @@
 """
 Nextcloud Talk OpenCode Bot
 
-Receives webhooks from Nextcloud Talk and responds using opencode CLI
-against a local model exposed via the `halo-8000` provider.
+Receives webhooks from Nextcloud Talk and forwards the conversation to an
+OpenAI-compatible chat-completions endpoint (e.g. llama-server) running on
+the local LLM host.
 """
 
-import asyncio
 import hashlib
 import hmac
 import json
@@ -22,8 +22,9 @@ from fastapi import FastAPI, Request, HTTPException, Header
 from fastapi.responses import JSONResponse
 
 NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/")
-OPENCODE_PATH = os.environ.get("OPENCODE_PATH", "opencode")
-OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "halo-8000/halo-8000")
+MODEL_BASE_URL = os.environ.get("MODEL_BASE_URL", "").rstrip("/")
+MODEL_NAME = os.environ.get("MODEL_NAME", "halo-8000")
+MODEL_API_KEY = os.environ.get("MODEL_API_KEY", "")
 ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()]
 TIMEOUT = int(os.environ.get("TIMEOUT", "120"))
 SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "")
@@ -50,7 +51,9 @@ log = logging.getLogger(__name__)
 
 app = FastAPI(title="Nextcloud OpenCode Bot")
 
-conversations: dict[str, list[tuple[str, str]]] = {}
+# Conversation history per room: list of OpenAI-style message dicts
+# (role: "user"|"assistant", content: str).
+conversations: dict[str, list[dict]] = {}
 
 MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6"))
 
@@ -93,11 +96,7 @@ BOT_SYSTEM_PROMPT = """\
 Du bist ein KI-Assistent im Nextcloud Talk Chat.
 Deine Antworten werden direkt in den Chatraum gepostet.
 Halte deine Antworten kurz und prägnant, da es ein Chat ist.
-Nutze Markdown für Formatierung wenn sinnvoll.
-
-Du erhältst:
-- <conversation_history>: Die letzten Nachrichten im Chatraum (User und deine Antworten)
-- <current_message>: Die aktuelle Nachricht, auf die du antworten sollst"""
+Nutze Markdown für Formatierung wenn sinnvoll."""
 
 
 def build_system_prompt() -> str:
@@ -106,60 +105,57 @@ def build_system_prompt() -> str:
     return BOT_SYSTEM_PROMPT
 
 
-def build_prompt(conversation_token: str, current_message: str, current_user: str) -> str:
-    """Build the full prompt. opencode run has no system-prompt flag, so we
-    inline the system instructions at the top."""
-    parts = [
-        "<system>",
-        build_system_prompt(),
-        "</system>",
-        "",
-    ]
-
+def build_messages(conversation_token: str, current_message: str, current_user: str) -> list[dict]:
+    messages: list[dict] = [{"role": "system", "content": build_system_prompt()}]
     history = conversations.get(conversation_token, [])
-    if history:
-        parts.append("<conversation_history>")
-        for role, msg in history[-MAX_HISTORY:]:
-            parts.append(f"{role}: {msg}")
-        parts.append("</conversation_history>")
-        parts.append("")
-
-    parts.append(f"<current_message from={current_user}>")
-    parts.append(current_message)
-    parts.append("</current_message>")
-
-    return "\n".join(parts)
+    messages.extend(history[-MAX_HISTORY * 2:])
+    messages.append({"role": "user", "content": f"[{current_user}] {current_message}"})
+    return messages
 
 
-async def call_opencode(prompt: str) -> str:
-    """Call opencode CLI and return response."""
-    cmd = [OPENCODE_PATH, "run", "-m", OPENCODE_MODEL, prompt]
+async def call_model(messages: list[dict]) -> str:
+    """POST to /chat/completions and return the assistant content."""
+    if not MODEL_BASE_URL:
+        return "❌ Fehler: MODEL_BASE_URL ist nicht konfiguriert."
 
-    log.info(f"Calling opencode: {OPENCODE_PATH} run -m {OPENCODE_MODEL} ...")
+    url = f"{MODEL_BASE_URL}/chat/completions"
+    headers = {"Content-Type": "application/json"}
+    if MODEL_API_KEY:
+        headers["Authorization"] = f"Bearer {MODEL_API_KEY}"
+
+    payload = {
+        "model": MODEL_NAME,
+        "messages": messages,
+        "stream": False,
+    }
+
+    log.info(f"Calling model {MODEL_NAME} at {url} ({len(messages)} messages)")
 
     try:
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+            resp = await client.post(url, json=payload, headers=headers)
 
-        stdout, stderr = await asyncio.wait_for(
-            proc.communicate(),
-            timeout=TIMEOUT
-        )
+        if resp.status_code != 200:
+            log.error(f"Model API error: {resp.status_code} {resp.text[:500]}")
+            return f"❌ Fehler vom Modell: HTTP {resp.status_code}"
 
-        if proc.returncode != 0:
-            log.error(f"opencode CLI error: {stderr.decode()}")
-            return f"❌ Fehler beim Aufruf von opencode: {stderr.decode()[:200]}"
+        data = resp.json()
+        choices = data.get("choices") or []
+        if not choices:
+            log.error(f"Model returned no choices: {data}")
+            return "❌ Fehler: Modell hat keine Antwort geliefert."
 
-        return stdout.decode().strip()
+        content = choices[0].get("message", {}).get("content", "")
+        if not content:
+            log.error(f"Model returned empty content: {choices[0]}")
+            return "❌ Fehler: leere Antwort vom Modell."
+        return content.strip()
 
-    except asyncio.TimeoutError:
-        log.error(f"opencode CLI timeout after {TIMEOUT}s")
-        return f"⏱️ Timeout: opencode hat nicht innerhalb von {TIMEOUT}s geantwortet."
+    except httpx.TimeoutException:
+        log.error(f"Model API timeout after {TIMEOUT}s")
+        return f"⏱️ Timeout: Das Modell hat nicht innerhalb von {TIMEOUT}s geantwortet."
     except Exception as e:
-        log.exception("Error calling opencode")
+        log.exception("Error calling model")
         return f"❌ Fehler: {str(e)}"
 
 
@@ -286,21 +282,20 @@ Schreib mir einfach eine Nachricht und ich antworte dir.
 
 **Befehle:**
 • `hilfe` oder `?` – Diese Hilfe anzeigen
 
-Modell: `{OPENCODE_MODEL}`
+Modell: `{MODEL_NAME}` @ `{MODEL_BASE_URL}`
 
 Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart)."""
 
         await send_reply(conversation_token, help_text, reply_to=message_id)
         return JSONResponse({"status": "ok", "action": "help"})
 
-    prompt = build_prompt(conversation_token, message_text, actor_id)
-    response = await call_opencode(prompt)
+    messages = build_messages(conversation_token, message_text, actor_id)
+    response = await call_model(messages)
 
-    if conversation_token not in conversations:
-        conversations[conversation_token] = []
-    conversations[conversation_token].append((f"User ({actor_id})", message_text))
-    conversations[conversation_token].append(("Assistant", response))
+    history = conversations.setdefault(conversation_token, [])
+    history.append({"role": "user", "content": f"[{actor_id}] {message_text}"})
+    history.append({"role": "assistant", "content": response})
 
-    if len(conversations[conversation_token]) > MAX_HISTORY * 2:
-        conversations[conversation_token] = conversations[conversation_token][-MAX_HISTORY * 2:]
+    if len(history) > MAX_HISTORY * 2:
+        del history[: len(history) - MAX_HISTORY * 2]
 
     await send_reply(conversation_token, response, reply_to=message_id)
 
@@ -312,8 +307,8 @@ async def health():
     return {
         "status": "ok",
        "nextcloud_url": NEXTCLOUD_URL,
-        "opencode_path": OPENCODE_PATH,
-        "opencode_model": OPENCODE_MODEL,
+        "model_base_url": MODEL_BASE_URL,
+        "model_name": MODEL_NAME,
         "bot_name": BOT_NAME,
         "allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all",
         "max_history": MAX_HISTORY,
diff --git a/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix b/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix
index fba9606..15c2053 100644
--- a/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix
+++ b/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix
@@ -6,8 +6,8 @@
     enable = true;
     nextcloudUrl = "https://nc.hoyer.xyz";
     botSecretFile = config.sops.secrets."nextcloud-opencode-bot/secret".path;
-    opencodeConfig = ../../../../config/opencode/config.json;
-    model = "halo-8000/halo-8000";
+    modelBaseUrl = "http://halo.hoyer.tail:8000/v1";
+    model = "halo-8000";
     botName = "Halo";
     allowedUsers = [ ];
   };
diff --git a/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix b/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix
index 39daecb..85ac1a4 100644
--- a/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix
+++ b/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix
@@ -51,21 +51,16 @@ in
       description = "Path to file containing the bot secret (shared with Nextcloud)";
     };
 
-    opencodePath = mkOption {
-      type = types.path;
-      default = "${pkgs.opencode}/bin/opencode";
-      description = "Path to opencode CLI binary";
-    };
-
-    opencodeConfig = mkOption {
-      type = types.path;
-      description = "Path to the opencode config.json file (placed at $HOME/.config/opencode/config.json on service start)";
+    modelBaseUrl = mkOption {
+      type = types.str;
+      example = "http://halo.hoyer.tail:8000/v1";
+      description = "Base URL of the OpenAI-compatible chat-completions endpoint (without trailing /chat/completions)";
     };
 
     model = mkOption {
       type = types.str;
-      default = "halo-8000/halo-8000";
-      description = "Model identifier passed to `opencode run -m`";
+      default = "halo-8000";
+      description = "Model name passed in the `model` field of /chat/completions requests";
     };
 
     botName = mkOption {
@@ -83,13 +78,13 @@ in
     contextMessages = mkOption {
       type = types.int;
       default = 6;
-      description = "Number of recent messages to keep as context";
+      description = "Number of recent (user+assistant) turns to keep as context";
     };
 
     timeout = mkOption {
       type = types.int;
       default = 120;
-      description = "Timeout in seconds for opencode CLI";
+      description = "Timeout in seconds for the model API call";
     };
 
     systemPrompt = mkOption {
@@ -105,21 +100,12 @@ in
      wantedBy = [ "multi-user.target" ];
       after = [ "network.target" ];
 
-      path = with pkgs; [
-        bash
-        coreutils
-        git
-        curl
-        jq
-      ];
-
       environment = {
-        HOME = "/var/lib/nextcloud-opencode-bot";
         BOT_HOST = cfg.host;
         BOT_PORT = toString cfg.port;
         NEXTCLOUD_URL = cfg.nextcloudUrl;
-        OPENCODE_PATH = cfg.opencodePath;
-        OPENCODE_MODEL = cfg.model;
+        MODEL_BASE_URL = cfg.modelBaseUrl;
+        MODEL_NAME = cfg.model;
         BOT_NAME = cfg.botName;
         ALLOWED_USERS = concatStringsSep "," cfg.allowedUsers;
         CONTEXT_MESSAGES = toString cfg.contextMessages;
@@ -131,15 +117,6 @@
       serviceConfig = {
         Type = "simple";
 
-        # Materialize the opencode config at the path opencode looks for by
-        # default ($HOME/.config/opencode/config.json). We copy rather than
-        # symlink so opencode's config loader sees a regular file.
-        ExecStartPre = pkgs.writeShellScript "install-opencode-config" ''
-          set -eu
-          install -d -m 0700 "$HOME/.config/opencode"
-          install -m 0600 ${cfg.opencodeConfig} "$HOME/.config/opencode/config.json"
-        '';
-
         ExecStart = "${pythonEnv}/bin/uvicorn nextcloud_opencode_bot:app --host ${cfg.host} --port ${toString cfg.port}";
         Restart = "always";
         RestartSec = 5;
@@ -170,7 +147,6 @@ in
   users.users.opencode-bot = {
     isSystemUser = true;
     group = "opencode-bot";
-    home = "/var/lib/nextcloud-opencode-bot";
   };
 
   users.groups.opencode-bot = { };