From 42c52bd87f8cf39474accce7f027eb7c030596f3 Mon Sep 17 00:00:00 2001
From: Harald Hoyer
Date: Wed, 13 May 2026 16:38:58 +0200
Subject: [PATCH] refactor(mx): drive opencode bot via direct chat-completions
 API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bot no longer shells out to `opencode run`. Instead it POSTs
directly to the OpenAI-compatible /chat/completions endpoint exposed by
llama-server on halo.hoyer.tail:8000. This removes the per-request
Bun/sqlite cold-start overhead, drops the pkgs.opencode runtime
dependency, and eliminates the ExecStartPre dance that materialized
config.json into the service's $HOME.

Conversation history is now stored as a proper OpenAI `messages` list
with system/user/assistant roles, instead of the XML blob that was
inlined into a single `opencode run` argument.

The interactive opencode setup (config/opencode/config.json) is
unchanged; only the bot stops depending on it.

The module gains a `modelBaseUrl` option; `model` is now the bare model
name (`halo-8000`) without the provider/ prefix that the opencode CLI
required.
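
For reference, the new call path boils down to roughly this sketch
(illustrative only: the user "alice", the message text and the blocking
httpx call are stand-ins; the real implementation is the async
call_model() in bot.py):

    import httpx

    # What the bot now sends to llama-server's OpenAI-compatible API.
    payload = {
        "model": "halo-8000",
        "messages": [
            {"role": "system", "content": "Du bist ein KI-Assistent ..."},
            {"role": "user", "content": "[alice] Hallo!"},
        ],
        "stream": False,
    }
    resp = httpx.post(
        "http://halo.hoyer.tail:8000/v1/chat/completions",
        json=payload,
        timeout=120,
    )
    # Standard chat-completions envelope: the first choice carries the reply.
    reply = resp.json()["choices"][0]["message"]["content"]
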
---
 .../mx/nextcloud-opencode-bot/bot.py      | 125 +++++++++---------
 .../mx/nextcloud-opencode-bot/default.nix |   4 +-
 .../mx/nextcloud-opencode-bot/module.nix  |  44 ++----
 3 files changed, 72 insertions(+), 101 deletions(-)

diff --git a/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py b/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py
index b300a8f..a2190db 100644
--- a/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py
+++ b/systems/x86_64-linux/mx/nextcloud-opencode-bot/bot.py
@@ -2,11 +2,11 @@
 """
 Nextcloud Talk OpenCode Bot
 
-Receives webhooks from Nextcloud Talk and responds using opencode CLI
-against a local model exposed via the `halo-8000` provider.
+Receives webhooks from Nextcloud Talk and forwards the conversation to an
+OpenAI-compatible chat-completions endpoint (e.g. llama-server) running on
+the local LLM host.
 """
 
-import asyncio
 import hashlib
 import hmac
 import json
@@ -22,8 +22,9 @@ from fastapi import FastAPI, Request, HTTPException, Header
 from fastapi.responses import JSONResponse
 
 NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/")
-OPENCODE_PATH = os.environ.get("OPENCODE_PATH", "opencode")
-OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "halo-8000/halo-8000")
+MODEL_BASE_URL = os.environ.get("MODEL_BASE_URL", "").rstrip("/")
+MODEL_NAME = os.environ.get("MODEL_NAME", "halo-8000")
+MODEL_API_KEY = os.environ.get("MODEL_API_KEY", "")
 ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()]
 TIMEOUT = int(os.environ.get("TIMEOUT", "120"))
 SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "")
@@ -50,7 +51,9 @@ log = logging.getLogger(__name__)
 
 app = FastAPI(title="Nextcloud OpenCode Bot")
 
-conversations: dict[str, list[tuple[str, str]]] = {}
+# Conversation history per room: list of OpenAI-style message dicts
+# (role: "user"|"assistant", content: str).
+conversations: dict[str, list[dict]] = {}
 
 MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6"))
 
@@ -93,11 +96,7 @@ BOT_SYSTEM_PROMPT = """\
 Du bist ein KI-Assistent im Nextcloud Talk Chat.
 Deine Antworten werden direkt in den Chatraum gepostet.
 Halte deine Antworten kurz und prägnant, da es ein Chat ist.
-Nutze Markdown für Formatierung wenn sinnvoll.
-
-Du erhältst:
-- <conversation_history>: Die letzten Nachrichten im Chatraum (User und deine Antworten)
-- <current_message>: Die aktuelle Nachricht, auf die du antworten sollst"""
+Nutze Markdown für Formatierung wenn sinnvoll."""
 
 
 def build_system_prompt() -> str:
@@ -106,60 +105,57 @@ def build_system_prompt() -> str:
     return BOT_SYSTEM_PROMPT
 
 
-def build_prompt(conversation_token: str, current_message: str, current_user: str) -> str:
-    """Build the full prompt. opencode run has no system-prompt flag, so we
-    inline the system instructions at the top."""
-    parts = [
-        "<system>",
-        build_system_prompt(),
-        "</system>",
-        "",
-    ]
-
+def build_messages(conversation_token: str, current_message: str, current_user: str) -> list[dict]:
+    messages: list[dict] = [{"role": "system", "content": build_system_prompt()}]
     history = conversations.get(conversation_token, [])
-    if history:
-        parts.append("<conversation_history>")
-        for role, msg in history[-MAX_HISTORY:]:
-            parts.append(f"{role}: {msg}")
-        parts.append("</conversation_history>")
-        parts.append("")
-
-    parts.append(f"<current_message from={current_user}>")
-    parts.append(current_message)
-    parts.append("</current_message>")
-
-    return "\n".join(parts)
+    messages.extend(history[-MAX_HISTORY * 2:])
+    messages.append({"role": "user", "content": f"[{current_user}] {current_message}"})
+    return messages
 
 
-async def call_opencode(prompt: str) -> str:
-    """Call opencode CLI and return response."""
-    cmd = [OPENCODE_PATH, "run", "-m", OPENCODE_MODEL, prompt]
+async def call_model(messages: list[dict]) -> str:
+    """POST to /chat/completions and return the assistant content."""
+    if not MODEL_BASE_URL:
+        return "❌ Fehler: MODEL_BASE_URL ist nicht konfiguriert."
 
-    log.info(f"Calling opencode: {OPENCODE_PATH} run -m {OPENCODE_MODEL} ...")
+    url = f"{MODEL_BASE_URL}/chat/completions"
+    headers = {"Content-Type": "application/json"}
+    if MODEL_API_KEY:
+        headers["Authorization"] = f"Bearer {MODEL_API_KEY}"
+
+    payload = {
+        "model": MODEL_NAME,
+        "messages": messages,
+        "stream": False,
+    }
+
+    log.info(f"Calling model {MODEL_NAME} at {url} ({len(messages)} messages)")
 
     try:
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+            resp = await client.post(url, json=payload, headers=headers)
 
-        stdout, stderr = await asyncio.wait_for(
-            proc.communicate(),
-            timeout=TIMEOUT
-        )
+        if resp.status_code != 200:
+            log.error(f"Model API error: {resp.status_code} {resp.text[:500]}")
+            return f"❌ Fehler vom Modell: HTTP {resp.status_code}"
 
-        if proc.returncode != 0:
-            log.error(f"opencode CLI error: {stderr.decode()}")
-            return f"❌ Fehler beim Aufruf von opencode: {stderr.decode()[:200]}"
+        data = resp.json()
+        choices = data.get("choices") or []
+        if not choices:
+            log.error(f"Model returned no choices: {data}")
+            return "❌ Fehler: Modell hat keine Antwort geliefert."
 
-        return stdout.decode().strip()
+        content = choices[0].get("message", {}).get("content", "")
+        if not content:
+            log.error(f"Model returned empty content: {choices[0]}")
+            return "❌ Fehler: leere Antwort vom Modell."
+        return content.strip()
 
-    except asyncio.TimeoutError:
-        log.error(f"opencode CLI timeout after {TIMEOUT}s")
-        return f"⏱️ Timeout: opencode hat nicht innerhalb von {TIMEOUT}s geantwortet."
+    except httpx.TimeoutException:
+        log.error(f"Model API timeout after {TIMEOUT}s")
+        return f"⏱️ Timeout: Das Modell hat nicht innerhalb von {TIMEOUT}s geantwortet."
     except Exception as e:
-        log.exception("Error calling opencode")
+        log.exception("Error calling model")
         return f"❌ Fehler: {str(e)}"
 
 
@@ -286,21 +282,20 @@ Schreib mir einfach eine Nachricht und ich antworte dir.
 
 **Befehle:**
 • `hilfe` oder `?` – Diese Hilfe anzeigen
 
-Modell: `{OPENCODE_MODEL}`
+Modell: `{MODEL_NAME}` @ `{MODEL_BASE_URL}`
 
 Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart)."""
 
         await send_reply(conversation_token, help_text, reply_to=message_id)
         return JSONResponse({"status": "ok", "action": "help"})
 
-    prompt = build_prompt(conversation_token, message_text, actor_id)
-    response = await call_opencode(prompt)
+    messages = build_messages(conversation_token, message_text, actor_id)
+    response = await call_model(messages)
 
-    if conversation_token not in conversations:
-        conversations[conversation_token] = []
-    conversations[conversation_token].append((f"User ({actor_id})", message_text))
-    conversations[conversation_token].append(("Assistant", response))
+    history = conversations.setdefault(conversation_token, [])
+    history.append({"role": "user", "content": f"[{actor_id}] {message_text}"})
+    history.append({"role": "assistant", "content": response})
 
-    if len(conversations[conversation_token]) > MAX_HISTORY * 2:
-        conversations[conversation_token] = conversations[conversation_token][-MAX_HISTORY * 2:]
+    if len(history) > MAX_HISTORY * 2:
+        del history[: len(history) - MAX_HISTORY * 2]
 
     await send_reply(conversation_token, response, reply_to=message_id)
 
@@ -312,8 +307,8 @@ async def health():
     return {
         "status": "ok",
        "nextcloud_url": NEXTCLOUD_URL,
-        "opencode_path": OPENCODE_PATH,
-        "opencode_model": OPENCODE_MODEL,
+        "model_base_url": MODEL_BASE_URL,
+        "model_name": MODEL_NAME,
         "bot_name": BOT_NAME,
         "allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all",
         "max_history": MAX_HISTORY,
diff --git a/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix b/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix
index fba9606..15c2053 100644
--- a/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix
+++ b/systems/x86_64-linux/mx/nextcloud-opencode-bot/default.nix
@@ -6,8 +6,8 @@
     enable = true;
     nextcloudUrl = "https://nc.hoyer.xyz";
     botSecretFile = config.sops.secrets."nextcloud-opencode-bot/secret".path;
-    opencodeConfig = ../../../../config/opencode/config.json;
-    model = "halo-8000/halo-8000";
+    modelBaseUrl = "http://halo.hoyer.tail:8000/v1";
+    model = "halo-8000";
     botName = "Halo";
     allowedUsers = [ ];
   };
diff --git a/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix b/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix
index 39daecb..85ac1a4 100644
--- a/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix
+++ b/systems/x86_64-linux/mx/nextcloud-opencode-bot/module.nix
@@ -51,21 +51,16 @@ in
       description = "Path to file containing the bot secret (shared with Nextcloud)";
     };
 
-    opencodePath = mkOption {
-      type = types.path;
-      default = "${pkgs.opencode}/bin/opencode";
-      description = "Path to opencode CLI binary";
-    };
-
-    opencodeConfig = mkOption {
-      type = types.path;
-      description = "Path to the opencode config.json file (placed at $HOME/.config/opencode/config.json on service start)";
+    modelBaseUrl = mkOption {
+      type = types.str;
+      example = "http://halo.hoyer.tail:8000/v1";
+      description = "Base URL of the OpenAI-compatible chat-completions endpoint (without trailing /chat/completions)";
     };
 
     model = mkOption {
       type = types.str;
-      default = "halo-8000/halo-8000";
-      description = "Model identifier passed to `opencode run -m`";
+      default = "halo-8000";
+      description = "Model name passed in the `model` field of /chat/completions requests";
     };
 
     botName = mkOption {
@@ -83,13 +78,13 @@ in
     contextMessages = mkOption {
       type = types.int;
       default = 6;
-      description = "Number of recent messages to keep as context";
+      description = "Number of recent (user+assistant) turns to keep as context";
     };
 
     timeout = mkOption {
       type = types.int;
       default = 120;
-      description = "Timeout in seconds for opencode CLI";
+      description = "Timeout in seconds for the model API call";
     };
 
     systemPrompt = mkOption {
@@ -105,21 +100,12 @@ in
      wantedBy = [ "multi-user.target" ];
       after = [ "network.target" ];
 
-      path = with pkgs; [
-        bash
-        coreutils
-        git
-        curl
-        jq
-      ];
-
       environment = {
-        HOME = "/var/lib/nextcloud-opencode-bot";
         BOT_HOST = cfg.host;
         BOT_PORT = toString cfg.port;
         NEXTCLOUD_URL = cfg.nextcloudUrl;
-        OPENCODE_PATH = cfg.opencodePath;
-        OPENCODE_MODEL = cfg.model;
+        MODEL_BASE_URL = cfg.modelBaseUrl;
+        MODEL_NAME = cfg.model;
         BOT_NAME = cfg.botName;
         ALLOWED_USERS = concatStringsSep "," cfg.allowedUsers;
         CONTEXT_MESSAGES = toString cfg.contextMessages;
@@ -131,15 +117,6 @@
       serviceConfig = {
         Type = "simple";
 
-        # Materialize the opencode config at the path opencode looks for by
-        # default ($HOME/.config/opencode/config.json). We copy rather than
-        # symlink so opencode's config loader sees a regular file.
-        ExecStartPre = pkgs.writeShellScript "install-opencode-config" ''
-          set -eu
-          install -d -m 0700 "$HOME/.config/opencode"
-          install -m 0600 ${cfg.opencodeConfig} "$HOME/.config/opencode/config.json"
-        '';
-
         ExecStart = "${pythonEnv}/bin/uvicorn nextcloud_opencode_bot:app --host ${cfg.host} --port ${toString cfg.port}";
         Restart = "always";
         RestartSec = 5;
@@ -170,7 +147,6 @@ in
   users.users.opencode-bot = {
     isSystemUser = true;
     group = "opencode-bot";
-    home = "/var/lib/nextcloud-opencode-bot";
   };
 
   users.groups.opencode-bot = { };