refactor(mx): drive opencode bot via direct chat-completions API

The bot no longer shells out to `opencode run`. Instead it POSTs directly to
the OpenAI-compatible /chat/completions endpoint exposed by llama-server on
halo.hoyer.tail:8000. This removes the Bun/sqlite cold-start
overhead per request, drops the pkgs.opencode runtime dependency, and
eliminates the ExecStartPre dance that materialized config.json into the
service's $HOME.

Conversation history is now stored as a proper OpenAI `messages` list
with system/user/assistant roles, instead of the XML blob that was
inlined into a single `opencode run` argument. The interactive opencode
setup (config/opencode/config.json) is unchanged — only the bot stops
depending on it.

The module gains a `modelBaseUrl` option; `model` is now the bare model
name (`halo-8000`) without the `provider/` prefix that the opencode CLI
required.
This commit is contained in:
Harald Hoyer 2026-05-13 16:38:58 +02:00
parent aa3bc3c457
commit 42c52bd87f
3 changed files with 72 additions and 101 deletions

View file

@ -2,11 +2,11 @@
""" """
Nextcloud Talk OpenCode Bot Nextcloud Talk OpenCode Bot
Receives webhooks from Nextcloud Talk and responds using opencode CLI Receives webhooks from Nextcloud Talk and forwards the conversation to an
against a local model exposed via the `halo-8000` provider. OpenAI-compatible chat-completions endpoint (e.g. llama-server) running on
the local LLM host.
""" """
import asyncio
import hashlib import hashlib
import hmac import hmac
import json import json
@ -22,8 +22,9 @@ from fastapi import FastAPI, Request, HTTPException, Header
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/") NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/")
OPENCODE_PATH = os.environ.get("OPENCODE_PATH", "opencode") MODEL_BASE_URL = os.environ.get("MODEL_BASE_URL", "").rstrip("/")
OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "halo-8000/halo-8000") MODEL_NAME = os.environ.get("MODEL_NAME", "halo-8000")
MODEL_API_KEY = os.environ.get("MODEL_API_KEY", "")
ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()] ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()]
TIMEOUT = int(os.environ.get("TIMEOUT", "120")) TIMEOUT = int(os.environ.get("TIMEOUT", "120"))
SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "") SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "")
@ -50,7 +51,9 @@ log = logging.getLogger(__name__)
app = FastAPI(title="Nextcloud OpenCode Bot") app = FastAPI(title="Nextcloud OpenCode Bot")
conversations: dict[str, list[tuple[str, str]]] = {} # Conversation history per room: list of OpenAI-style message dicts
# (role: "user"|"assistant", content: str).
conversations: dict[str, list[dict]] = {}
MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6")) MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6"))
@ -93,11 +96,7 @@ BOT_SYSTEM_PROMPT = """\
Du bist ein KI-Assistent im Nextcloud Talk Chat. Du bist ein KI-Assistent im Nextcloud Talk Chat.
Deine Antworten werden direkt in den Chatraum gepostet. Deine Antworten werden direkt in den Chatraum gepostet.
Halte deine Antworten kurz und prägnant, da es ein Chat ist. Halte deine Antworten kurz und prägnant, da es ein Chat ist.
Nutze Markdown für Formatierung wenn sinnvoll. Nutze Markdown für Formatierung wenn sinnvoll."""
Du erhältst:
- <chat_history>: Die letzten Nachrichten im Chatraum (User und deine Antworten)
- <current_message>: Die aktuelle Nachricht, auf die du antworten sollst"""
def build_system_prompt() -> str: def build_system_prompt() -> str:
@ -106,60 +105,57 @@ def build_system_prompt() -> str:
return BOT_SYSTEM_PROMPT return BOT_SYSTEM_PROMPT
def build_prompt(conversation_token: str, current_message: str, current_user: str) -> str: def build_messages(conversation_token: str, current_message: str, current_user: str) -> list[dict]:
"""Build the full prompt. opencode run has no system-prompt flag, so we messages: list[dict] = [{"role": "system", "content": build_system_prompt()}]
inline the system instructions at the top."""
parts = [
"<system_instructions>",
build_system_prompt(),
"</system_instructions>",
"",
]
history = conversations.get(conversation_token, []) history = conversations.get(conversation_token, [])
if history: messages.extend(history[-MAX_HISTORY * 2:])
parts.append("<chat_history>") messages.append({"role": "user", "content": f"[{current_user}] {current_message}"})
for role, msg in history[-MAX_HISTORY:]: return messages
parts.append(f"{role}: {msg}")
parts.append("</chat_history>")
parts.append("")
parts.append(f"<current_message user=\"{current_user}\">")
parts.append(current_message)
parts.append("</current_message>")
return "\n".join(parts)
async def call_opencode(prompt: str) -> str: async def call_model(messages: list[dict]) -> str:
"""Call opencode CLI and return response.""" """POST to /chat/completions and return the assistant content."""
cmd = [OPENCODE_PATH, "run", "-m", OPENCODE_MODEL, prompt] if not MODEL_BASE_URL:
return "❌ Fehler: MODEL_BASE_URL ist nicht konfiguriert."
log.info(f"Calling opencode: {OPENCODE_PATH} run -m {OPENCODE_MODEL} ...") url = f"{MODEL_BASE_URL}/chat/completions"
headers = {"Content-Type": "application/json"}
if MODEL_API_KEY:
headers["Authorization"] = f"Bearer {MODEL_API_KEY}"
payload = {
"model": MODEL_NAME,
"messages": messages,
"stream": False,
}
log.info(f"Calling model {MODEL_NAME} at {url} ({len(messages)} messages)")
try: try:
proc = await asyncio.create_subprocess_exec( async with httpx.AsyncClient(timeout=TIMEOUT) as client:
*cmd, resp = await client.post(url, json=payload, headers=headers)
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await asyncio.wait_for( if resp.status_code != 200:
proc.communicate(), log.error(f"Model API error: {resp.status_code} {resp.text[:500]}")
timeout=TIMEOUT return f"❌ Fehler vom Modell: HTTP {resp.status_code}"
)
if proc.returncode != 0: data = resp.json()
log.error(f"opencode CLI error: {stderr.decode()}") choices = data.get("choices") or []
return f"❌ Fehler beim Aufruf von opencode: {stderr.decode()[:200]}" if not choices:
log.error(f"Model returned no choices: {data}")
return "❌ Fehler: Modell hat keine Antwort geliefert."
return stdout.decode().strip() content = choices[0].get("message", {}).get("content", "")
if not content:
log.error(f"Model returned empty content: {choices[0]}")
return "❌ Fehler: leere Antwort vom Modell."
return content.strip()
except asyncio.TimeoutError: except httpx.TimeoutException:
log.error(f"opencode CLI timeout after {TIMEOUT}s") log.error(f"Model API timeout after {TIMEOUT}s")
return f"⏱️ Timeout: opencode hat nicht innerhalb von {TIMEOUT}s geantwortet." return f"⏱️ Timeout: Das Modell hat nicht innerhalb von {TIMEOUT}s geantwortet."
except Exception as e: except Exception as e:
log.exception("Error calling opencode") log.exception("Error calling model")
return f"❌ Fehler: {str(e)}" return f"❌ Fehler: {str(e)}"
@ -286,21 +282,20 @@ Schreib mir einfach eine Nachricht und ich antworte dir.
**Befehle:** **Befehle:**
`hilfe` oder `?` Diese Hilfe anzeigen `hilfe` oder `?` Diese Hilfe anzeigen
Modell: `{OPENCODE_MODEL}` Modell: `{MODEL_NAME}` @ `{MODEL_BASE_URL}`
Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart).""" Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart)."""
await send_reply(conversation_token, help_text, reply_to=message_id) await send_reply(conversation_token, help_text, reply_to=message_id)
return JSONResponse({"status": "ok", "action": "help"}) return JSONResponse({"status": "ok", "action": "help"})
prompt = build_prompt(conversation_token, message_text, actor_id) messages = build_messages(conversation_token, message_text, actor_id)
response = await call_opencode(prompt) response = await call_model(messages)
if conversation_token not in conversations: history = conversations.setdefault(conversation_token, [])
conversations[conversation_token] = [] history.append({"role": "user", "content": f"[{actor_id}] {message_text}"})
conversations[conversation_token].append((f"User ({actor_id})", message_text)) history.append({"role": "assistant", "content": response})
conversations[conversation_token].append(("Assistant", response))
if len(conversations[conversation_token]) > MAX_HISTORY * 2: if len(history) > MAX_HISTORY * 2:
conversations[conversation_token] = conversations[conversation_token][-MAX_HISTORY * 2:] del history[: len(history) - MAX_HISTORY * 2]
await send_reply(conversation_token, response, reply_to=message_id) await send_reply(conversation_token, response, reply_to=message_id)
@ -312,8 +307,8 @@ async def health():
return { return {
"status": "ok", "status": "ok",
"nextcloud_url": NEXTCLOUD_URL, "nextcloud_url": NEXTCLOUD_URL,
"opencode_path": OPENCODE_PATH, "model_base_url": MODEL_BASE_URL,
"opencode_model": OPENCODE_MODEL, "model_name": MODEL_NAME,
"bot_name": BOT_NAME, "bot_name": BOT_NAME,
"allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all", "allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all",
"max_history": MAX_HISTORY, "max_history": MAX_HISTORY,

View file

@ -6,8 +6,8 @@
enable = true; enable = true;
nextcloudUrl = "https://nc.hoyer.xyz"; nextcloudUrl = "https://nc.hoyer.xyz";
botSecretFile = config.sops.secrets."nextcloud-opencode-bot/secret".path; botSecretFile = config.sops.secrets."nextcloud-opencode-bot/secret".path;
opencodeConfig = ../../../../config/opencode/config.json; modelBaseUrl = "http://halo.hoyer.tail:8000/v1";
model = "halo-8000/halo-8000"; model = "halo-8000";
botName = "Halo"; botName = "Halo";
allowedUsers = [ ]; allowedUsers = [ ];
}; };

View file

@ -51,21 +51,16 @@ in
description = "Path to file containing the bot secret (shared with Nextcloud)"; description = "Path to file containing the bot secret (shared with Nextcloud)";
}; };
opencodePath = mkOption { modelBaseUrl = mkOption {
type = types.path; type = types.str;
default = "${pkgs.opencode}/bin/opencode"; example = "http://halo.hoyer.tail:8000/v1";
description = "Path to opencode CLI binary"; description = "Base URL of the OpenAI-compatible chat-completions endpoint (without trailing /chat/completions)";
};
opencodeConfig = mkOption {
type = types.path;
description = "Path to the opencode config.json file (placed at $HOME/.config/opencode/config.json on service start)";
}; };
model = mkOption { model = mkOption {
type = types.str; type = types.str;
default = "halo-8000/halo-8000"; default = "halo-8000";
description = "Model identifier passed to `opencode run -m`"; description = "Model name passed in the `model` field of /chat/completions requests";
}; };
botName = mkOption { botName = mkOption {
@ -83,13 +78,13 @@ in
contextMessages = mkOption { contextMessages = mkOption {
type = types.int; type = types.int;
default = 6; default = 6;
description = "Number of recent messages to keep as context"; description = "Number of recent (user+assistant) turns to keep as context";
}; };
timeout = mkOption { timeout = mkOption {
type = types.int; type = types.int;
default = 120; default = 120;
description = "Timeout in seconds for opencode CLI"; description = "Timeout in seconds for the model API call";
}; };
systemPrompt = mkOption { systemPrompt = mkOption {
@ -105,21 +100,12 @@ in
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
after = [ "network.target" ]; after = [ "network.target" ];
path = with pkgs; [
bash
coreutils
git
curl
jq
];
environment = { environment = {
HOME = "/var/lib/nextcloud-opencode-bot";
BOT_HOST = cfg.host; BOT_HOST = cfg.host;
BOT_PORT = toString cfg.port; BOT_PORT = toString cfg.port;
NEXTCLOUD_URL = cfg.nextcloudUrl; NEXTCLOUD_URL = cfg.nextcloudUrl;
OPENCODE_PATH = cfg.opencodePath; MODEL_BASE_URL = cfg.modelBaseUrl;
OPENCODE_MODEL = cfg.model; MODEL_NAME = cfg.model;
BOT_NAME = cfg.botName; BOT_NAME = cfg.botName;
ALLOWED_USERS = concatStringsSep "," cfg.allowedUsers; ALLOWED_USERS = concatStringsSep "," cfg.allowedUsers;
CONTEXT_MESSAGES = toString cfg.contextMessages; CONTEXT_MESSAGES = toString cfg.contextMessages;
@ -131,15 +117,6 @@ in
serviceConfig = { serviceConfig = {
Type = "simple"; Type = "simple";
# Materialize the opencode config at the path opencode looks for by
# default ($HOME/.config/opencode/config.json). We copy rather than
# symlink so opencode's config loader sees a regular file.
ExecStartPre = pkgs.writeShellScript "install-opencode-config" ''
set -eu
install -d -m 0700 "$HOME/.config/opencode"
install -m 0600 ${cfg.opencodeConfig} "$HOME/.config/opencode/config.json"
'';
ExecStart = "${pythonEnv}/bin/uvicorn nextcloud_opencode_bot:app --host ${cfg.host} --port ${toString cfg.port}"; ExecStart = "${pythonEnv}/bin/uvicorn nextcloud_opencode_bot:app --host ${cfg.host} --port ${toString cfg.port}";
Restart = "always"; Restart = "always";
RestartSec = 5; RestartSec = 5;
@ -170,7 +147,6 @@ in
users.users.opencode-bot = { users.users.opencode-bot = {
isSystemUser = true; isSystemUser = true;
group = "opencode-bot"; group = "opencode-bot";
home = "/var/lib/nextcloud-opencode-bot";
}; };
users.groups.opencode-bot = { }; users.groups.opencode-bot = { };