refactor(mx): drive opencode bot via direct chat-completions API

The bot no longer shells out to `opencode run`. Instead it POSTs directly
to the OpenAI-compatible /chat/completions endpoint exposed by llama-server
on halo.hoyer.tail:8000. This removes the per-request Bun/sqlite cold-start
overhead, drops the pkgs.opencode runtime dependency, and eliminates the
ExecStartPre dance that materialized config.json into the service's $HOME.
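
For reference, the request is the stock chat-completions shape. A minimal
standalone sketch of an equivalent call (base URL taken from the
description above; whether it needs a /v1 prefix depends on the
deployment, and no API key is sent, matching the empty MODEL_API_KEY
default):

```python
import httpx

# Hypothetical standalone version of the call the bot now makes per
# message; endpoint and model name are this deployment's values.
resp = httpx.post(
    "http://halo.hoyer.tail:8000/chat/completions",
    json={
        "model": "halo-8000",
        "messages": [{"role": "user", "content": "ping"}],
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```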

Conversation history is now stored as a proper OpenAI `messages` list
with system/user/assistant roles, instead of the XML blob that was
inlined into a single `opencode run` argument. The interactive opencode
setup (config/opencode/config.json) is unchanged — only the bot stops
depending on it.
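
Concretely, a request now carries a list like the sketch below: the system
prompt is prepended on every call, only user/assistant turns are persisted
per room, and the sender is encoded as a `[user]` prefix in the content
(the content strings here are invented examples):

```python
# Sketch of one request's `messages` payload. The system text is
# abbreviated; user/assistant contents are invented examples.
messages = [
    {"role": "system", "content": "Du bist ein KI-Assistent im Nextcloud Talk Chat. ..."},
    {"role": "user", "content": "[alice] Was macht der Bot?"},
    {"role": "assistant", "content": "Ich beantworte Fragen direkt im Chat."},
    {"role": "user", "content": "[alice] Auf welchem Modell läufst du?"},
]
```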

The module gains a `modelBaseUrl` option; `model` is now the bare model
name (`halo-8000`) without the `provider/` prefix that the opencode CLI
required.
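
The bot itself only sees environment variables; a hedged sketch of the
wiring the module is expected to produce (variable names are from the
script below, values assumed from the description above):

```python
import os

# Expected service environment (assumed values for this deployment):
os.environ["MODEL_BASE_URL"] = "http://halo.hoyer.tail:8000"  # from modelBaseUrl
os.environ["MODEL_NAME"] = "halo-8000"                        # from model (bare name)
```
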
Harald Hoyer 2026-05-13 16:38:58 +02:00
parent aa3bc3c457
commit 42c52bd87f
3 changed files with 72 additions and 101 deletions

@@ -2,11 +2,11 @@
 """
 Nextcloud Talk OpenCode Bot
 
-Receives webhooks from Nextcloud Talk and responds using opencode CLI
-against a local model exposed via the `halo-8000` provider.
+Receives webhooks from Nextcloud Talk and forwards the conversation to an
+OpenAI-compatible chat-completions endpoint (e.g. llama-server) running on
+the local LLM host.
 """
-import asyncio
 import hashlib
 import hmac
 import json
@@ -22,8 +22,9 @@ from fastapi import FastAPI, Request, HTTPException, Header
 from fastapi.responses import JSONResponse
 
 NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/")
-OPENCODE_PATH = os.environ.get("OPENCODE_PATH", "opencode")
-OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "halo-8000/halo-8000")
+MODEL_BASE_URL = os.environ.get("MODEL_BASE_URL", "").rstrip("/")
+MODEL_NAME = os.environ.get("MODEL_NAME", "halo-8000")
+MODEL_API_KEY = os.environ.get("MODEL_API_KEY", "")
 ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()]
 TIMEOUT = int(os.environ.get("TIMEOUT", "120"))
 SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "")
@@ -50,7 +51,9 @@ log = logging.getLogger(__name__)
 app = FastAPI(title="Nextcloud OpenCode Bot")
 
-conversations: dict[str, list[tuple[str, str]]] = {}
+# Conversation history per room: list of OpenAI-style message dicts
+# (role: "user"|"assistant", content: str).
+conversations: dict[str, list[dict]] = {}
 MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6"))
@@ -93,11 +96,7 @@ BOT_SYSTEM_PROMPT = """\
 Du bist ein KI-Assistent im Nextcloud Talk Chat.
 Deine Antworten werden direkt in den Chatraum gepostet.
 Halte deine Antworten kurz und prägnant, da es ein Chat ist.
-Nutze Markdown für Formatierung wenn sinnvoll.
-
-Du erhältst:
-- <chat_history>: Die letzten Nachrichten im Chatraum (User und deine Antworten)
-- <current_message>: Die aktuelle Nachricht, auf die du antworten sollst"""
+Nutze Markdown für Formatierung wenn sinnvoll."""
 
 
 def build_system_prompt() -> str:
@@ -106,60 +105,57 @@ def build_system_prompt() -> str:
     return BOT_SYSTEM_PROMPT
 
-def build_prompt(conversation_token: str, current_message: str, current_user: str) -> str:
-    """Build the full prompt. opencode run has no system-prompt flag, so we
-    inline the system instructions at the top."""
-    parts = [
-        "<system_instructions>",
-        build_system_prompt(),
-        "</system_instructions>",
-        "",
-    ]
+def build_messages(conversation_token: str, current_message: str, current_user: str) -> list[dict]:
+    messages: list[dict] = [{"role": "system", "content": build_system_prompt()}]
     history = conversations.get(conversation_token, [])
-    if history:
-        parts.append("<chat_history>")
-        for role, msg in history[-MAX_HISTORY:]:
-            parts.append(f"{role}: {msg}")
-        parts.append("</chat_history>")
-        parts.append("")
-    parts.append(f"<current_message user=\"{current_user}\">")
-    parts.append(current_message)
-    parts.append("</current_message>")
-    return "\n".join(parts)
+    messages.extend(history[-MAX_HISTORY * 2:])
+    messages.append({"role": "user", "content": f"[{current_user}] {current_message}"})
+    return messages
 
-async def call_opencode(prompt: str) -> str:
-    """Call opencode CLI and return response."""
-    cmd = [OPENCODE_PATH, "run", "-m", OPENCODE_MODEL, prompt]
+async def call_model(messages: list[dict]) -> str:
+    """POST to /chat/completions and return the assistant content."""
+    if not MODEL_BASE_URL:
+        return "❌ Fehler: MODEL_BASE_URL ist nicht konfiguriert."
 
-    log.info(f"Calling opencode: {OPENCODE_PATH} run -m {OPENCODE_MODEL} ...")
+    url = f"{MODEL_BASE_URL}/chat/completions"
+    headers = {"Content-Type": "application/json"}
+    if MODEL_API_KEY:
+        headers["Authorization"] = f"Bearer {MODEL_API_KEY}"
+    payload = {
+        "model": MODEL_NAME,
+        "messages": messages,
+        "stream": False,
+    }
+    log.info(f"Calling model {MODEL_NAME} at {url} ({len(messages)} messages)")
 
     try:
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+            resp = await client.post(url, json=payload, headers=headers)
 
-        stdout, stderr = await asyncio.wait_for(
-            proc.communicate(),
-            timeout=TIMEOUT
-        )
+        if resp.status_code != 200:
+            log.error(f"Model API error: {resp.status_code} {resp.text[:500]}")
+            return f"❌ Fehler vom Modell: HTTP {resp.status_code}"
 
-        if proc.returncode != 0:
-            log.error(f"opencode CLI error: {stderr.decode()}")
-            return f"❌ Fehler beim Aufruf von opencode: {stderr.decode()[:200]}"
+        data = resp.json()
+        choices = data.get("choices") or []
+        if not choices:
+            log.error(f"Model returned no choices: {data}")
+            return "❌ Fehler: Modell hat keine Antwort geliefert."
 
-        return stdout.decode().strip()
+        content = choices[0].get("message", {}).get("content", "")
+        if not content:
+            log.error(f"Model returned empty content: {choices[0]}")
+            return "❌ Fehler: leere Antwort vom Modell."
+        return content.strip()
-    except asyncio.TimeoutError:
-        log.error(f"opencode CLI timeout after {TIMEOUT}s")
-        return f"⏱️ Timeout: opencode hat nicht innerhalb von {TIMEOUT}s geantwortet."
+    except httpx.TimeoutException:
+        log.error(f"Model API timeout after {TIMEOUT}s")
+        return f"⏱️ Timeout: Das Modell hat nicht innerhalb von {TIMEOUT}s geantwortet."
     except Exception as e:
-        log.exception("Error calling opencode")
+        log.exception("Error calling model")
         return f"❌ Fehler: {str(e)}"
@@ -286,21 +282,20 @@ Schreib mir einfach eine Nachricht und ich antworte dir.
 **Befehle:**
 `hilfe` oder `?` Diese Hilfe anzeigen
 
-Modell: `{OPENCODE_MODEL}`
+Modell: `{MODEL_NAME}` @ `{MODEL_BASE_URL}`
 Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart)."""
         await send_reply(conversation_token, help_text, reply_to=message_id)
         return JSONResponse({"status": "ok", "action": "help"})
 
-    prompt = build_prompt(conversation_token, message_text, actor_id)
-    response = await call_opencode(prompt)
+    messages = build_messages(conversation_token, message_text, actor_id)
+    response = await call_model(messages)
 
-    if conversation_token not in conversations:
-        conversations[conversation_token] = []
-    conversations[conversation_token].append((f"User ({actor_id})", message_text))
-    conversations[conversation_token].append(("Assistant", response))
+    history = conversations.setdefault(conversation_token, [])
+    history.append({"role": "user", "content": f"[{actor_id}] {message_text}"})
+    history.append({"role": "assistant", "content": response})
 
-    if len(conversations[conversation_token]) > MAX_HISTORY * 2:
-        conversations[conversation_token] = conversations[conversation_token][-MAX_HISTORY * 2:]
+    if len(history) > MAX_HISTORY * 2:
+        del history[: len(history) - MAX_HISTORY * 2]
 
     await send_reply(conversation_token, response, reply_to=message_id)
@@ -312,8 +307,8 @@ async def health():
     return {
         "status": "ok",
        "nextcloud_url": NEXTCLOUD_URL,
-        "opencode_path": OPENCODE_PATH,
-        "opencode_model": OPENCODE_MODEL,
+        "model_base_url": MODEL_BASE_URL,
+        "model_name": MODEL_NAME,
         "bot_name": BOT_NAME,
         "allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all",
         "max_history": MAX_HISTORY,