refactor(mx): drive opencode bot via direct chat-completions API
The bot no longer shells out to `opencode run`. Instead it POSTs directly to the OpenAI-compatible /chat/completions endpoint exposed by llama-server on halo.hoyer.tail:8000. This removes the Bun/sqlite cold-start overhead per request, drops the pkgs.opencode runtime dependency, and eliminates the ExecStartPre dance that materialized config.json into the service's $HOME. Conversation history is now stored per room as OpenAI-style message dicts with user/assistant roles and sent, with the system prompt prepended, as a proper `messages` list, instead of the XML blob that was inlined into a single `opencode run` argument. `contextMessages` now counts (user+assistant) turns rather than individual messages. The interactive opencode setup (config/opencode/config.json) is unchanged — only the bot stops depending on it. The module gains a `modelBaseUrl` option; `model` is now the bare model name (`halo-8000`) without the provider/ prefix that the opencode CLI required.
This commit is contained in:
parent
aa3bc3c457
commit
42c52bd87f
3 changed files with 72 additions and 101 deletions
|
|
@ -2,11 +2,11 @@
|
||||||
"""
|
"""
|
||||||
Nextcloud Talk OpenCode Bot
|
Nextcloud Talk OpenCode Bot
|
||||||
|
|
||||||
Receives webhooks from Nextcloud Talk and responds using opencode CLI
|
Receives webhooks from Nextcloud Talk and forwards the conversation to an
|
||||||
against a local model exposed via the `halo-8000` provider.
|
OpenAI-compatible chat-completions endpoint (e.g. llama-server) running on
|
||||||
|
the local LLM host.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import json
|
import json
|
||||||
|
|
@ -22,8 +22,9 @@ from fastapi import FastAPI, Request, HTTPException, Header
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/")
|
NEXTCLOUD_URL = os.environ.get("NEXTCLOUD_URL", "").rstrip("/")
|
||||||
OPENCODE_PATH = os.environ.get("OPENCODE_PATH", "opencode")
|
MODEL_BASE_URL = os.environ.get("MODEL_BASE_URL", "").rstrip("/")
|
||||||
OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "halo-8000/halo-8000")
|
MODEL_NAME = os.environ.get("MODEL_NAME", "halo-8000")
|
||||||
|
MODEL_API_KEY = os.environ.get("MODEL_API_KEY", "")
|
||||||
ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()]
|
ALLOWED_USERS = [u.strip() for u in os.environ.get("ALLOWED_USERS", "").split(",") if u.strip()]
|
||||||
TIMEOUT = int(os.environ.get("TIMEOUT", "120"))
|
TIMEOUT = int(os.environ.get("TIMEOUT", "120"))
|
||||||
SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "")
|
SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "")
|
||||||
|
|
@ -50,7 +51,9 @@ log = logging.getLogger(__name__)
|
||||||
|
|
||||||
app = FastAPI(title="Nextcloud OpenCode Bot")
|
app = FastAPI(title="Nextcloud OpenCode Bot")
|
||||||
|
|
||||||
conversations: dict[str, list[tuple[str, str]]] = {}
|
# Conversation history per room: list of OpenAI-style message dicts
|
||||||
|
# (role: "user"|"assistant", content: str).
|
||||||
|
conversations: dict[str, list[dict]] = {}
|
||||||
MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6"))
|
MAX_HISTORY = int(os.environ.get("CONTEXT_MESSAGES", "6"))
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -93,11 +96,7 @@ BOT_SYSTEM_PROMPT = """\
|
||||||
Du bist ein KI-Assistent im Nextcloud Talk Chat.
|
Du bist ein KI-Assistent im Nextcloud Talk Chat.
|
||||||
Deine Antworten werden direkt in den Chatraum gepostet.
|
Deine Antworten werden direkt in den Chatraum gepostet.
|
||||||
Halte deine Antworten kurz und prägnant, da es ein Chat ist.
|
Halte deine Antworten kurz und prägnant, da es ein Chat ist.
|
||||||
Nutze Markdown für Formatierung wenn sinnvoll.
|
Nutze Markdown für Formatierung wenn sinnvoll."""
|
||||||
|
|
||||||
Du erhältst:
|
|
||||||
- <chat_history>: Die letzten Nachrichten im Chatraum (User und deine Antworten)
|
|
||||||
- <current_message>: Die aktuelle Nachricht, auf die du antworten sollst"""
|
|
||||||
|
|
||||||
|
|
||||||
def build_system_prompt() -> str:
|
def build_system_prompt() -> str:
|
||||||
|
|
@ -106,60 +105,57 @@ def build_system_prompt() -> str:
|
||||||
return BOT_SYSTEM_PROMPT
|
return BOT_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
|
||||||
def build_prompt(conversation_token: str, current_message: str, current_user: str) -> str:
|
def build_messages(conversation_token: str, current_message: str, current_user: str) -> list[dict]:
|
||||||
"""Build the full prompt. opencode run has no system-prompt flag, so we
|
messages: list[dict] = [{"role": "system", "content": build_system_prompt()}]
|
||||||
inline the system instructions at the top."""
|
|
||||||
parts = [
|
|
||||||
"<system_instructions>",
|
|
||||||
build_system_prompt(),
|
|
||||||
"</system_instructions>",
|
|
||||||
"",
|
|
||||||
]
|
|
||||||
|
|
||||||
history = conversations.get(conversation_token, [])
|
history = conversations.get(conversation_token, [])
|
||||||
if history:
|
messages.extend(history[-MAX_HISTORY * 2:])
|
||||||
parts.append("<chat_history>")
|
messages.append({"role": "user", "content": f"[{current_user}] {current_message}"})
|
||||||
for role, msg in history[-MAX_HISTORY:]:
|
return messages
|
||||||
parts.append(f"{role}: {msg}")
|
|
||||||
parts.append("</chat_history>")
|
|
||||||
parts.append("")
|
|
||||||
|
|
||||||
parts.append(f"<current_message user=\"{current_user}\">")
|
|
||||||
parts.append(current_message)
|
|
||||||
parts.append("</current_message>")
|
|
||||||
|
|
||||||
return "\n".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
async def call_opencode(prompt: str) -> str:
|
async def call_model(messages: list[dict]) -> str:
|
||||||
"""Call opencode CLI and return response."""
|
"""POST to /chat/completions and return the assistant content."""
|
||||||
cmd = [OPENCODE_PATH, "run", "-m", OPENCODE_MODEL, prompt]
|
if not MODEL_BASE_URL:
|
||||||
|
return "❌ Fehler: MODEL_BASE_URL ist nicht konfiguriert."
|
||||||
|
|
||||||
log.info(f"Calling opencode: {OPENCODE_PATH} run -m {OPENCODE_MODEL} ...")
|
url = f"{MODEL_BASE_URL}/chat/completions"
|
||||||
|
headers = {"Content-Type": "application/json"}
|
||||||
|
if MODEL_API_KEY:
|
||||||
|
headers["Authorization"] = f"Bearer {MODEL_API_KEY}"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": MODEL_NAME,
|
||||||
|
"messages": messages,
|
||||||
|
"stream": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(f"Calling model {MODEL_NAME} at {url} ({len(messages)} messages)")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
proc = await asyncio.create_subprocess_exec(
|
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
|
||||||
*cmd,
|
resp = await client.post(url, json=payload, headers=headers)
|
||||||
stdout=asyncio.subprocess.PIPE,
|
|
||||||
stderr=asyncio.subprocess.PIPE,
|
|
||||||
)
|
|
||||||
|
|
||||||
stdout, stderr = await asyncio.wait_for(
|
if resp.status_code != 200:
|
||||||
proc.communicate(),
|
log.error(f"Model API error: {resp.status_code} {resp.text[:500]}")
|
||||||
timeout=TIMEOUT
|
return f"❌ Fehler vom Modell: HTTP {resp.status_code}"
|
||||||
)
|
|
||||||
|
|
||||||
if proc.returncode != 0:
|
data = resp.json()
|
||||||
log.error(f"opencode CLI error: {stderr.decode()}")
|
choices = data.get("choices") or []
|
||||||
return f"❌ Fehler beim Aufruf von opencode: {stderr.decode()[:200]}"
|
if not choices:
|
||||||
|
log.error(f"Model returned no choices: {data}")
|
||||||
|
return "❌ Fehler: Modell hat keine Antwort geliefert."
|
||||||
|
|
||||||
return stdout.decode().strip()
|
content = choices[0].get("message", {}).get("content", "")
|
||||||
|
if not content:
|
||||||
|
log.error(f"Model returned empty content: {choices[0]}")
|
||||||
|
return "❌ Fehler: leere Antwort vom Modell."
|
||||||
|
return content.strip()
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
except httpx.TimeoutException:
|
||||||
log.error(f"opencode CLI timeout after {TIMEOUT}s")
|
log.error(f"Model API timeout after {TIMEOUT}s")
|
||||||
return f"⏱️ Timeout: opencode hat nicht innerhalb von {TIMEOUT}s geantwortet."
|
return f"⏱️ Timeout: Das Modell hat nicht innerhalb von {TIMEOUT}s geantwortet."
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception("Error calling opencode")
|
log.exception("Error calling model")
|
||||||
return f"❌ Fehler: {str(e)}"
|
return f"❌ Fehler: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -286,21 +282,20 @@ Schreib mir einfach eine Nachricht und ich antworte dir.
|
||||||
**Befehle:**
|
**Befehle:**
|
||||||
• `hilfe` oder `?` – Diese Hilfe anzeigen
|
• `hilfe` oder `?` – Diese Hilfe anzeigen
|
||||||
|
|
||||||
Modell: `{OPENCODE_MODEL}`
|
Modell: `{MODEL_NAME}` @ `{MODEL_BASE_URL}`
|
||||||
Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart)."""
|
Der Bot merkt sich die letzten Nachrichten pro Raum (bis zum Neustart)."""
|
||||||
await send_reply(conversation_token, help_text, reply_to=message_id)
|
await send_reply(conversation_token, help_text, reply_to=message_id)
|
||||||
return JSONResponse({"status": "ok", "action": "help"})
|
return JSONResponse({"status": "ok", "action": "help"})
|
||||||
|
|
||||||
prompt = build_prompt(conversation_token, message_text, actor_id)
|
messages = build_messages(conversation_token, message_text, actor_id)
|
||||||
response = await call_opencode(prompt)
|
response = await call_model(messages)
|
||||||
|
|
||||||
if conversation_token not in conversations:
|
history = conversations.setdefault(conversation_token, [])
|
||||||
conversations[conversation_token] = []
|
history.append({"role": "user", "content": f"[{actor_id}] {message_text}"})
|
||||||
conversations[conversation_token].append((f"User ({actor_id})", message_text))
|
history.append({"role": "assistant", "content": response})
|
||||||
conversations[conversation_token].append(("Assistant", response))
|
|
||||||
|
|
||||||
if len(conversations[conversation_token]) > MAX_HISTORY * 2:
|
if len(history) > MAX_HISTORY * 2:
|
||||||
conversations[conversation_token] = conversations[conversation_token][-MAX_HISTORY * 2:]
|
del history[: len(history) - MAX_HISTORY * 2]
|
||||||
|
|
||||||
await send_reply(conversation_token, response, reply_to=message_id)
|
await send_reply(conversation_token, response, reply_to=message_id)
|
||||||
|
|
||||||
|
|
@ -312,8 +307,8 @@ async def health():
|
||||||
return {
|
return {
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"nextcloud_url": NEXTCLOUD_URL,
|
"nextcloud_url": NEXTCLOUD_URL,
|
||||||
"opencode_path": OPENCODE_PATH,
|
"model_base_url": MODEL_BASE_URL,
|
||||||
"opencode_model": OPENCODE_MODEL,
|
"model_name": MODEL_NAME,
|
||||||
"bot_name": BOT_NAME,
|
"bot_name": BOT_NAME,
|
||||||
"allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all",
|
"allowed_users": ALLOWED_USERS if ALLOWED_USERS else "all",
|
||||||
"max_history": MAX_HISTORY,
|
"max_history": MAX_HISTORY,
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,8 @@
|
||||||
enable = true;
|
enable = true;
|
||||||
nextcloudUrl = "https://nc.hoyer.xyz";
|
nextcloudUrl = "https://nc.hoyer.xyz";
|
||||||
botSecretFile = config.sops.secrets."nextcloud-opencode-bot/secret".path;
|
botSecretFile = config.sops.secrets."nextcloud-opencode-bot/secret".path;
|
||||||
opencodeConfig = ../../../../config/opencode/config.json;
|
modelBaseUrl = "http://halo.hoyer.tail:8000/v1";
|
||||||
model = "halo-8000/halo-8000";
|
model = "halo-8000";
|
||||||
botName = "Halo";
|
botName = "Halo";
|
||||||
allowedUsers = [ ];
|
allowedUsers = [ ];
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -51,21 +51,16 @@ in
|
||||||
description = "Path to file containing the bot secret (shared with Nextcloud)";
|
description = "Path to file containing the bot secret (shared with Nextcloud)";
|
||||||
};
|
};
|
||||||
|
|
||||||
opencodePath = mkOption {
|
modelBaseUrl = mkOption {
|
||||||
type = types.path;
|
type = types.str;
|
||||||
default = "${pkgs.opencode}/bin/opencode";
|
example = "http://halo.hoyer.tail:8000/v1";
|
||||||
description = "Path to opencode CLI binary";
|
description = "Base URL of the OpenAI-compatible chat-completions endpoint (without trailing /chat/completions)";
|
||||||
};
|
|
||||||
|
|
||||||
opencodeConfig = mkOption {
|
|
||||||
type = types.path;
|
|
||||||
description = "Path to the opencode config.json file (placed at $HOME/.config/opencode/config.json on service start)";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
model = mkOption {
|
model = mkOption {
|
||||||
type = types.str;
|
type = types.str;
|
||||||
default = "halo-8000/halo-8000";
|
default = "halo-8000";
|
||||||
description = "Model identifier passed to `opencode run -m`";
|
description = "Model name passed in the `model` field of /chat/completions requests";
|
||||||
};
|
};
|
||||||
|
|
||||||
botName = mkOption {
|
botName = mkOption {
|
||||||
|
|
@ -83,13 +78,13 @@ in
|
||||||
contextMessages = mkOption {
|
contextMessages = mkOption {
|
||||||
type = types.int;
|
type = types.int;
|
||||||
default = 6;
|
default = 6;
|
||||||
description = "Number of recent messages to keep as context";
|
description = "Number of recent (user+assistant) turns to keep as context";
|
||||||
};
|
};
|
||||||
|
|
||||||
timeout = mkOption {
|
timeout = mkOption {
|
||||||
type = types.int;
|
type = types.int;
|
||||||
default = 120;
|
default = 120;
|
||||||
description = "Timeout in seconds for opencode CLI";
|
description = "Timeout in seconds for the model API call";
|
||||||
};
|
};
|
||||||
|
|
||||||
systemPrompt = mkOption {
|
systemPrompt = mkOption {
|
||||||
|
|
@ -105,21 +100,12 @@ in
|
||||||
wantedBy = [ "multi-user.target" ];
|
wantedBy = [ "multi-user.target" ];
|
||||||
after = [ "network.target" ];
|
after = [ "network.target" ];
|
||||||
|
|
||||||
path = with pkgs; [
|
|
||||||
bash
|
|
||||||
coreutils
|
|
||||||
git
|
|
||||||
curl
|
|
||||||
jq
|
|
||||||
];
|
|
||||||
|
|
||||||
environment = {
|
environment = {
|
||||||
HOME = "/var/lib/nextcloud-opencode-bot";
|
|
||||||
BOT_HOST = cfg.host;
|
BOT_HOST = cfg.host;
|
||||||
BOT_PORT = toString cfg.port;
|
BOT_PORT = toString cfg.port;
|
||||||
NEXTCLOUD_URL = cfg.nextcloudUrl;
|
NEXTCLOUD_URL = cfg.nextcloudUrl;
|
||||||
OPENCODE_PATH = cfg.opencodePath;
|
MODEL_BASE_URL = cfg.modelBaseUrl;
|
||||||
OPENCODE_MODEL = cfg.model;
|
MODEL_NAME = cfg.model;
|
||||||
BOT_NAME = cfg.botName;
|
BOT_NAME = cfg.botName;
|
||||||
ALLOWED_USERS = concatStringsSep "," cfg.allowedUsers;
|
ALLOWED_USERS = concatStringsSep "," cfg.allowedUsers;
|
||||||
CONTEXT_MESSAGES = toString cfg.contextMessages;
|
CONTEXT_MESSAGES = toString cfg.contextMessages;
|
||||||
|
|
@ -131,15 +117,6 @@ in
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
Type = "simple";
|
Type = "simple";
|
||||||
|
|
||||||
# Materialize the opencode config at the path opencode looks for by
|
|
||||||
# default ($HOME/.config/opencode/config.json). We copy rather than
|
|
||||||
# symlink so opencode's config loader sees a regular file.
|
|
||||||
ExecStartPre = pkgs.writeShellScript "install-opencode-config" ''
|
|
||||||
set -eu
|
|
||||||
install -d -m 0700 "$HOME/.config/opencode"
|
|
||||||
install -m 0600 ${cfg.opencodeConfig} "$HOME/.config/opencode/config.json"
|
|
||||||
'';
|
|
||||||
|
|
||||||
ExecStart = "${pythonEnv}/bin/uvicorn nextcloud_opencode_bot:app --host ${cfg.host} --port ${toString cfg.port}";
|
ExecStart = "${pythonEnv}/bin/uvicorn nextcloud_opencode_bot:app --host ${cfg.host} --port ${toString cfg.port}";
|
||||||
Restart = "always";
|
Restart = "always";
|
||||||
RestartSec = 5;
|
RestartSec = 5;
|
||||||
|
|
@ -170,7 +147,6 @@ in
|
||||||
users.users.opencode-bot = {
|
users.users.opencode-bot = {
|
||||||
isSystemUser = true;
|
isSystemUser = true;
|
||||||
group = "opencode-bot";
|
group = "opencode-bot";
|
||||||
home = "/var/lib/nextcloud-opencode-bot";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
users.groups.opencode-bot = { };
|
users.groups.opencode-bot = { };
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue