fix(agent): use native format for tool result history in run_tool_call_loop

When use_native_tools is true, the agent loop now:
- Formats assistant history as JSON with tool_calls array (matching
  what convert_messages() expects to reconstruct NativeMessage)
- Pushes each tool result as ChatMessage::tool with tool_call_id
  (instead of a single ChatMessage::user with XML tool_result tags)
- Adds fallback parsing for markdown code block tool calls
  (```tool_call ... ``` and hybrid ```tool_call ... </tool_call>)

Without this, the second LLM call (sending tool results back) gets
rejected with 4xx by OpenRouter/Gemini because the message format
doesn't match the OpenAI tool calling API expectations.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Edvard 2026-02-17 16:02:39 -05:00 committed by Chummy
parent 508fb53ac1
commit 0e5a785015

View file

@ -461,12 +461,50 @@ fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
}
}
// If XML tags found nothing, try markdown code blocks with tool_call language.
// Models behind OpenRouter sometimes output ```tool_call ... ``` or hybrid
// ```tool_call ... </tool_call> instead of structured API calls or XML tags.
if calls.is_empty() {
static MD_TOOL_CALL_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?s)```tool[_-]?call\s*\n(.*?)(?:```|</tool[_-]?call>|</toolcall>)").unwrap()
});
let mut md_remaining = response;
let mut md_text_parts: Vec<String> = Vec::new();
let mut last_end = 0;
for cap in MD_TOOL_CALL_RE.captures_iter(response) {
let full_match = cap.get(0).unwrap();
let before = &response[last_end..full_match.start()];
if !before.trim().is_empty() {
md_text_parts.push(before.trim().to_string());
}
let inner = &cap[1];
let json_values = extract_json_values(inner);
for value in json_values {
let parsed_calls = parse_tool_calls_from_json_value(&value);
calls.extend(parsed_calls);
}
last_end = full_match.end();
}
if !calls.is_empty() {
let after = &response[last_end..];
if !after.trim().is_empty() {
md_text_parts.push(after.trim().to_string());
}
text_parts = md_text_parts;
md_remaining = "";
}
let _ = md_remaining; // suppress unused warning
}
// SECURITY: We do NOT fall back to extracting arbitrary JSON from the response
// here. That would enable prompt injection attacks where malicious content
// (e.g., in emails, files, or web pages) could include JSON that mimics a
// tool call. Tool calls MUST be explicitly wrapped in either:
// 1. OpenAI-style JSON with a "tool_calls" array
// 2. ZeroClaw tool-call tags (<tool_call>, <toolcall>, <tool-call>)
// 3. Markdown code blocks with tool_call/toolcall/tool-call language
// This ensures only the LLM's intentional tool calls are executed.
// Remaining text after last tool call
@ -488,6 +526,34 @@ fn parse_structured_tool_calls(tool_calls: &[ToolCall]) -> Vec<ParsedToolCall> {
.collect()
}
/// Build assistant history entry in JSON format for native tool-call APIs.
/// `convert_messages` in the OpenRouter provider parses this JSON to reconstruct
/// the proper `NativeMessage` with structured `tool_calls`.
/// Serialize an assistant turn into the JSON shape that the OpenRouter
/// provider's `convert_messages` parses to rebuild a `NativeMessage`.
///
/// The resulting object carries:
/// - `content`: the trimmed assistant text, or JSON `null` when the text
///   is blank, and
/// - `tool_calls`: one `{id, name, arguments}` object per structured call.
fn build_native_assistant_history(text: &str, tool_calls: &[ToolCall]) -> String {
    // An all-whitespace response is encoded as null rather than "".
    let trimmed = text.trim();
    let content = match trimmed.is_empty() {
        true => serde_json::Value::Null,
        false => serde_json::Value::String(trimmed.to_owned()),
    };

    // Mirror each structured call as a plain {id, name, arguments} object.
    let mut encoded_calls = Vec::with_capacity(tool_calls.len());
    for tc in tool_calls {
        encoded_calls.push(serde_json::json!({
            "id": tc.id,
            "name": tc.name,
            "arguments": tc.arguments,
        }));
    }

    serde_json::json!({
        "content": content,
        "tool_calls": encoded_calls,
    })
    .to_string()
}
fn build_assistant_history_with_tool_calls(text: &str, tool_calls: &[ToolCall]) -> String {
let mut parts = Vec::new();
@ -577,7 +643,9 @@ pub(crate) async fn run_tool_call_loop(
let llm_started_at = Instant::now();
// Choose between native tool-call API and prompt-based tool use.
let (response_text, parsed_text, tool_calls, assistant_history_content) =
// `native_tool_calls` preserves the structured ToolCall vec (with IDs) so
// that tool results can later be sent back as proper `role: tool` messages.
let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) =
if use_native_tools {
match provider
.chat_with_tools(history, &tool_definitions, model, temperature)
@ -603,16 +671,19 @@ pub(crate) async fn run_tool_call_loop(
calls = fallback_calls;
}
// Use JSON format for native tools so convert_messages()
// can reconstruct proper NativeMessage with tool_calls.
let assistant_history_content = if resp.tool_calls.is_empty() {
response_text.clone()
} else {
build_assistant_history_with_tool_calls(
build_native_assistant_history(
&response_text,
&resp.tool_calls,
)
};
(response_text, parsed_text, calls, assistant_history_content)
let native_calls = resp.tool_calls;
(response_text, parsed_text, calls, assistant_history_content, native_calls)
}
Err(e) => {
observer.record_event(&ObserverEvent::LlmResponse {
@ -643,7 +714,7 @@ pub(crate) async fn run_tool_call_loop(
let response_text = resp;
let assistant_history_content = response_text.clone();
let (parsed_text, calls) = parse_tool_calls(&response_text);
(response_text, parsed_text, calls, assistant_history_content)
(response_text, parsed_text, calls, assistant_history_content, Vec::new())
}
Err(e) => {
observer.record_event(&ObserverEvent::LlmResponse {
@ -678,8 +749,11 @@ pub(crate) async fn run_tool_call_loop(
let _ = std::io::stdout().flush();
}
// Execute each tool call and build results
// Execute each tool call and build results.
// `individual_results` tracks per-call output so that native-mode history
// can emit one `role: tool` message per tool call with the correct ID.
let mut tool_results = String::new();
let mut individual_results: Vec<String> = Vec::new();
for call in &tool_calls {
// ── Approval hook ────────────────────────────────
if let Some(mgr) = approval {
@ -699,9 +773,11 @@ pub(crate) async fn run_tool_call_loop(
mgr.record_decision(&call.name, &call.arguments, decision, channel_name);
if decision == ApprovalResponse::No {
let denied = "Denied by user.".to_string();
individual_results.push(denied.clone());
let _ = writeln!(
tool_results,
"<tool_result name=\"{}\">\nDenied by user.\n</tool_result>",
"<tool_result name=\"{}\">\n{denied}\n</tool_result>",
call.name
);
continue;
@ -740,6 +816,7 @@ pub(crate) async fn run_tool_call_loop(
format!("Unknown tool: {}", call.name)
};
individual_results.push(result.clone());
let _ = writeln!(
tool_results,
"<tool_result name=\"{}\">\n{}\n</tool_result>",
@ -747,9 +824,22 @@ pub(crate) async fn run_tool_call_loop(
);
}
// Add assistant message with tool calls + tool results to history
// Add assistant message with tool calls + tool results to history.
// Native mode: use JSON-structured messages so convert_messages() can
// reconstruct proper OpenAI-format tool_calls and tool result messages.
// Prompt mode: use XML-based text format as before.
history.push(ChatMessage::assistant(assistant_history_content));
history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
if !native_tool_calls.is_empty() {
for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) {
let tool_msg = serde_json::json!({
"tool_call_id": native_call.id,
"content": result,
});
history.push(ChatMessage::tool(tool_msg.to_string()));
}
} else {
history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
}
}
anyhow::bail!("Agent exceeded maximum tool iterations ({MAX_TOOL_ITERATIONS})")