fix(agent): use native format for tool result history in run_tool_call_loop
When use_native_tools is true, the agent loop now: - Formats assistant history as JSON with tool_calls array (matching what convert_messages() expects to reconstruct NativeMessage) - Pushes each tool result as ChatMessage::tool with tool_call_id (instead of a single ChatMessage::user with XML tool_result tags) - Adds fallback parsing for markdown code block tool calls (```tool_call ... ``` and hybrid ```tool_call ... </tool_call>) Without this, the second LLM call (sending tool results back) gets rejected with 4xx by OpenRouter/Gemini because the message format doesn't match the OpenAI tool calling API expectations. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
508fb53ac1
commit
0e5a785015
1 changed file with 98 additions and 8 deletions
|
|
@ -461,12 +461,50 @@ fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If XML tags found nothing, try markdown code blocks with tool_call language.
|
||||||
|
// Models behind OpenRouter sometimes output ```tool_call ... ``` or hybrid
|
||||||
|
// ```tool_call ... </tool_call> instead of structured API calls or XML tags.
|
||||||
|
if calls.is_empty() {
|
||||||
|
static MD_TOOL_CALL_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||||
|
Regex::new(r"(?s)```tool[_-]?call\s*\n(.*?)(?:```|</tool[_-]?call>|</toolcall>)").unwrap()
|
||||||
|
});
|
||||||
|
let mut md_remaining = response;
|
||||||
|
let mut md_text_parts: Vec<String> = Vec::new();
|
||||||
|
let mut last_end = 0;
|
||||||
|
|
||||||
|
for cap in MD_TOOL_CALL_RE.captures_iter(response) {
|
||||||
|
let full_match = cap.get(0).unwrap();
|
||||||
|
let before = &response[last_end..full_match.start()];
|
||||||
|
if !before.trim().is_empty() {
|
||||||
|
md_text_parts.push(before.trim().to_string());
|
||||||
|
}
|
||||||
|
let inner = &cap[1];
|
||||||
|
let json_values = extract_json_values(inner);
|
||||||
|
for value in json_values {
|
||||||
|
let parsed_calls = parse_tool_calls_from_json_value(&value);
|
||||||
|
calls.extend(parsed_calls);
|
||||||
|
}
|
||||||
|
last_end = full_match.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
if !calls.is_empty() {
|
||||||
|
let after = &response[last_end..];
|
||||||
|
if !after.trim().is_empty() {
|
||||||
|
md_text_parts.push(after.trim().to_string());
|
||||||
|
}
|
||||||
|
text_parts = md_text_parts;
|
||||||
|
md_remaining = "";
|
||||||
|
}
|
||||||
|
let _ = md_remaining; // suppress unused warning
|
||||||
|
}
|
||||||
|
|
||||||
// SECURITY: We do NOT fall back to extracting arbitrary JSON from the response
|
// SECURITY: We do NOT fall back to extracting arbitrary JSON from the response
|
||||||
// here. That would enable prompt injection attacks where malicious content
|
// here. That would enable prompt injection attacks where malicious content
|
||||||
// (e.g., in emails, files, or web pages) could include JSON that mimics a
|
// (e.g., in emails, files, or web pages) could include JSON that mimics a
|
||||||
// tool call. Tool calls MUST be explicitly wrapped in either:
|
// tool call. Tool calls MUST be explicitly wrapped in either:
|
||||||
// 1. OpenAI-style JSON with a "tool_calls" array
|
// 1. OpenAI-style JSON with a "tool_calls" array
|
||||||
// 2. ZeroClaw tool-call tags (<tool_call>, <toolcall>, <tool-call>)
|
// 2. ZeroClaw tool-call tags (<tool_call>, <toolcall>, <tool-call>)
|
||||||
|
// 3. Markdown code blocks with tool_call/toolcall/tool-call language
|
||||||
// This ensures only the LLM's intentional tool calls are executed.
|
// This ensures only the LLM's intentional tool calls are executed.
|
||||||
|
|
||||||
// Remaining text after last tool call
|
// Remaining text after last tool call
|
||||||
|
|
@ -488,6 +526,34 @@ fn parse_structured_tool_calls(tool_calls: &[ToolCall]) -> Vec<ParsedToolCall> {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build assistant history entry in JSON format for native tool-call APIs.
|
||||||
|
/// `convert_messages` in the OpenRouter provider parses this JSON to reconstruct
|
||||||
|
/// the proper `NativeMessage` with structured `tool_calls`.
|
||||||
|
fn build_native_assistant_history(text: &str, tool_calls: &[ToolCall]) -> String {
|
||||||
|
let calls_json: Vec<serde_json::Value> = tool_calls
|
||||||
|
.iter()
|
||||||
|
.map(|tc| {
|
||||||
|
serde_json::json!({
|
||||||
|
"id": tc.id,
|
||||||
|
"name": tc.name,
|
||||||
|
"arguments": tc.arguments,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let content = if text.trim().is_empty() {
|
||||||
|
serde_json::Value::Null
|
||||||
|
} else {
|
||||||
|
serde_json::Value::String(text.trim().to_string())
|
||||||
|
};
|
||||||
|
|
||||||
|
serde_json::json!({
|
||||||
|
"content": content,
|
||||||
|
"tool_calls": calls_json,
|
||||||
|
})
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
fn build_assistant_history_with_tool_calls(text: &str, tool_calls: &[ToolCall]) -> String {
|
fn build_assistant_history_with_tool_calls(text: &str, tool_calls: &[ToolCall]) -> String {
|
||||||
let mut parts = Vec::new();
|
let mut parts = Vec::new();
|
||||||
|
|
||||||
|
|
@ -577,7 +643,9 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
let llm_started_at = Instant::now();
|
let llm_started_at = Instant::now();
|
||||||
|
|
||||||
// Choose between native tool-call API and prompt-based tool use.
|
// Choose between native tool-call API and prompt-based tool use.
|
||||||
let (response_text, parsed_text, tool_calls, assistant_history_content) =
|
// `native_tool_calls` preserves the structured ToolCall vec (with IDs) so
|
||||||
|
// that tool results can later be sent back as proper `role: tool` messages.
|
||||||
|
let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) =
|
||||||
if use_native_tools {
|
if use_native_tools {
|
||||||
match provider
|
match provider
|
||||||
.chat_with_tools(history, &tool_definitions, model, temperature)
|
.chat_with_tools(history, &tool_definitions, model, temperature)
|
||||||
|
|
@ -603,16 +671,19 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
calls = fallback_calls;
|
calls = fallback_calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use JSON format for native tools so convert_messages()
|
||||||
|
// can reconstruct proper NativeMessage with tool_calls.
|
||||||
let assistant_history_content = if resp.tool_calls.is_empty() {
|
let assistant_history_content = if resp.tool_calls.is_empty() {
|
||||||
response_text.clone()
|
response_text.clone()
|
||||||
} else {
|
} else {
|
||||||
build_assistant_history_with_tool_calls(
|
build_native_assistant_history(
|
||||||
&response_text,
|
&response_text,
|
||||||
&resp.tool_calls,
|
&resp.tool_calls,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
(response_text, parsed_text, calls, assistant_history_content)
|
let native_calls = resp.tool_calls;
|
||||||
|
(response_text, parsed_text, calls, assistant_history_content, native_calls)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
observer.record_event(&ObserverEvent::LlmResponse {
|
observer.record_event(&ObserverEvent::LlmResponse {
|
||||||
|
|
@ -643,7 +714,7 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
let response_text = resp;
|
let response_text = resp;
|
||||||
let assistant_history_content = response_text.clone();
|
let assistant_history_content = response_text.clone();
|
||||||
let (parsed_text, calls) = parse_tool_calls(&response_text);
|
let (parsed_text, calls) = parse_tool_calls(&response_text);
|
||||||
(response_text, parsed_text, calls, assistant_history_content)
|
(response_text, parsed_text, calls, assistant_history_content, Vec::new())
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
observer.record_event(&ObserverEvent::LlmResponse {
|
observer.record_event(&ObserverEvent::LlmResponse {
|
||||||
|
|
@ -678,8 +749,11 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
let _ = std::io::stdout().flush();
|
let _ = std::io::stdout().flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute each tool call and build results
|
// Execute each tool call and build results.
|
||||||
|
// `individual_results` tracks per-call output so that native-mode history
|
||||||
|
// can emit one `role: tool` message per tool call with the correct ID.
|
||||||
let mut tool_results = String::new();
|
let mut tool_results = String::new();
|
||||||
|
let mut individual_results: Vec<String> = Vec::new();
|
||||||
for call in &tool_calls {
|
for call in &tool_calls {
|
||||||
// ── Approval hook ────────────────────────────────
|
// ── Approval hook ────────────────────────────────
|
||||||
if let Some(mgr) = approval {
|
if let Some(mgr) = approval {
|
||||||
|
|
@ -699,9 +773,11 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
mgr.record_decision(&call.name, &call.arguments, decision, channel_name);
|
mgr.record_decision(&call.name, &call.arguments, decision, channel_name);
|
||||||
|
|
||||||
if decision == ApprovalResponse::No {
|
if decision == ApprovalResponse::No {
|
||||||
|
let denied = "Denied by user.".to_string();
|
||||||
|
individual_results.push(denied.clone());
|
||||||
let _ = writeln!(
|
let _ = writeln!(
|
||||||
tool_results,
|
tool_results,
|
||||||
"<tool_result name=\"{}\">\nDenied by user.\n</tool_result>",
|
"<tool_result name=\"{}\">\n{denied}\n</tool_result>",
|
||||||
call.name
|
call.name
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -740,6 +816,7 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
format!("Unknown tool: {}", call.name)
|
format!("Unknown tool: {}", call.name)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
individual_results.push(result.clone());
|
||||||
let _ = writeln!(
|
let _ = writeln!(
|
||||||
tool_results,
|
tool_results,
|
||||||
"<tool_result name=\"{}\">\n{}\n</tool_result>",
|
"<tool_result name=\"{}\">\n{}\n</tool_result>",
|
||||||
|
|
@ -747,9 +824,22 @@ pub(crate) async fn run_tool_call_loop(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add assistant message with tool calls + tool results to history
|
// Add assistant message with tool calls + tool results to history.
|
||||||
|
// Native mode: use JSON-structured messages so convert_messages() can
|
||||||
|
// reconstruct proper OpenAI-format tool_calls and tool result messages.
|
||||||
|
// Prompt mode: use XML-based text format as before.
|
||||||
history.push(ChatMessage::assistant(assistant_history_content));
|
history.push(ChatMessage::assistant(assistant_history_content));
|
||||||
history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
|
if !native_tool_calls.is_empty() {
|
||||||
|
for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) {
|
||||||
|
let tool_msg = serde_json::json!({
|
||||||
|
"tool_call_id": native_call.id,
|
||||||
|
"content": result,
|
||||||
|
});
|
||||||
|
history.push(ChatMessage::tool(tool_msg.to_string()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
anyhow::bail!("Agent exceeded maximum tool iterations ({MAX_TOOL_ITERATIONS})")
|
anyhow::bail!("Agent exceeded maximum tool iterations ({MAX_TOOL_ITERATIONS})")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue