fix(agent): use native format for tool result history in run_tool_call_loop
When use_native_tools is true, the agent loop now:

- Formats assistant history as JSON with a tool_calls array (matching what convert_messages() expects to reconstruct NativeMessage)
- Pushes each tool result as ChatMessage::tool with tool_call_id (instead of a single ChatMessage::user with XML tool_result tags)
- Adds fallback parsing for markdown code-block tool calls (```tool_call ... ``` and hybrid ```tool_call ... </tool_call>)

Without this, the second LLM call (sending tool results back) gets rejected with a 4xx by OpenRouter/Gemini because the message format doesn't match the OpenAI tool-calling API expectations.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
508fb53ac1
commit
0e5a785015
1 changed files with 98 additions and 8 deletions
|
|
@ -461,12 +461,50 @@ fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
|
|||
}
|
||||
}
|
||||
|
||||
// If XML tags found nothing, try markdown code blocks with tool_call language.
|
||||
// Models behind OpenRouter sometimes output ```tool_call ... ``` or hybrid
|
||||
// ```tool_call ... </tool_call> instead of structured API calls or XML tags.
|
||||
if calls.is_empty() {
|
||||
static MD_TOOL_CALL_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"(?s)```tool[_-]?call\s*\n(.*?)(?:```|</tool[_-]?call>|</toolcall>)").unwrap()
|
||||
});
|
||||
let mut md_remaining = response;
|
||||
let mut md_text_parts: Vec<String> = Vec::new();
|
||||
let mut last_end = 0;
|
||||
|
||||
for cap in MD_TOOL_CALL_RE.captures_iter(response) {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let before = &response[last_end..full_match.start()];
|
||||
if !before.trim().is_empty() {
|
||||
md_text_parts.push(before.trim().to_string());
|
||||
}
|
||||
let inner = &cap[1];
|
||||
let json_values = extract_json_values(inner);
|
||||
for value in json_values {
|
||||
let parsed_calls = parse_tool_calls_from_json_value(&value);
|
||||
calls.extend(parsed_calls);
|
||||
}
|
||||
last_end = full_match.end();
|
||||
}
|
||||
|
||||
if !calls.is_empty() {
|
||||
let after = &response[last_end..];
|
||||
if !after.trim().is_empty() {
|
||||
md_text_parts.push(after.trim().to_string());
|
||||
}
|
||||
text_parts = md_text_parts;
|
||||
md_remaining = "";
|
||||
}
|
||||
let _ = md_remaining; // suppress unused warning
|
||||
}
|
||||
|
||||
// SECURITY: We do NOT fall back to extracting arbitrary JSON from the response
|
||||
// here. That would enable prompt injection attacks where malicious content
|
||||
// (e.g., in emails, files, or web pages) could include JSON that mimics a
|
||||
// tool call. Tool calls MUST be explicitly wrapped in either:
|
||||
// 1. OpenAI-style JSON with a "tool_calls" array
|
||||
// 2. ZeroClaw tool-call tags (<tool_call>, <toolcall>, <tool-call>)
|
||||
// 3. Markdown code blocks with tool_call/toolcall/tool-call language
|
||||
// This ensures only the LLM's intentional tool calls are executed.
|
||||
|
||||
// Remaining text after last tool call
|
||||
|
|
@ -488,6 +526,34 @@ fn parse_structured_tool_calls(tool_calls: &[ToolCall]) -> Vec<ParsedToolCall> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Build assistant history entry in JSON format for native tool-call APIs.
|
||||
/// `convert_messages` in the OpenRouter provider parses this JSON to reconstruct
|
||||
/// the proper `NativeMessage` with structured `tool_calls`.
|
||||
fn build_native_assistant_history(text: &str, tool_calls: &[ToolCall]) -> String {
|
||||
let calls_json: Vec<serde_json::Value> = tool_calls
|
||||
.iter()
|
||||
.map(|tc| {
|
||||
serde_json::json!({
|
||||
"id": tc.id,
|
||||
"name": tc.name,
|
||||
"arguments": tc.arguments,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
let content = if text.trim().is_empty() {
|
||||
serde_json::Value::Null
|
||||
} else {
|
||||
serde_json::Value::String(text.trim().to_string())
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"content": content,
|
||||
"tool_calls": calls_json,
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn build_assistant_history_with_tool_calls(text: &str, tool_calls: &[ToolCall]) -> String {
|
||||
let mut parts = Vec::new();
|
||||
|
||||
|
|
@ -577,7 +643,9 @@ pub(crate) async fn run_tool_call_loop(
|
|||
let llm_started_at = Instant::now();
|
||||
|
||||
// Choose between native tool-call API and prompt-based tool use.
|
||||
let (response_text, parsed_text, tool_calls, assistant_history_content) =
|
||||
// `native_tool_calls` preserves the structured ToolCall vec (with IDs) so
|
||||
// that tool results can later be sent back as proper `role: tool` messages.
|
||||
let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) =
|
||||
if use_native_tools {
|
||||
match provider
|
||||
.chat_with_tools(history, &tool_definitions, model, temperature)
|
||||
|
|
@ -603,16 +671,19 @@ pub(crate) async fn run_tool_call_loop(
|
|||
calls = fallback_calls;
|
||||
}
|
||||
|
||||
// Use JSON format for native tools so convert_messages()
|
||||
// can reconstruct proper NativeMessage with tool_calls.
|
||||
let assistant_history_content = if resp.tool_calls.is_empty() {
|
||||
response_text.clone()
|
||||
} else {
|
||||
build_assistant_history_with_tool_calls(
|
||||
build_native_assistant_history(
|
||||
&response_text,
|
||||
&resp.tool_calls,
|
||||
)
|
||||
};
|
||||
|
||||
(response_text, parsed_text, calls, assistant_history_content)
|
||||
let native_calls = resp.tool_calls;
|
||||
(response_text, parsed_text, calls, assistant_history_content, native_calls)
|
||||
}
|
||||
Err(e) => {
|
||||
observer.record_event(&ObserverEvent::LlmResponse {
|
||||
|
|
@ -643,7 +714,7 @@ pub(crate) async fn run_tool_call_loop(
|
|||
let response_text = resp;
|
||||
let assistant_history_content = response_text.clone();
|
||||
let (parsed_text, calls) = parse_tool_calls(&response_text);
|
||||
(response_text, parsed_text, calls, assistant_history_content)
|
||||
(response_text, parsed_text, calls, assistant_history_content, Vec::new())
|
||||
}
|
||||
Err(e) => {
|
||||
observer.record_event(&ObserverEvent::LlmResponse {
|
||||
|
|
@ -678,8 +749,11 @@ pub(crate) async fn run_tool_call_loop(
|
|||
let _ = std::io::stdout().flush();
|
||||
}
|
||||
|
||||
// Execute each tool call and build results
|
||||
// Execute each tool call and build results.
|
||||
// `individual_results` tracks per-call output so that native-mode history
|
||||
// can emit one `role: tool` message per tool call with the correct ID.
|
||||
let mut tool_results = String::new();
|
||||
let mut individual_results: Vec<String> = Vec::new();
|
||||
for call in &tool_calls {
|
||||
// ── Approval hook ────────────────────────────────
|
||||
if let Some(mgr) = approval {
|
||||
|
|
@ -699,9 +773,11 @@ pub(crate) async fn run_tool_call_loop(
|
|||
mgr.record_decision(&call.name, &call.arguments, decision, channel_name);
|
||||
|
||||
if decision == ApprovalResponse::No {
|
||||
let denied = "Denied by user.".to_string();
|
||||
individual_results.push(denied.clone());
|
||||
let _ = writeln!(
|
||||
tool_results,
|
||||
"<tool_result name=\"{}\">\nDenied by user.\n</tool_result>",
|
||||
"<tool_result name=\"{}\">\n{denied}\n</tool_result>",
|
||||
call.name
|
||||
);
|
||||
continue;
|
||||
|
|
@ -740,6 +816,7 @@ pub(crate) async fn run_tool_call_loop(
|
|||
format!("Unknown tool: {}", call.name)
|
||||
};
|
||||
|
||||
individual_results.push(result.clone());
|
||||
let _ = writeln!(
|
||||
tool_results,
|
||||
"<tool_result name=\"{}\">\n{}\n</tool_result>",
|
||||
|
|
@ -747,9 +824,22 @@ pub(crate) async fn run_tool_call_loop(
|
|||
);
|
||||
}
|
||||
|
||||
// Add assistant message with tool calls + tool results to history
|
||||
// Add assistant message with tool calls + tool results to history.
|
||||
// Native mode: use JSON-structured messages so convert_messages() can
|
||||
// reconstruct proper OpenAI-format tool_calls and tool result messages.
|
||||
// Prompt mode: use XML-based text format as before.
|
||||
history.push(ChatMessage::assistant(assistant_history_content));
|
||||
history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
|
||||
if !native_tool_calls.is_empty() {
|
||||
for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) {
|
||||
let tool_msg = serde_json::json!({
|
||||
"tool_call_id": native_call.id,
|
||||
"content": result,
|
||||
});
|
||||
history.push(ChatMessage::tool(tool_msg.to_string()));
|
||||
}
|
||||
} else {
|
||||
history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
|
||||
}
|
||||
}
|
||||
|
||||
anyhow::bail!("Agent exceeded maximum tool iterations ({MAX_TOOL_ITERATIONS})")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue