fix(agent): retry malformed prefixed tool_call markup

This commit is contained in:
JamesYin 2026-02-17 22:42:19 +08:00 committed by Chummy
parent 128e888d7a
commit 59f74e8f39

View file

@ -35,6 +35,9 @@ static SENSITIVE_KV_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"(?i)(token|api[_-]?key|password|secret|user[_-]?key|bearer|credential)["']?\s*[:=]\s*(?:"([^"]{8,})"|'([^']{8,})'|([a-zA-Z0-9_\-\.]{8,}))"#).unwrap()
});
static MALFORMED_TOOL_CALL_PREFIX_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"(?is)^<tool_call>\s*[a-zA-Z_][a-zA-Z0-9_:-]*\s*\{"#).unwrap());
/// Scrub credentials from tool output to prevent accidental exfiltration.
/// Replaces known credential patterns with a redacted placeholder while preserving
/// a small prefix for context.
@ -488,6 +491,19 @@ fn parse_structured_tool_calls(tool_calls: &[ToolCall]) -> Vec<ParsedToolCall> {
.collect()
}
fn looks_like_malformed_tool_call_markup(response: &str) -> bool {
let trimmed = response.trim_start();
if !trimmed.starts_with("<tool_call>") {
return false;
}
if !trimmed.contains("</tool_call>") {
return true;
}
MALFORMED_TOOL_CALL_PREFIX_REGEX.is_match(trimmed)
}
fn build_assistant_history_with_tool_calls(text: &str, tool_calls: &[ToolCall]) -> String {
let mut parts = Vec::new();
@ -668,11 +684,12 @@ pub(crate) async fn run_tool_call_loop(
};
let has_tool_call_markup =
response_text.contains("<tool_call>") && response_text.contains("</tool_call>");
let malformed_tool_call_markup = looks_like_malformed_tool_call_markup(&response_text);
if tool_calls.is_empty() {
// Recovery path: the model attempted tool use but emitted malformed JSON.
// Ask it to re-send valid tool-call payload instead of leaking raw markup to users.
if has_tool_call_markup && parsed_text_is_empty {
if (has_tool_call_markup && parsed_text_is_empty) || malformed_tool_call_markup {
history.push(ChatMessage::assistant(response_text.clone()));
history.push(ChatMessage::user(
"[Tool parser error]\nYour previous <tool_call> payload was invalid JSON and was NOT executed. Re-send the same tool call using strict valid JSON only. Escape inner double quotes inside string values.",
@ -1601,6 +1618,17 @@ I will now call the tool with this payload:
);
}
#[test]
fn looks_like_malformed_tool_call_markup_detects_prefixed_json() {
let malformed = r#"<tool_call>schedule{"action":"create","id":"nova-self-update"}"#;
assert!(looks_like_malformed_tool_call_markup(malformed));
let valid = r#"<tool_call>
{"name":"shell","arguments":{"command":"date"}}
</tool_call>"#;
assert!(!looks_like_malformed_tool_call_markup(valid));
}
#[test]
fn build_tool_instructions_includes_all_tools() {
use crate::security::SecurityPolicy;
@ -2057,4 +2085,78 @@ Done."#;
.iter()
.any(|m| m.role == "user" && m.content.contains("[Tool parser error]")));
}
struct PrefixMalformedThenValidToolProvider;
#[async_trait]
impl Provider for PrefixMalformedThenValidToolProvider {
async fn chat_with_system(
&self,
_system_prompt: Option<&str>,
_message: &str,
_model: &str,
_temperature: f64,
) -> anyhow::Result<String> {
anyhow::bail!("chat_with_system should not be called in this test");
}
async fn chat_with_history(
&self,
messages: &[ChatMessage],
_model: &str,
_temperature: f64,
) -> anyhow::Result<String> {
if messages
.iter()
.any(|m| m.role == "user" && m.content.contains("[Tool results]"))
{
return Ok("Scheduled successfully.".to_string());
}
if messages
.iter()
.any(|m| m.role == "user" && m.content.contains("[Tool parser error]"))
{
return Ok(r#"<tool_call>
{"name":"shell","arguments":{"command":"echo fixed"}}
</tool_call>"#
.to_string());
}
Ok(r#"<tool_call>schedule{"action":"create","command":"date","expression":"0 3 * * *","id":"nova-self-update"}"#.to_string())
}
}
#[tokio::test]
async fn run_tool_call_loop_retries_prefixed_tool_call_markup() {
let runs = Arc::new(AtomicUsize::new(0));
let tools_registry: Vec<Box<dyn Tool>> = vec![Box::new(CountingShellTool {
runs: Arc::clone(&runs),
})];
let mut history = vec![
ChatMessage::system("sys"),
ChatMessage::user("set schedule"),
];
let response = run_tool_call_loop(
&PrefixMalformedThenValidToolProvider,
&mut history,
&tools_registry,
&NoopObserver,
"test-provider",
"test-model",
0.0,
true,
)
.await
.unwrap();
assert_eq!(response, "Scheduled successfully.");
assert_eq!(runs.load(Ordering::SeqCst), 1);
assert!(!response.contains("<tool_call>"));
assert!(history
.iter()
.any(|m| m.role == "user" && m.content.contains("[Tool parser error]")));
}
}