fix(agent): retry malformed prefixed tool_call markup

2026-02-17 22:42:19 +08:00 · 2026-02-17 22:42:19 +08:00 · 59f74e8f39
commit 59f74e8f39
parent 128e888d7a
1 changed files with 103 additions and 1 deletions
--- a/src/agent/loop_.rs
+++ b/src/agent/loop_.rs
@ -35,6 +35,9 @@ static SENSITIVE_KV_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?i)(token|api[_-]?key|password|secret|user[_-]?key|bearer|credential)["']?\s*[:=]\s*(?:"([^"]{8,})"|'([^']{8,})'|([a-zA-Z0-9_\-\.]{8,}))"#).unwrap()
 });

+static MALFORMED_TOOL_CALL_PREFIX_REGEX: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r#"(?is)^<tool_call>\s*[a-zA-Z_][a-zA-Z0-9_:-]*\s*\{"#).unwrap());
+
 /// Scrub credentials from tool output to prevent accidental exfiltration.
 /// Replaces known credential patterns with a redacted placeholder while preserving
 /// a small prefix for context.
@ -488,6 +491,19 @@ fn parse_structured_tool_calls(tool_calls: &[ToolCall]) -> Vec<ParsedToolCall> {
        .collect()
 }

+fn looks_like_malformed_tool_call_markup(response: &str) -> bool {
+    let trimmed = response.trim_start();
+    if !trimmed.starts_with("<tool_call>") {
+        return false;
+    }
+
+    if !trimmed.contains("</tool_call>") {
+        return true;
+    }
+
+    MALFORMED_TOOL_CALL_PREFIX_REGEX.is_match(trimmed)
+}
+
 fn build_assistant_history_with_tool_calls(text: &str, tool_calls: &[ToolCall]) -> String {
    let mut parts = Vec::new();

@ -668,11 +684,12 @@ pub(crate) async fn run_tool_call_loop(
        };
        let has_tool_call_markup =
            response_text.contains("<tool_call>") && response_text.contains("</tool_call>");
+        let malformed_tool_call_markup = looks_like_malformed_tool_call_markup(&response_text);

        if tool_calls.is_empty() {
            // Recovery path: the model attempted tool use but emitted malformed JSON.
            // Ask it to re-send valid tool-call payload instead of leaking raw markup to users.
-            if has_tool_call_markup && parsed_text_is_empty {
+            if (has_tool_call_markup && parsed_text_is_empty) || malformed_tool_call_markup {
                history.push(ChatMessage::assistant(response_text.clone()));
                history.push(ChatMessage::user(
                    "[Tool parser error]\nYour previous <tool_call> payload was invalid JSON and was NOT executed. Re-send the same tool call using strict valid JSON only. Escape inner double quotes inside string values.",
@ -1601,6 +1618,17 @@ I will now call the tool with this payload:
        );
    }

+    #[test]
+    fn looks_like_malformed_tool_call_markup_detects_prefixed_json() {
+        let malformed = r#"<tool_call>schedule{"action":"create","id":"nova-self-update"}"#;
+        assert!(looks_like_malformed_tool_call_markup(malformed));
+
+        let valid = r#"<tool_call>
+{"name":"shell","arguments":{"command":"date"}}
+</tool_call>"#;
+        assert!(!looks_like_malformed_tool_call_markup(valid));
+    }
+
    #[test]
    fn build_tool_instructions_includes_all_tools() {
        use crate::security::SecurityPolicy;
@ -2057,4 +2085,78 @@ Done."#;
            .iter()
            .any(|m| m.role == "user" && m.content.contains("[Tool parser error]")));
    }
+
+    struct PrefixMalformedThenValidToolProvider;
+
+    #[async_trait]
+    impl Provider for PrefixMalformedThenValidToolProvider {
+        async fn chat_with_system(
+            &self,
+            _system_prompt: Option<&str>,
+            _message: &str,
+            _model: &str,
+            _temperature: f64,
+        ) -> anyhow::Result<String> {
+            anyhow::bail!("chat_with_system should not be called in this test");
+        }
+
+        async fn chat_with_history(
+            &self,
+            messages: &[ChatMessage],
+            _model: &str,
+            _temperature: f64,
+        ) -> anyhow::Result<String> {
+            if messages
+                .iter()
+                .any(|m| m.role == "user" && m.content.contains("[Tool results]"))
+            {
+                return Ok("Scheduled successfully.".to_string());
+            }
+
+            if messages
+                .iter()
+                .any(|m| m.role == "user" && m.content.contains("[Tool parser error]"))
+            {
+                return Ok(r#"<tool_call>
+{"name":"shell","arguments":{"command":"echo fixed"}}
+</tool_call>"#
+                    .to_string());
+            }
+
+            Ok(r#"<tool_call>schedule{"action":"create","command":"date","expression":"0 3 * * *","id":"nova-self-update"}"#.to_string())
+        }
+    }
+
+    #[tokio::test]
+    async fn run_tool_call_loop_retries_prefixed_tool_call_markup() {
+        let runs = Arc::new(AtomicUsize::new(0));
+        let tools_registry: Vec<Box<dyn Tool>> = vec![Box::new(CountingShellTool {
+            runs: Arc::clone(&runs),
+        })];
+
+        let mut history = vec![
+            ChatMessage::system("sys"),
+            ChatMessage::user("set schedule"),
+        ];
+
+        let response = run_tool_call_loop(
+            &PrefixMalformedThenValidToolProvider,
+            &mut history,
+            &tools_registry,
+            &NoopObserver,
+            "test-provider",
+            "test-model",
+            0.0,
+            true,
+        )
+        .await
+        .unwrap();
+
+        assert_eq!(response, "Scheduled successfully.");
+        assert_eq!(runs.load(Ordering::SeqCst), 1);
+        assert!(!response.contains("<tool_call>"));
+        assert!(history
+            .iter()
+            .any(|m| m.role == "user" && m.content.contains("[Tool parser error]")));
+    }
 }