feat: add multi-turn conversation history and tool execution

* feat: add multi-turn conversation history and tool execution

  Major enhancement to the agent loop:

  **Multi-turn conversation:**
  - Add `ChatMessage` type with system/user/assistant constructors
  - Add `chat_with_history` method to Provider trait (default impl delegates to `chat_with_system` for backward compatibility)
  - Implement native `chat_with_history` on OpenRouter, Compatible, Reliable, and Router providers to send full message history
  - Interactive mode now maintains persistent history across turns

  **Tool execution:**
  - Agent loop now parses `<tool_call>` XML tags from LLM responses
  - Executes tools from the registry and feeds results back as `<tool_result>` messages
  - Agentic loop continues until LLM produces final text (no tool calls)
  - MAX_TOOL_ITERATIONS (10) safety limit prevents runaway loops
  - System prompt includes structured tool-use protocol with JSON schemas

  **Types:**
  - `ChatMessage`, `ChatResponse`, `ToolCall`, `ToolResultMessage`, `ConversationMessage` — full conversation modeling types

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: address review comments on multi-turn + tool execution

  - Add history sliding window (MAX_HISTORY_MESSAGES=50) to prevent unbounded conversation history growth in interactive mode
  - Add 404→Responses API fallback in compatible.rs chat_with_history, matching chat_with_system behavior
  - Use super::api_error() for error sanitization in compatible.rs instead of raw error body (prevents secret leakage)
  - Add missing operational logs in reliable.rs chat_with_history: recovery, non-retryable, fallback switch warnings
  - Add trim_history tests

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: address second round of review comments

  - Sanitize raw error text in compatible.rs chat_with_system using sanitize_api_error (prevents leaking secrets in error messages)
  - Add chat_with_history to MockProvider in reliable.rs tests so the retry/fallback path is exercised end-to-end
  - Add chat_with_history_retries_then_recovers and chat_with_history_falls_back tests
  - Log warning on malformed <tool_call> JSON instead of silent drop
  - Flush stdout after print! in agent_turn so output appears before tool execution on line-buffered terminals
  - Make interactive mode resilient to transient errors (continue loop instead of terminating session)

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 14:43:02 -05:00 · 2026-02-15 14:43:02 -05:00 · 89b1ec6fa2
commit 89b1ec6fa2
parent 92c42dc24d
7 changed files with 829 additions and 21 deletions
--- a/src/agent/loop_.rs
+++ b/src/agent/loop_.rs
@ -1,16 +1,44 @@
 use crate::config::Config;
 use crate::memory::{self, Memory, MemoryCategory};
 use crate::observability::{self, Observer, ObserverEvent};
-use crate::providers::{self, Provider};
+use crate::providers::{self, ChatMessage, Provider};
 use crate::runtime;
 use crate::security::SecurityPolicy;
-use crate::tools;
+use crate::tools::{self, Tool};
 use crate::util::truncate_with_ellipsis;
 use anyhow::Result;
 use std::fmt::Write;
+use std::io::Write as IoWrite;
 use std::sync::Arc;
 use std::time::Instant;

+/// Upper bound on LLM round-trips that `agent_turn` performs for a single
+/// user message before giving up with an error (guards against runaway loops).
+const MAX_TOOL_ITERATIONS: usize = 10;
+
+/// Cap on the number of non-system messages retained in the conversation
+/// history. `trim_history` discards the oldest messages beyond this cap; a
+/// leading system prompt is neither counted nor discarded.
+const MAX_HISTORY_MESSAGES: usize = 50;
+
+/// Drop the oldest messages so that at most `MAX_HISTORY_MESSAGES` non-system
+/// messages remain.
+///
+/// When the first message has role `"system"` it is kept in place and the
+/// messages immediately after it are the ones removed. A history that is
+/// already within the cap is left untouched.
+fn trim_history(history: &mut Vec<ChatMessage>) {
+    // A leading system prompt occupies slot 0 and is exempt from trimming.
+    let pinned = match history.first() {
+        Some(first) if first.role == "system" => 1,
+        _ => 0,
+    };
+
+    // How many messages sit above the cap; zero when the history already fits.
+    let excess = (history.len() - pinned).saturating_sub(MAX_HISTORY_MESSAGES);
+    if excess > 0 {
+        // The oldest droppable messages start right after the pinned prefix.
+        history.drain(pinned..pinned + excess);
+    }
+}
+
 /// Build context preamble by searching memory for relevant entries
 async fn build_context(mem: &dyn Memory, user_msg: &str) -> String {
    let mut context = String::new();
@ -29,6 +57,178 @@ async fn build_context(mem: &dyn Memory, user_msg: &str) -> String {
    context
 }

+/// Look up a tool in the registry by its exact name.
+///
+/// Returns the first registered tool whose `name()` equals `name`, or `None`
+/// when no tool matches.
+fn find_tool<'a>(tools: &'a [Box<dyn Tool>], name: &str) -> Option<&'a dyn Tool> {
+    for candidate in tools {
+        if candidate.name() == name {
+            return Some(candidate.as_ref());
+        }
+    }
+    None
+}
+
+/// Split an LLM response into plain text and embedded tool calls.
+///
+/// Tool calls are expected in the XML-style wrapper that the system prompt
+/// asks the model to use:
+/// ```text
+/// <tool_call>
+/// {"name": "shell", "arguments": {"command": "ls"}}
+/// </tool_call>
+/// ```
+///
+/// Returns `(text, calls)`:
+/// - `text` is every non-empty, trimmed span outside the tags, joined with
+///   `\n`.
+/// - `calls` holds one entry per block whose body parsed as JSON, in order of
+///   appearance. A missing `name` becomes `""` and missing `arguments`
+///   becomes an empty JSON object.
+///
+/// Blocks whose body is not valid JSON are logged with `tracing::warn!` and
+/// skipped. An opening tag without a matching closing tag ends the scan; the
+/// dangling tag and everything after it are returned as text exactly once.
+///
+/// Only the `<tool_call>` wrapper is recognised; native OpenAI-style
+/// `tool_calls` arrays are not parsed here.
+fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
+    const OPEN_TAG: &str = "<tool_call>";
+    const CLOSE_TAG: &str = "</tool_call>";
+
+    let mut text_parts = Vec::new();
+    let mut calls = Vec::new();
+    let mut remaining = response;
+
+    while let Some(start) = remaining.find(OPEN_TAG) {
+        // Everything before the tag is text
+        let before = remaining[..start].trim();
+        if !before.is_empty() {
+            text_parts.push(before.to_string());
+        }
+
+        let body_start = start + OPEN_TAG.len();
+        if let Some(rel_end) = remaining[body_start..].find(CLOSE_TAG) {
+            let body_end = body_start + rel_end;
+            let inner = remaining[body_start..body_end].trim();
+            match serde_json::from_str::<serde_json::Value>(inner) {
+                Ok(parsed) => {
+                    let name = parsed
+                        .get("name")
+                        .and_then(|v| v.as_str())
+                        .unwrap_or("")
+                        .to_string();
+                    let arguments = parsed
+                        .get("arguments")
+                        .cloned()
+                        .unwrap_or(serde_json::Value::Object(serde_json::Map::new()));
+                    calls.push(ParsedToolCall { name, arguments });
+                }
+                Err(e) => {
+                    tracing::warn!("Malformed <tool_call> JSON: {e}");
+                }
+            }
+            remaining = &remaining[body_end + CLOSE_TAG.len()..];
+        } else {
+            // Unclosed tag: `before` was already recorded above, so resume at
+            // the tag itself. Leaving `remaining` untouched would emit the
+            // leading text a second time in the trailing-text step below.
+            remaining = &remaining[start..];
+            break;
+        }
+    }
+
+    // Remaining text after last tool call
+    let tail = remaining.trim();
+    if !tail.is_empty() {
+        text_parts.push(tail.to_string());
+    }
+
+    (text_parts.join("\n"), calls)
+}
+
+/// A single tool invocation extracted from an LLM response by
+/// `parse_tool_calls`.
+#[derive(Debug)]
+struct ParsedToolCall {
+    /// Value of the `name` field in the call's JSON; empty when the field is
+    /// absent or not a string.
+    name: String,
+    /// Value of the `arguments` field in the call's JSON; an empty JSON
+    /// object when the field is absent.
+    arguments: serde_json::Value,
+}
+
+/// Run one user turn of the agent loop.
+///
+/// Sends `history` to the provider, parses the reply for `<tool_call>`
+/// blocks, executes each requested tool from `tools_registry`, appends the
+/// assistant reply plus a `[Tool results]` user message to `history`, and
+/// repeats. The loop ends when a reply contains no tool calls; that reply is
+/// appended to `history` and its text (or the raw reply when the extracted
+/// text is empty) is returned.
+///
+/// Text that accompanies tool calls is printed to stdout immediately and
+/// flushed. Every executed tool is reported to `observer` with its duration
+/// and success flag; unknown tool names are reported back to the LLM but not
+/// to the observer.
+///
+/// # Errors
+///
+/// Propagates any provider error, and fails when the LLM is still requesting
+/// tools after `MAX_TOOL_ITERATIONS` round-trips.
+async fn agent_turn(
+    provider: &dyn Provider,
+    history: &mut Vec<ChatMessage>,
+    tools_registry: &[Box<dyn Tool>],
+    observer: &dyn Observer,
+    model: &str,
+    temperature: f64,
+) -> Result<String> {
+    for _ in 0..MAX_TOOL_ITERATIONS {
+        let response = provider
+            .chat_with_history(history, model, temperature)
+            .await?;
+        let (text, tool_calls) = parse_tool_calls(&response);
+
+        // A reply without tool calls is the final answer for this turn.
+        if tool_calls.is_empty() {
+            history.push(ChatMessage::assistant(&response));
+            if text.is_empty() {
+                return Ok(response);
+            }
+            return Ok(text);
+        }
+
+        // Surface any commentary the LLM wrote next to its tool calls before
+        // the (possibly slow) tools start running.
+        if !text.is_empty() {
+            print!("{text}");
+            let _ = std::io::stdout().flush();
+        }
+
+        // Run the requested tools in order, collecting their outputs.
+        let mut tool_results = String::new();
+        for call in &tool_calls {
+            let start = Instant::now();
+            let outcome = match find_tool(tools_registry, &call.name) {
+                None => format!("Unknown tool: {}", call.name),
+                Some(tool) => {
+                    let executed = tool.execute(call.arguments.clone()).await;
+                    // An execution error counts as an unsuccessful call.
+                    let success = executed.as_ref().map_or(false, |r| r.success);
+                    observer.record_event(&ObserverEvent::ToolCall {
+                        tool: call.name.clone(),
+                        duration: start.elapsed(),
+                        success,
+                    });
+                    match executed {
+                        Ok(r) if r.success => r.output,
+                        Ok(r) => format!("Error: {}", r.error.unwrap_or_else(|| r.output)),
+                        Err(e) => format!("Error executing {}: {e}", call.name),
+                    }
+                }
+            };
+
+            let _ = writeln!(
+                tool_results,
+                "<tool_result name=\"{}\">\n{}\n</tool_result>",
+                call.name, outcome
+            );
+        }
+
+        // Record the exchange so the next request shows the LLM what it asked
+        // for and what came back.
+        history.push(ChatMessage::assistant(&response));
+        history.push(ChatMessage::user(format!(
+            "[Tool results]\n{tool_results}"
+        )));
+    }
+
+    anyhow::bail!("Agent exceeded maximum tool iterations ({MAX_TOOL_ITERATIONS})")
+}
+
+/// Render the "Tool Use Protocol" section that is appended to the system
+/// prompt.
+///
+/// The section explains the `<tool_call>` / `<tool_result>` exchange format
+/// and then lists every tool in `tools_registry` with its name, description
+/// and parameter schema.
+fn build_tool_instructions(tools_registry: &[Box<dyn Tool>]) -> String {
+    // Fixed preamble describing the calling convention.
+    let mut instructions = [
+        "\n## Tool Use Protocol\n\n",
+        "To use a tool, wrap a JSON object in <tool_call></tool_call> tags:\n\n",
+        "```\n<tool_call>\n{\"name\": \"tool_name\", \"arguments\": {\"param\": \"value\"}}\n</tool_call>\n```\n\n",
+        "You may use multiple tool calls in a single response. ",
+        "After tool execution, results appear in <tool_result> tags. ",
+        "Continue reasoning with the results until you can give a final answer.\n\n",
+        "### Available Tools\n\n",
+    ]
+    .concat();
+
+    // One entry per registered tool, in registry order.
+    for entry in tools_registry {
+        let _ = writeln!(
+            instructions,
+            "**{}**: {}\nParameters: `{}`\n",
+            entry.name(),
+            entry.description(),
+            entry.parameters_schema()
+        );
+    }
+
+    instructions
+}
+
 #[allow(clippy::too_many_lines)]
 pub async fn run(
    config: Config,
@ -61,7 +261,7 @@ pub async fn run(
    } else {
        None
    };
-    let _tools = tools::all_tools_with_runtime(
+    let tools_registry = tools::all_tools_with_runtime(
        &security,
        runtime,
        mem.clone(),
@ -133,7 +333,7 @@ pub async fn run(
            "Execute actions on 1000+ apps via Composio (Gmail, Notion, GitHub, Slack, etc.). Use action='list' to discover, 'execute' to run, 'connect' to OAuth.",
        ));
    }
-    let system_prompt = crate::channels::build_system_prompt(
+    let mut system_prompt = crate::channels::build_system_prompt(
        &config.workspace_dir,
        model_name,
        &tool_descs,
@ -141,6 +341,9 @@ pub async fn run(
        Some(&config.identity),
    );

+    // Append structured tool-use instructions with schemas
+    system_prompt.push_str(&build_tool_instructions(&tools_registry));
+
    // ── Execute ──────────────────────────────────────────────────
    let start = Instant::now();

@ -160,9 +363,20 @@ pub async fn run(
            format!("{context}{msg}")
        };

-        let response = provider
-            .chat_with_system(Some(&system_prompt), &enriched, model_name, temperature)
-            .await?;
+        let mut history = vec![
+            ChatMessage::system(&system_prompt),
+            ChatMessage::user(&enriched),
+        ];
+
+        let response = agent_turn(
+            provider.as_ref(),
+            &mut history,
+            &tools_registry,
+            observer.as_ref(),
+            model_name,
+            temperature,
+        )
+        .await?;
        println!("{response}");

        // Auto-save assistant response to daily log
@ -184,6 +398,9 @@ pub async fn run(
            let _ = crate::channels::Channel::listen(&cli, tx).await;
        });

+        // Persistent conversation history across turns
+        let mut history = vec![ChatMessage::system(&system_prompt)];
+
        while let Some(msg) = rx.recv().await {
            // Auto-save conversation turns
            if config.memory.auto_save {
@ -200,11 +417,29 @@ pub async fn run(
                format!("{context}{}", msg.content)
            };

-            let response = provider
-                .chat_with_system(Some(&system_prompt), &enriched, model_name, temperature)
-                .await?;
+            history.push(ChatMessage::user(&enriched));
+
+            let response = match agent_turn(
+                provider.as_ref(),
+                &mut history,
+                &tools_registry,
+                observer.as_ref(),
+                model_name,
+                temperature,
+            )
+            .await
+            {
+                Ok(resp) => resp,
+                Err(e) => {
+                    eprintln!("\nError: {e}\n");
+                    continue;
+                }
+            };
            println!("\n{response}\n");

+            // Prevent unbounded history growth in long interactive sessions
+            trim_history(&mut history);
+
            if config.memory.auto_save {
                let summary = truncate_with_ellipsis(&response, 100);
                let _ = mem
@ -224,3 +459,126 @@ pub async fn run(

    Ok(())
 }
+
+/// Unit tests for the tool-call parser, the tool instruction builder and the
+/// history trimming helper.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ── parse_tool_calls ─────────────────────────────────────────
+
+    #[test]
+    fn parse_tool_calls_extracts_single_call() {
+        let response = r#"Let me check that.
+<tool_call>
+{"name": "shell", "arguments": {"command": "ls -la"}}
+</tool_call>"#;
+
+        let (text, calls) = parse_tool_calls(response);
+        assert_eq!(text, "Let me check that.");
+        assert_eq!(calls.len(), 1);
+        assert_eq!(calls[0].name, "shell");
+        assert_eq!(
+            calls[0].arguments.get("command").unwrap().as_str().unwrap(),
+            "ls -la"
+        );
+    }
+
+    #[test]
+    fn parse_tool_calls_extracts_multiple_calls() {
+        let response = r#"<tool_call>
+{"name": "file_read", "arguments": {"path": "a.txt"}}
+</tool_call>
+<tool_call>
+{"name": "file_read", "arguments": {"path": "b.txt"}}
+</tool_call>"#;
+
+        let (_, calls) = parse_tool_calls(response);
+        assert_eq!(calls.len(), 2);
+        assert_eq!(calls[0].name, "file_read");
+        assert_eq!(calls[1].name, "file_read");
+    }
+
+    #[test]
+    fn parse_tool_calls_returns_text_only_when_no_calls() {
+        let response = "Just a normal response with no tools.";
+        let (text, calls) = parse_tool_calls(response);
+        assert_eq!(text, "Just a normal response with no tools.");
+        assert!(calls.is_empty());
+    }
+
+    #[test]
+    fn parse_tool_calls_handles_malformed_json() {
+        let response = r#"<tool_call>
+not valid json
+</tool_call>
+Some text after."#;
+
+        let (text, calls) = parse_tool_calls(response);
+        assert!(calls.is_empty());
+        assert!(text.contains("Some text after."));
+    }
+
+    #[test]
+    fn parse_tool_calls_text_before_and_after() {
+        let response = r#"Before text.
+<tool_call>
+{"name": "shell", "arguments": {"command": "echo hi"}}
+</tool_call>
+After text."#;
+
+        let (text, calls) = parse_tool_calls(response);
+        assert!(text.contains("Before text."));
+        assert!(text.contains("After text."));
+        assert_eq!(calls.len(), 1);
+    }
+
+    #[test]
+    fn parse_tool_calls_defaults_missing_arguments_to_empty_object() {
+        let response = r#"<tool_call>{"name": "shell"}</tool_call>"#;
+
+        let (text, calls) = parse_tool_calls(response);
+        assert!(text.is_empty());
+        assert_eq!(calls.len(), 1);
+        assert_eq!(calls[0].name, "shell");
+        // A call without `arguments` still carries an (empty) JSON object.
+        assert!(calls[0]
+            .arguments
+            .as_object()
+            .map_or(false, |args| args.is_empty()));
+    }
+
+    // ── build_tool_instructions ──────────────────────────────────
+
+    #[test]
+    fn build_tool_instructions_includes_all_tools() {
+        use crate::security::SecurityPolicy;
+        let security = Arc::new(SecurityPolicy::from_config(
+            &crate::config::AutonomyConfig::default(),
+            std::path::Path::new("/tmp"),
+        ));
+        let tools = tools::default_tools(security);
+        let instructions = build_tool_instructions(&tools);
+
+        assert!(instructions.contains("## Tool Use Protocol"));
+        assert!(instructions.contains("<tool_call>"));
+        assert!(instructions.contains("shell"));
+        assert!(instructions.contains("file_read"));
+        assert!(instructions.contains("file_write"));
+    }
+
+    // ── trim_history ─────────────────────────────────────────────
+
+    #[test]
+    fn trim_history_preserves_system_prompt() {
+        let mut history = vec![ChatMessage::system("system prompt")];
+        for i in 0..MAX_HISTORY_MESSAGES + 20 {
+            history.push(ChatMessage::user(format!("msg {i}")));
+        }
+        let original_len = history.len();
+        assert!(original_len > MAX_HISTORY_MESSAGES + 1);
+
+        trim_history(&mut history);
+
+        // System prompt preserved
+        assert_eq!(history[0].role, "system");
+        assert_eq!(history[0].content, "system prompt");
+        // Trimmed to limit
+        assert_eq!(history.len(), MAX_HISTORY_MESSAGES + 1); // +1 for system
+        // Most recent messages preserved
+        let last = &history[history.len() - 1];
+        assert_eq!(
+            last.content,
+            format!("msg {}", MAX_HISTORY_MESSAGES + 19)
+        );
+    }
+
+    #[test]
+    fn trim_history_noop_when_within_limit() {
+        let mut history = vec![
+            ChatMessage::system("sys"),
+            ChatMessage::user("hello"),
+            ChatMessage::assistant("hi"),
+        ];
+        trim_history(&mut history);
+        assert_eq!(history.len(), 3);
+    }
+
+    #[test]
+    fn trim_history_without_system_prompt_drops_oldest() {
+        let mut history = Vec::new();
+        for i in 0..MAX_HISTORY_MESSAGES + 5 {
+            history.push(ChatMessage::user(format!("msg {i}")));
+        }
+
+        trim_history(&mut history);
+
+        // With no system prompt to pin, trimming starts at index 0.
+        assert_eq!(history.len(), MAX_HISTORY_MESSAGES);
+        assert_eq!(history[0].content, "msg 5");
+        assert_eq!(
+            history[history.len() - 1].content,
+            format!("msg {}", MAX_HISTORY_MESSAGES + 4)
+        );
+    }
+}