feat: add multi-turn conversation history and tool execution
* feat: add multi-turn conversation history and tool execution Major enhancement to the agent loop: **Multi-turn conversation:** - Add `ChatMessage` type with system/user/assistant constructors - Add `chat_with_history` method to Provider trait (default impl delegates to `chat_with_system` for backward compatibility) - Implement native `chat_with_history` on OpenRouter, Compatible, Reliable, and Router providers to send full message history - Interactive mode now maintains persistent history across turns **Tool execution:** - Agent loop now parses `<tool_call>` XML tags from LLM responses - Executes tools from the registry and feeds results back as `<tool_result>` messages - Agentic loop continues until LLM produces final text (no tool calls) - MAX_TOOL_ITERATIONS (10) safety limit prevents runaway loops - System prompt includes structured tool-use protocol with JSON schemas **Types:** - `ChatMessage`, `ChatResponse`, `ToolCall`, `ToolResultMessage`, `ConversationMessage` — full conversation modeling types Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address review comments on multi-turn + tool execution - Add history sliding window (MAX_HISTORY_MESSAGES=50) to prevent unbounded conversation history growth in interactive mode - Add 404→Responses API fallback in compatible.rs chat_with_history, matching chat_with_system behavior - Use super::api_error() for error sanitization in compatible.rs instead of raw error body (prevents secret leakage) - Add missing operational logs in reliable.rs chat_with_history: recovery, non-retryable, fallback switch warnings - Add trim_history tests Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address second round of review comments - Sanitize raw error text in compatible.rs chat_with_system using sanitize_api_error (prevents leaking secrets in error messages) - Add chat_with_history to MockProvider in reliable.rs tests so the retry/fallback path is exercised end-to-end - Add chat_with_history_retries_then_recovers and chat_with_history_falls_back tests - Log warning on malformed <tool_call> JSON instead of silent drop - Flush stdout after print! in agent_turn so output appears before tool execution on line-buffered terminals - Make interactive mode resilient to transient errors (continue loop instead of terminating session) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
92c42dc24d
commit
89b1ec6fa2
7 changed files with 829 additions and 21 deletions
|
|
@ -1,16 +1,44 @@
|
|||
use crate::config::Config;
|
||||
use crate::memory::{self, Memory, MemoryCategory};
|
||||
use crate::observability::{self, Observer, ObserverEvent};
|
||||
use crate::providers::{self, Provider};
|
||||
use crate::providers::{self, ChatMessage, Provider};
|
||||
use crate::runtime;
|
||||
use crate::security::SecurityPolicy;
|
||||
use crate::tools;
|
||||
use crate::tools::{self, Tool};
|
||||
use crate::util::truncate_with_ellipsis;
|
||||
use anyhow::Result;
|
||||
use std::fmt::Write;
|
||||
use std::io::Write as IoWrite;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Maximum agentic tool-use iterations per user message to prevent runaway loops.
|
||||
const MAX_TOOL_ITERATIONS: usize = 10;
|
||||
|
||||
/// Maximum number of non-system messages to keep in history.
|
||||
/// When exceeded, the oldest messages are dropped (system prompt is always preserved).
|
||||
const MAX_HISTORY_MESSAGES: usize = 50;
|
||||
|
||||
/// Trim conversation history to prevent unbounded growth.
|
||||
/// Preserves the system prompt (first message if role=system) and the most recent messages.
|
||||
fn trim_history(history: &mut Vec<ChatMessage>) {
|
||||
// Nothing to trim if within limit
|
||||
let has_system = history.first().map_or(false, |m| m.role == "system");
|
||||
let non_system_count = if has_system {
|
||||
history.len() - 1
|
||||
} else {
|
||||
history.len()
|
||||
};
|
||||
|
||||
if non_system_count <= MAX_HISTORY_MESSAGES {
|
||||
return;
|
||||
}
|
||||
|
||||
let start = if has_system { 1 } else { 0 };
|
||||
let to_remove = non_system_count - MAX_HISTORY_MESSAGES;
|
||||
history.drain(start..start + to_remove);
|
||||
}
|
||||
|
||||
/// Build context preamble by searching memory for relevant entries
|
||||
async fn build_context(mem: &dyn Memory, user_msg: &str) -> String {
|
||||
let mut context = String::new();
|
||||
|
|
@ -29,6 +57,178 @@ async fn build_context(mem: &dyn Memory, user_msg: &str) -> String {
|
|||
context
|
||||
}
|
||||
|
||||
/// Find a tool by name in the registry.
|
||||
fn find_tool<'a>(tools: &'a [Box<dyn Tool>], name: &str) -> Option<&'a dyn Tool> {
|
||||
tools.iter().find(|t| t.name() == name).map(|t| t.as_ref())
|
||||
}
|
||||
|
||||
/// Parse tool calls from an LLM response that uses XML-style function calling.
|
||||
///
|
||||
/// Expected format (common with system-prompt-guided tool use):
|
||||
/// ```text
|
||||
/// <tool_call>
|
||||
/// {"name": "shell", "arguments": {"command": "ls"}}
|
||||
/// </tool_call>
|
||||
/// ```
|
||||
///
|
||||
/// Also supports JSON with `tool_calls` array from OpenAI-format responses.
|
||||
fn parse_tool_calls(response: &str) -> (String, Vec<ParsedToolCall>) {
|
||||
let mut text_parts = Vec::new();
|
||||
let mut calls = Vec::new();
|
||||
let mut remaining = response;
|
||||
|
||||
while let Some(start) = remaining.find("<tool_call>") {
|
||||
// Everything before the tag is text
|
||||
let before = &remaining[..start];
|
||||
if !before.trim().is_empty() {
|
||||
text_parts.push(before.trim().to_string());
|
||||
}
|
||||
|
||||
if let Some(end) = remaining[start..].find("</tool_call>") {
|
||||
let inner = &remaining[start + 11..start + end];
|
||||
match serde_json::from_str::<serde_json::Value>(inner.trim()) {
|
||||
Ok(parsed) => {
|
||||
let name = parsed
|
||||
.get("name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let arguments = parsed
|
||||
.get("arguments")
|
||||
.cloned()
|
||||
.unwrap_or(serde_json::Value::Object(serde_json::Map::new()));
|
||||
calls.push(ParsedToolCall { name, arguments });
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Malformed <tool_call> JSON: {e}");
|
||||
}
|
||||
}
|
||||
remaining = &remaining[start + end + 12..];
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining text after last tool call
|
||||
if !remaining.trim().is_empty() {
|
||||
text_parts.push(remaining.trim().to_string());
|
||||
}
|
||||
|
||||
(text_parts.join("\n"), calls)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ParsedToolCall {
|
||||
name: String,
|
||||
arguments: serde_json::Value,
|
||||
}
|
||||
|
||||
/// Execute a single turn of the agent loop: send messages, parse tool calls,
|
||||
/// execute tools, and loop until the LLM produces a final text response.
|
||||
async fn agent_turn(
|
||||
provider: &dyn Provider,
|
||||
history: &mut Vec<ChatMessage>,
|
||||
tools_registry: &[Box<dyn Tool>],
|
||||
observer: &dyn Observer,
|
||||
model: &str,
|
||||
temperature: f64,
|
||||
) -> Result<String> {
|
||||
for _iteration in 0..MAX_TOOL_ITERATIONS {
|
||||
let response = provider
|
||||
.chat_with_history(history, model, temperature)
|
||||
.await?;
|
||||
|
||||
let (text, tool_calls) = parse_tool_calls(&response);
|
||||
|
||||
if tool_calls.is_empty() {
|
||||
// No tool calls — this is the final response
|
||||
history.push(ChatMessage::assistant(&response));
|
||||
return Ok(if text.is_empty() {
|
||||
response
|
||||
} else {
|
||||
text
|
||||
});
|
||||
}
|
||||
|
||||
// Print any text the LLM produced alongside tool calls
|
||||
if !text.is_empty() {
|
||||
print!("{text}");
|
||||
let _ = std::io::stdout().flush();
|
||||
}
|
||||
|
||||
// Execute each tool call and build results
|
||||
let mut tool_results = String::new();
|
||||
for call in &tool_calls {
|
||||
let start = Instant::now();
|
||||
let result = if let Some(tool) = find_tool(tools_registry, &call.name) {
|
||||
match tool.execute(call.arguments.clone()).await {
|
||||
Ok(r) => {
|
||||
observer.record_event(&ObserverEvent::ToolCall {
|
||||
tool: call.name.clone(),
|
||||
duration: start.elapsed(),
|
||||
success: r.success,
|
||||
});
|
||||
if r.success {
|
||||
r.output
|
||||
} else {
|
||||
format!("Error: {}", r.error.unwrap_or_else(|| r.output))
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
observer.record_event(&ObserverEvent::ToolCall {
|
||||
tool: call.name.clone(),
|
||||
duration: start.elapsed(),
|
||||
success: false,
|
||||
});
|
||||
format!("Error executing {}: {e}", call.name)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
format!("Unknown tool: {}", call.name)
|
||||
};
|
||||
|
||||
let _ = writeln!(
|
||||
tool_results,
|
||||
"<tool_result name=\"{}\">\n{}\n</tool_result>",
|
||||
call.name, result
|
||||
);
|
||||
}
|
||||
|
||||
// Add assistant message with tool calls + tool results to history
|
||||
history.push(ChatMessage::assistant(&response));
|
||||
history.push(ChatMessage::user(format!(
|
||||
"[Tool results]\n{tool_results}"
|
||||
)));
|
||||
}
|
||||
|
||||
anyhow::bail!("Agent exceeded maximum tool iterations ({MAX_TOOL_ITERATIONS})")
|
||||
}
|
||||
|
||||
/// Build the tool instruction block for the system prompt so the LLM knows
|
||||
/// how to invoke tools.
|
||||
fn build_tool_instructions(tools_registry: &[Box<dyn Tool>]) -> String {
|
||||
let mut instructions = String::new();
|
||||
instructions.push_str("\n## Tool Use Protocol\n\n");
|
||||
instructions.push_str("To use a tool, wrap a JSON object in <tool_call></tool_call> tags:\n\n");
|
||||
instructions.push_str("```\n<tool_call>\n{\"name\": \"tool_name\", \"arguments\": {\"param\": \"value\"}}\n</tool_call>\n```\n\n");
|
||||
instructions.push_str("You may use multiple tool calls in a single response. ");
|
||||
instructions.push_str("After tool execution, results appear in <tool_result> tags. ");
|
||||
instructions.push_str("Continue reasoning with the results until you can give a final answer.\n\n");
|
||||
instructions.push_str("### Available Tools\n\n");
|
||||
|
||||
for tool in tools_registry {
|
||||
let _ = writeln!(
|
||||
instructions,
|
||||
"**{}**: {}\nParameters: `{}`\n",
|
||||
tool.name(),
|
||||
tool.description(),
|
||||
tool.parameters_schema()
|
||||
);
|
||||
}
|
||||
|
||||
instructions
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub async fn run(
|
||||
config: Config,
|
||||
|
|
@ -61,7 +261,7 @@ pub async fn run(
|
|||
} else {
|
||||
None
|
||||
};
|
||||
let _tools = tools::all_tools_with_runtime(
|
||||
let tools_registry = tools::all_tools_with_runtime(
|
||||
&security,
|
||||
runtime,
|
||||
mem.clone(),
|
||||
|
|
@ -133,7 +333,7 @@ pub async fn run(
|
|||
"Execute actions on 1000+ apps via Composio (Gmail, Notion, GitHub, Slack, etc.). Use action='list' to discover, 'execute' to run, 'connect' to OAuth.",
|
||||
));
|
||||
}
|
||||
let system_prompt = crate::channels::build_system_prompt(
|
||||
let mut system_prompt = crate::channels::build_system_prompt(
|
||||
&config.workspace_dir,
|
||||
model_name,
|
||||
&tool_descs,
|
||||
|
|
@ -141,6 +341,9 @@ pub async fn run(
|
|||
Some(&config.identity),
|
||||
);
|
||||
|
||||
// Append structured tool-use instructions with schemas
|
||||
system_prompt.push_str(&build_tool_instructions(&tools_registry));
|
||||
|
||||
// ── Execute ──────────────────────────────────────────────────
|
||||
let start = Instant::now();
|
||||
|
||||
|
|
@ -160,9 +363,20 @@ pub async fn run(
|
|||
format!("{context}{msg}")
|
||||
};
|
||||
|
||||
let response = provider
|
||||
.chat_with_system(Some(&system_prompt), &enriched, model_name, temperature)
|
||||
.await?;
|
||||
let mut history = vec![
|
||||
ChatMessage::system(&system_prompt),
|
||||
ChatMessage::user(&enriched),
|
||||
];
|
||||
|
||||
let response = agent_turn(
|
||||
provider.as_ref(),
|
||||
&mut history,
|
||||
&tools_registry,
|
||||
observer.as_ref(),
|
||||
model_name,
|
||||
temperature,
|
||||
)
|
||||
.await?;
|
||||
println!("{response}");
|
||||
|
||||
// Auto-save assistant response to daily log
|
||||
|
|
@ -184,6 +398,9 @@ pub async fn run(
|
|||
let _ = crate::channels::Channel::listen(&cli, tx).await;
|
||||
});
|
||||
|
||||
// Persistent conversation history across turns
|
||||
let mut history = vec![ChatMessage::system(&system_prompt)];
|
||||
|
||||
while let Some(msg) = rx.recv().await {
|
||||
// Auto-save conversation turns
|
||||
if config.memory.auto_save {
|
||||
|
|
@ -200,11 +417,29 @@ pub async fn run(
|
|||
format!("{context}{}", msg.content)
|
||||
};
|
||||
|
||||
let response = provider
|
||||
.chat_with_system(Some(&system_prompt), &enriched, model_name, temperature)
|
||||
.await?;
|
||||
history.push(ChatMessage::user(&enriched));
|
||||
|
||||
let response = match agent_turn(
|
||||
provider.as_ref(),
|
||||
&mut history,
|
||||
&tools_registry,
|
||||
observer.as_ref(),
|
||||
model_name,
|
||||
temperature,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(resp) => resp,
|
||||
Err(e) => {
|
||||
eprintln!("\nError: {e}\n");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
println!("\n{response}\n");
|
||||
|
||||
// Prevent unbounded history growth in long interactive sessions
|
||||
trim_history(&mut history);
|
||||
|
||||
if config.memory.auto_save {
|
||||
let summary = truncate_with_ellipsis(&response, 100);
|
||||
let _ = mem
|
||||
|
|
@ -224,3 +459,126 @@ pub async fn run(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn parse_tool_calls_extracts_single_call() {
|
||||
let response = r#"Let me check that.
|
||||
<tool_call>
|
||||
{"name": "shell", "arguments": {"command": "ls -la"}}
|
||||
</tool_call>"#;
|
||||
|
||||
let (text, calls) = parse_tool_calls(response);
|
||||
assert_eq!(text, "Let me check that.");
|
||||
assert_eq!(calls.len(), 1);
|
||||
assert_eq!(calls[0].name, "shell");
|
||||
assert_eq!(
|
||||
calls[0].arguments.get("command").unwrap().as_str().unwrap(),
|
||||
"ls -la"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_calls_extracts_multiple_calls() {
|
||||
let response = r#"<tool_call>
|
||||
{"name": "file_read", "arguments": {"path": "a.txt"}}
|
||||
</tool_call>
|
||||
<tool_call>
|
||||
{"name": "file_read", "arguments": {"path": "b.txt"}}
|
||||
</tool_call>"#;
|
||||
|
||||
let (_, calls) = parse_tool_calls(response);
|
||||
assert_eq!(calls.len(), 2);
|
||||
assert_eq!(calls[0].name, "file_read");
|
||||
assert_eq!(calls[1].name, "file_read");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_calls_returns_text_only_when_no_calls() {
|
||||
let response = "Just a normal response with no tools.";
|
||||
let (text, calls) = parse_tool_calls(response);
|
||||
assert_eq!(text, "Just a normal response with no tools.");
|
||||
assert!(calls.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_calls_handles_malformed_json() {
|
||||
let response = r#"<tool_call>
|
||||
not valid json
|
||||
</tool_call>
|
||||
Some text after."#;
|
||||
|
||||
let (text, calls) = parse_tool_calls(response);
|
||||
assert!(calls.is_empty());
|
||||
assert!(text.contains("Some text after."));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_calls_text_before_and_after() {
|
||||
let response = r#"Before text.
|
||||
<tool_call>
|
||||
{"name": "shell", "arguments": {"command": "echo hi"}}
|
||||
</tool_call>
|
||||
After text."#;
|
||||
|
||||
let (text, calls) = parse_tool_calls(response);
|
||||
assert!(text.contains("Before text."));
|
||||
assert!(text.contains("After text."));
|
||||
assert_eq!(calls.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_tool_instructions_includes_all_tools() {
|
||||
use crate::security::SecurityPolicy;
|
||||
let security = Arc::new(SecurityPolicy::from_config(
|
||||
&crate::config::AutonomyConfig::default(),
|
||||
std::path::Path::new("/tmp"),
|
||||
));
|
||||
let tools = tools::default_tools(security);
|
||||
let instructions = build_tool_instructions(&tools);
|
||||
|
||||
assert!(instructions.contains("## Tool Use Protocol"));
|
||||
assert!(instructions.contains("<tool_call>"));
|
||||
assert!(instructions.contains("shell"));
|
||||
assert!(instructions.contains("file_read"));
|
||||
assert!(instructions.contains("file_write"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim_history_preserves_system_prompt() {
|
||||
let mut history = vec![ChatMessage::system("system prompt")];
|
||||
for i in 0..MAX_HISTORY_MESSAGES + 20 {
|
||||
history.push(ChatMessage::user(format!("msg {i}")));
|
||||
}
|
||||
let original_len = history.len();
|
||||
assert!(original_len > MAX_HISTORY_MESSAGES + 1);
|
||||
|
||||
trim_history(&mut history);
|
||||
|
||||
// System prompt preserved
|
||||
assert_eq!(history[0].role, "system");
|
||||
assert_eq!(history[0].content, "system prompt");
|
||||
// Trimmed to limit
|
||||
assert_eq!(history.len(), MAX_HISTORY_MESSAGES + 1); // +1 for system
|
||||
// Most recent messages preserved
|
||||
let last = &history[history.len() - 1];
|
||||
assert_eq!(
|
||||
last.content,
|
||||
format!("msg {}", MAX_HISTORY_MESSAGES + 19)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim_history_noop_when_within_limit() {
|
||||
let mut history = vec![
|
||||
ChatMessage::system("sys"),
|
||||
ChatMessage::user("hello"),
|
||||
ChatMessage::assistant("hi"),
|
||||
];
|
||||
trim_history(&mut history);
|
||||
assert_eq!(history.len(), 3);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue