diff --git a/tests/agent_e2e.rs b/tests/agent_e2e.rs index 9ca3287..6bdfb36 100644 --- a/tests/agent_e2e.rs +++ b/tests/agent_e2e.rs @@ -13,11 +13,15 @@ use serde_json::json; use std::sync::{Arc, Mutex}; use zeroclaw::agent::agent::Agent; use zeroclaw::agent::dispatcher::{NativeToolDispatcher, XmlToolDispatcher}; +use zeroclaw::agent::memory_loader::MemoryLoader; use zeroclaw::config::MemoryConfig; use zeroclaw::memory; use zeroclaw::memory::Memory; use zeroclaw::observability::{NoopObserver, Observer}; -use zeroclaw::providers::{ChatRequest, ChatResponse, Provider, ToolCall}; +use zeroclaw::providers::traits::ChatMessage; +use zeroclaw::providers::{ + ChatRequest, ChatResponse, ConversationMessage, Provider, ProviderRuntimeOptions, ToolCall, +}; use zeroclaw::tools::{Tool, ToolResult}; // ───────────────────────────────────────────────────────────────────────────── @@ -138,6 +142,79 @@ impl Tool for CountingTool { } } +/// Mock provider that returns scripted responses AND records every request. +/// Pattern from `ScriptedProvider` in `src/agent/tests.rs`. 
+struct RecordingProvider {
+    responses: Mutex<Vec<ChatResponse>>,
+    recorded_requests: Arc<Mutex<Vec<Vec<ChatMessage>>>>,
+}
+
+impl RecordingProvider {
+    fn new(responses: Vec<ChatResponse>) -> (Self, Arc<Mutex<Vec<Vec<ChatMessage>>>>) {
+        let recorded = Arc::new(Mutex::new(Vec::new()));
+        let provider = Self {
+            responses: Mutex::new(responses),
+            recorded_requests: recorded.clone(),
+        };
+        (provider, recorded)
+    }
+}
+
+#[async_trait]
+impl Provider for RecordingProvider {
+    async fn chat_with_system(
+        &self,
+        _system_prompt: Option<&str>,
+        _message: &str,
+        _model: &str,
+        _temperature: f64,
+    ) -> Result<String> {
+        Ok("fallback".into())
+    }
+
+    async fn chat(
+        &self,
+        request: ChatRequest<'_>,
+        _model: &str,
+        _temperature: f64,
+    ) -> Result<ChatResponse> {
+        self.recorded_requests
+            .lock()
+            .unwrap()
+            .push(request.messages.to_vec());
+
+        let mut guard = self.responses.lock().unwrap();
+        if guard.is_empty() {
+            return Ok(ChatResponse {
+                text: Some("done".into()),
+                tool_calls: vec![],
+            });
+        }
+        Ok(guard.remove(0))
+    }
+}
+
+/// Mock memory loader that returns a static context string,
+/// simulating RAG recall without a real memory backend.
+struct StaticMemoryLoader {
+    context: String,
+}
+
+impl StaticMemoryLoader {
+    fn new(context: &str) -> Self {
+        Self {
+            context: context.to_string(),
+        }
+    }
+}
+
+#[async_trait]
+impl MemoryLoader for StaticMemoryLoader {
+    async fn load_context(&self, _memory: &dyn Memory, _user_message: &str) -> Result<String> {
+        Ok(self.context.clone())
+    }
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Test helpers
 // ─────────────────────────────────────────────────────────────────────────────
@@ -192,6 +269,26 @@ fn build_agent_xml(provider: Box<dyn Provider>, tools: Vec<Box<dyn Tool>>) -> Ag
     .unwrap()
 }
 
+fn build_recording_agent(
+    provider: Box<dyn Provider>,
+    tools: Vec<Box<dyn Tool>>,
+    memory_loader: Option<Box<dyn MemoryLoader>>,
+) -> Agent {
+    let mut builder = Agent::builder()
+        .provider(provider)
+        .tools(tools)
+        .memory(make_memory())
+        .observer(make_observer())
+        .tool_dispatcher(Box::new(NativeToolDispatcher))
+        .workspace_dir(std::env::temp_dir());
+
+    if let Some(loader) = memory_loader {
+        builder = builder.memory_loader(loader);
+    }
+
+    builder.build().unwrap()
+}
+
 // ═════════════════════════════════════════════════════════════════════════════
 // E2E smoke tests — full agent turn cycle
 // ═════════════════════════════════════════════════════════════════════════════
@@ -352,3 +449,243 @@ async fn e2e_parallel_tool_dispatch() {
     );
     assert_eq!(*count.lock().unwrap(), 2);
 }
+
+// ═════════════════════════════════════════════════════════════════════════════
+// Multi-turn history fidelity & memory enrichment tests
+// ═════════════════════════════════════════════════════════════════════════════
+
+/// Validates that multi-turn conversation correctly accumulates history
+/// and passes growing message sequences to the provider on each turn.
+#[tokio::test] +async fn e2e_multi_turn_history_fidelity() { + let (provider, recorded) = RecordingProvider::new(vec![ + text_response("response 1"), + text_response("response 2"), + text_response("response 3"), + ]); + + let mut agent = build_recording_agent(Box::new(provider), vec![], None); + + let r1 = agent.turn("msg 1").await.unwrap(); + assert_eq!(r1, "response 1"); + + let r2 = agent.turn("msg 2").await.unwrap(); + assert_eq!(r2, "response 2"); + + let r3 = agent.turn("msg 3").await.unwrap(); + assert_eq!(r3, "response 3"); + + let requests = recorded.lock().unwrap(); + assert_eq!(requests.len(), 3, "Provider should receive 3 requests"); + + // Request 1: system + user("msg 1") + let req1 = &requests[0]; + assert!(req1.len() >= 2); + assert_eq!(req1[0].role, "system"); + assert_eq!(req1[1].role, "user"); + assert!(req1[1].content.contains("msg 1")); + + // Request 2: system + user("msg 1") + assistant("response 1") + user("msg 2") + let req2 = &requests[1]; + let req2_users: Vec<&ChatMessage> = req2.iter().filter(|m| m.role == "user").collect(); + let req2_assts: Vec<&ChatMessage> = req2.iter().filter(|m| m.role == "assistant").collect(); + assert_eq!(req2_users.len(), 2, "Request 2: expected 2 user messages"); + assert_eq!( + req2_assts.len(), + 1, + "Request 2: expected 1 assistant message" + ); + assert!(req2_users[0].content.contains("msg 1")); + assert!(req2_users[1].content.contains("msg 2")); + assert_eq!(req2_assts[0].content, "response 1"); + + // Request 3: full history — 3 user + 2 assistant messages + let req3 = &requests[2]; + let req3_users: Vec<&ChatMessage> = req3.iter().filter(|m| m.role == "user").collect(); + let req3_assts: Vec<&ChatMessage> = req3.iter().filter(|m| m.role == "assistant").collect(); + assert_eq!(req3_users.len(), 3, "Request 3: expected 3 user messages"); + assert_eq!( + req3_assts.len(), + 2, + "Request 3: expected 2 assistant messages" + ); + assert!(req3_users[0].content.contains("msg 1")); + 
assert!(req3_users[1].content.contains("msg 2")); + assert!(req3_users[2].content.contains("msg 3")); + assert_eq!(req3_assts[0].content, "response 1"); + assert_eq!(req3_assts[1].content, "response 2"); + + // Verify agent history: system + 3*(user + assistant) = 7 + let history = agent.history(); + assert_eq!(history.len(), 7); + assert!(matches!(&history[0], ConversationMessage::Chat(c) if c.role == "system")); + assert!(matches!(&history[1], ConversationMessage::Chat(c) if c.role == "user")); + assert!(matches!(&history[2], ConversationMessage::Chat(c) if c.role == "assistant")); + assert!( + matches!(&history[6], ConversationMessage::Chat(c) if c.role == "assistant" && c.content == "response 3") + ); +} + +/// Validates that a custom MemoryLoader injects RAG context into user +/// messages before they reach the provider. +#[tokio::test] +async fn e2e_memory_enrichment_injects_context() { + let (provider, recorded) = RecordingProvider::new(vec![text_response("enriched response")]); + + let memory_context = "[Memory context]\n- user_name: test_user\n\n"; + let loader = StaticMemoryLoader::new(memory_context); + + let mut agent = build_recording_agent(Box::new(provider), vec![], Some(Box::new(loader))); + + let response = agent.turn("hello").await.unwrap(); + assert_eq!(response, "enriched response"); + + // Provider received enriched message + let requests = recorded.lock().unwrap(); + assert_eq!(requests.len(), 1); + let user_msg = requests[0].iter().find(|m| m.role == "user").unwrap(); + assert!( + user_msg.content.starts_with("[Memory context]"), + "User message should start with memory context, got: {}", + user_msg.content, + ); + assert!( + user_msg.content.contains("user_name: test_user"), + "User message should contain memory key-value pair", + ); + assert!( + user_msg.content.ends_with("hello"), + "User message should end with original text, got: {}", + user_msg.content, + ); + + // Agent history also stores enriched message + let history = 
agent.history(); + match &history[1] { + ConversationMessage::Chat(c) => { + assert_eq!(c.role, "user"); + assert!(c.content.starts_with("[Memory context]")); + assert!(c.content.ends_with("hello")); + } + other => panic!("Expected Chat variant for user message, got: {other:?}"), + } +} + +/// Validates multi-turn conversation with memory enrichment: every user +/// message is enriched, and the provider sees the full enriched history. +#[tokio::test] +async fn e2e_multi_turn_with_memory_enrichment() { + let (provider, recorded) = + RecordingProvider::new(vec![text_response("answer 1"), text_response("answer 2")]); + + let memory_context = "[Memory context]\n- project: zeroclaw\n\n"; + let loader = StaticMemoryLoader::new(memory_context); + + let mut agent = build_recording_agent(Box::new(provider), vec![], Some(Box::new(loader))); + + let r1 = agent.turn("first question").await.unwrap(); + assert_eq!(r1, "answer 1"); + + let r2 = agent.turn("second question").await.unwrap(); + assert_eq!(r2, "answer 2"); + + let requests = recorded.lock().unwrap(); + assert_eq!(requests.len(), 2); + + // Turn 1: user message is enriched + let req1_user = requests[0].iter().find(|m| m.role == "user").unwrap(); + assert!(req1_user.content.contains("[Memory context]")); + assert!(req1_user.content.contains("project: zeroclaw")); + assert!(req1_user.content.ends_with("first question")); + + // Turn 2: both user messages enriched, assistant from turn 1 present + let req2_users: Vec<&ChatMessage> = requests[1].iter().filter(|m| m.role == "user").collect(); + assert_eq!(req2_users.len(), 2, "Request 2 should have 2 user messages"); + + // Turn 1 user message still enriched in history + assert!(req2_users[0].content.contains("[Memory context]")); + assert!(req2_users[0].content.ends_with("first question")); + + // Turn 2 user message also enriched + assert!(req2_users[1].content.contains("[Memory context]")); + assert!(req2_users[1].content.ends_with("second question")); + + // Assistant 
response from turn 1 preserved + let req2_assts: Vec<&ChatMessage> = requests[1] + .iter() + .filter(|m| m.role == "assistant") + .collect(); + assert_eq!(req2_assts.len(), 1); + assert_eq!(req2_assts[0].content, "answer 1"); + + // History: system + 2*(enriched_user + assistant) = 5 + assert_eq!(agent.history().len(), 5); +} + +/// Validates that empty memory context passes user message through unmodified. +#[tokio::test] +async fn e2e_empty_memory_context_passthrough() { + let (provider, recorded) = RecordingProvider::new(vec![text_response("plain response")]); + + let loader = StaticMemoryLoader::new(""); + + let mut agent = build_recording_agent(Box::new(provider), vec![], Some(Box::new(loader))); + + let response = agent.turn("hello").await.unwrap(); + assert_eq!(response, "plain response"); + + let requests = recorded.lock().unwrap(); + let user_msg = requests[0].iter().find(|m| m.role == "user").unwrap(); + assert_eq!( + user_msg.content, "hello", + "Empty context should not prepend anything to user message", + ); +} + +// ═════════════════════════════════════════════════════════════════════════════ +// Live integration test — real OpenAI Codex API (requires credentials) +// ═════════════════════════════════════════════════════════════════════════════ + +/// Sends a real multi-turn conversation to OpenAI Codex and verifies +/// the model retains context from earlier messages. +/// +/// Requires valid OAuth credentials in `~/.zeroclaw/`. +/// Run manually: `cargo test e2e_live_openai_codex_multi_turn -- --ignored` +#[tokio::test] +#[ignore] +async fn e2e_live_openai_codex_multi_turn() { + use zeroclaw::providers::openai_codex::OpenAiCodexProvider; + use zeroclaw::providers::traits::Provider; + + let provider = OpenAiCodexProvider::new(&ProviderRuntimeOptions::default()); + let model = "gpt-5.3-codex"; + + // Turn 1: establish a fact + let messages_turn1 = vec![ + ChatMessage::system("You are a concise assistant. 
Reply in one short sentence."), + ChatMessage::user("The secret word is \"zephyr\". Just confirm you noted it."), + ]; + let response1 = provider + .chat_with_history(&messages_turn1, model, 0.0) + .await; + assert!(response1.is_ok(), "Turn 1 failed: {:?}", response1.err()); + let r1 = response1.unwrap(); + assert!(!r1.is_empty(), "Turn 1 returned empty response"); + + // Turn 2: ask the model to recall the fact + let messages_turn2 = vec![ + ChatMessage::system("You are a concise assistant. Reply in one short sentence."), + ChatMessage::user("The secret word is \"zephyr\". Just confirm you noted it."), + ChatMessage::assistant(&r1), + ChatMessage::user("What is the secret word?"), + ]; + let response2 = provider + .chat_with_history(&messages_turn2, model, 0.0) + .await; + assert!(response2.is_ok(), "Turn 2 failed: {:?}", response2.err()); + let r2 = response2.unwrap().to_lowercase(); + assert!( + r2.contains("zephyr"), + "Model should recall 'zephyr' from history, got: {r2}", + ); +}