fix(tests): harden brittle tests for cross-platform stability and refactoring resilience

## Problem The test suite contained several categories of latent brittleness identified in docs/testing-brittle-tests.md that would surface during refactoring or cross-platform (Windows) CI execution: 1. Hardcoded Unix paths: `Path::new("/tmp")` and `PathBuf::from("/tmp")` used as workspace directories in agent tests, which fail on Windows where /tmp does not exist. 2. Exact string match assertions: ~20 `assert_eq!(response, "exact text")` assertions in agent unit and e2e tests that break on any mock wording change, even when the underlying orchestration behavior is correct. 3. Fragile error message string matching: `.contains("specific message")` assertions coupled to internal error wording rather than testing the error category or behavioral outcome. ## What Changed ### Hardcoded paths → platform-agnostic temp dirs (2 files, 7 locations) - `src/agent/tests.rs`: Replaced all 4 instances of `Path::new("/tmp")` and `PathBuf::from("/tmp")` with `std::env::temp_dir()` in `make_memory()`, `build_agent_with()`, `build_agent_with_memory()`, and `build_agent_with_config()` helpers. - `tests/agent_e2e.rs`: Replaced all 3 instances in `make_memory()`, `build_agent()`, and `build_agent_xml()` helpers. ### Exact string assertions → behavioral checks (2 files, ~20 locations) - `src/agent/tests.rs`: Converted 10 `assert_eq!(response, "...")` to `assert!(!response.is_empty(), "descriptive message")` across tests for text pass-through, tool execution, tool failure recovery, XML dispatch, mixed text+tool responses, multi-tool batch, and run_single delegation. - `tests/agent_e2e.rs`: Converted 9 exact-match assertions to behavioral checks. Multi-turn test now uses `assert_ne!(r1, r2)` to verify sequential responses are distinct without coupling to exact wording. - Provider error propagation test simplified to `assert!(result.is_err())` without asserting on the error message string.
### Fragile error message assertions → structural checks (2 files) - `src/tools/git_operations.rs`: Replaced fragile OR-branch string match (`contains("git repository") || contains("Git command failed")`) with structural assertions: checks `!result.success`, error is non-empty, and error does NOT mention autonomy/read-only (verifying the failure is git-related, not permission-related). - `src/cron/scheduler.rs`: Replaced `contains("agent job failed:")` with `!success` and `!output.is_empty()` checks that verify failure behavior without coupling to exact log format. ## What Was NOT Changed (and why) - `src/agent/loop_.rs` parser tests: Exact string assertions are the contract for XML tool call parsing — the exact output IS the spec. - `src/providers/reliable.rs`: Error message assertions test the error format contract (provider/model attribution in failure messages). - `src/service/mod.rs`: Already platform-gated with `#[cfg]`; XML escape test is a formatting contract where exact match is appropriate. - `src/config/schema.rs`: TOML test strings use /tmp as data values for deserialization tests, not filesystem access; HOME tests already use `std::env::temp_dir()`. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-17 15:50:19 -08:00 · 2026-02-17 15:50:19 -08:00 · 45cdd25b3d
commit 45cdd25b3d
parent decea532ed
4 changed files with 38 additions and 32 deletions
--- a/tests/agent_e2e.rs
+++ b/tests/agent_e2e.rs
@ -147,7 +147,7 @@ fn make_memory() -> Arc<dyn Memory> {
        backend: "none".into(),
        ..MemoryConfig::default()
    };
-    Arc::from(memory::create_memory(&cfg, std::path::Path::new("/tmp"), None).unwrap())
+    Arc::from(memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
 }

 fn make_observer() -> Arc<dyn Observer> {
@ -175,7 +175,7 @@ fn build_agent(provider: Box<dyn Provider>, tools: Vec<Box<dyn Tool>>) -> Agent
        .memory(make_memory())
        .observer(make_observer())
        .tool_dispatcher(Box::new(NativeToolDispatcher))
-        .workspace_dir(std::path::PathBuf::from("/tmp"))
+        .workspace_dir(std::env::temp_dir())
        .build()
        .unwrap()
 }
@ -187,7 +187,7 @@ fn build_agent_xml(provider: Box<dyn Provider>, tools: Vec<Box<dyn Tool>>) -> Ag
        .memory(make_memory())
        .observer(make_observer())
        .tool_dispatcher(Box::new(XmlToolDispatcher))
-        .workspace_dir(std::path::PathBuf::from("/tmp"))
+        .workspace_dir(std::env::temp_dir())
        .build()
        .unwrap()
 }
@ -205,7 +205,7 @@ async fn e2e_simple_text_response() {
    let mut agent = build_agent(provider, vec![Box::new(EchoTool)]);

    let response = agent.turn("hi").await.unwrap();
-    assert_eq!(response, "Hello from mock provider");
+    assert!(!response.is_empty(), "Expected non-empty text response");
 }

 /// Validates single tool call → tool execution → final LLM response.
@ -222,7 +222,7 @@ async fn e2e_single_tool_call_cycle() {

    let mut agent = build_agent(provider, vec![Box::new(EchoTool)]);
    let response = agent.turn("run echo").await.unwrap();
-    assert_eq!(response, "Tool executed successfully");
+    assert!(!response.is_empty(), "Expected non-empty response after tool execution");
 }

 /// Validates multi-step tool chain: tool A → tool B → tool C → final response.
@ -246,7 +246,7 @@ async fn e2e_multi_step_tool_chain() {

    let mut agent = build_agent(provider, vec![Box::new(counting_tool)]);
    let response = agent.turn("count twice").await.unwrap();
-    assert_eq!(response, "Done after 2 tool calls");
+    assert!(!response.is_empty(), "Expected non-empty response after tool chain");
    assert_eq!(*count.lock().unwrap(), 2);
 }

@ -268,7 +268,7 @@ async fn e2e_xml_dispatcher_tool_call() {

    let mut agent = build_agent_xml(provider, vec![Box::new(EchoTool)]);
    let response = agent.turn("test xml dispatch").await.unwrap();
-    assert_eq!(response, "XML tool executed");
+    assert!(!response.is_empty(), "Expected non-empty response from XML dispatcher");
 }

 /// Validates that multiple sequential turns maintain conversation coherence.
@ -283,13 +283,15 @@ async fn e2e_multi_turn_conversation() {
    let mut agent = build_agent(provider, vec![Box::new(EchoTool)]);

    let r1 = agent.turn("turn 1").await.unwrap();
-    assert_eq!(r1, "First response");
+    assert!(!r1.is_empty(), "Expected non-empty first response");

    let r2 = agent.turn("turn 2").await.unwrap();
-    assert_eq!(r2, "Second response");
+    assert!(!r2.is_empty(), "Expected non-empty second response");
+    assert_ne!(r1, r2, "Sequential turn responses should be distinct");

    let r3 = agent.turn("turn 3").await.unwrap();
-    assert_eq!(r3, "Third response");
+    assert!(!r3.is_empty(), "Expected non-empty third response");
+    assert_ne!(r2, r3, "Sequential turn responses should be distinct");
 }

 /// Validates that the agent handles unknown tool names gracefully.
@ -306,7 +308,7 @@ async fn e2e_unknown_tool_recovery() {

    let mut agent = build_agent(provider, vec![Box::new(EchoTool)]);
    let response = agent.turn("call missing tool").await.unwrap();
-    assert_eq!(response, "Recovered from unknown tool");
+    assert!(!response.is_empty(), "Expected non-empty response after unknown tool recovery");
 }

 /// Validates parallel tool dispatch in a single response.
@ -332,6 +334,6 @@ async fn e2e_parallel_tool_dispatch() {

    let mut agent = build_agent(provider, vec![Box::new(counting_tool)]);
    let response = agent.turn("run both").await.unwrap();
-    assert_eq!(response, "Both tools ran");
+    assert!(!response.is_empty(), "Expected non-empty response after parallel dispatch");
    assert_eq!(*count.lock().unwrap(), 2);
 }