zeroclaw/benches/agent_benchmarks.rs
Alex Gorevski 8724884b00
feat(ci): add Criterion performance benchmarks for hot paths (#638)
Add benchmarks using Criterion for:
- XML tool-call parsing (single and multi-call)
- Native tool-call parsing
- SQLite memory store/recall/count operations
- Full agent turn cycle (text-only and with tool call)

Add CI workflow (.github/workflows/benchmarks.yml) that:
- Runs benchmarks on push to main and on PRs
- Uploads Criterion results as artifacts
- Posts benchmark summary as PR comment for regression visibility

Ref: https://github.com/zeroclaw-labs/zeroclaw/issues/618 (item 7)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-17 15:15:19 -05:00

314 lines
11 KiB
Rust

//! Performance benchmarks for ZeroClaw hot paths.
//!
//! Benchmarks cover:
//! - Tool dispatch (XML parsing, native parsing)
//! - Memory store/recall cycles (SQLite backend)
//! - Agent turn cycle (full orchestration loop)
//!
//! Run: `cargo bench`
//!
//! Ref: https://github.com/zeroclaw-labs/zeroclaw/issues/618 (item 7)
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::sync::{Arc, Mutex};
use zeroclaw::agent::agent::Agent;
use zeroclaw::agent::dispatcher::{NativeToolDispatcher, XmlToolDispatcher, ToolDispatcher};
use zeroclaw::config::MemoryConfig;
use zeroclaw::memory;
use zeroclaw::memory::{Memory, MemoryCategory};
use zeroclaw::observability::{NoopObserver, Observer};
use zeroclaw::providers::{ChatRequest, ChatResponse, Provider, ToolCall};
use zeroclaw::tools::{Tool, ToolResult};
use anyhow::Result;
use async_trait::async_trait;
// ─────────────────────────────────────────────────────────────────────────────
// Mock infrastructure (mirrors test mocks, kept local for benchmark isolation)
// ─────────────────────────────────────────────────────────────────────────────
/// Scripted [`Provider`] mock used by the agent-turn benchmarks.
///
/// The `Provider::chat` implementation in this file removes and returns the
/// first queued response on each call; once the queue is empty it answers
/// with a text-only `"done"` reply.
struct BenchProvider {
// Wrapped in a `Mutex` because `chat` only receives `&self` but has to
// remove entries from the queue.
responses: Mutex<Vec<ChatResponse>>,
}
impl BenchProvider {
    /// Builds a provider whose script is a single text reply with no tool calls.
    fn text_only(text: &str) -> Self {
        let reply = ChatResponse {
            text: Some(text.into()),
            tool_calls: Vec::new(),
        };
        Self {
            responses: Mutex::new(vec![reply]),
        }
    }

    /// Builds a provider with a two-step script: first an empty-text reply
    /// requesting the `noop` tool (call id `tc1`, arguments `{}`), then a
    /// plain `"done"` text reply.
    fn with_tool_then_text() -> Self {
        let noop_call = ToolCall {
            id: "tc1".into(),
            name: "noop".into(),
            arguments: "{}".into(),
        };
        let tool_request = ChatResponse {
            text: Some(String::new()),
            tool_calls: vec![noop_call],
        };
        let closing_reply = ChatResponse {
            text: Some("done".into()),
            tool_calls: Vec::new(),
        };
        Self {
            responses: Mutex::new(vec![tool_request, closing_reply]),
        }
    }
}
#[async_trait]
impl Provider for BenchProvider {
    /// Simple-chat entry point; ignores every argument and always answers
    /// with the fixed string `"fallback"`.
    async fn chat_with_system(
        &self,
        _system_prompt: Option<&str>,
        _message: &str,
        _model: &str,
        _temperature: f64,
    ) -> Result<String> {
        Ok(String::from("fallback"))
    }

    /// Serves the next scripted response, front of the queue first.
    ///
    /// The request, model and temperature are ignored. When the script has
    /// run out, a text-only `"done"` reply is returned instead.
    ///
    /// Panics if the `responses` mutex is poisoned.
    async fn chat(
        &self,
        _request: ChatRequest<'_>,
        _model: &str,
        _temperature: f64,
    ) -> Result<ChatResponse> {
        // No `.await` happens while the guard is alive, so holding a std
        // mutex inside this async fn is fine.
        let mut queue = self.responses.lock().unwrap();
        let reply = if queue.is_empty() {
            ChatResponse {
                text: Some("done".into()),
                tool_calls: Vec::new(),
            }
        } else {
            queue.remove(0)
        };
        Ok(reply)
    }
}
/// Tool stub named `noop` whose `execute` ignores its arguments and reports
/// success with empty output.
struct NoopTool;

#[async_trait]
impl Tool for NoopTool {
    /// Name under which the tool is registered; matches the tool call
    /// scripted by `BenchProvider::with_tool_then_text`.
    fn name(&self) -> &str {
        "noop"
    }

    /// Human-readable description of the tool.
    fn description(&self) -> &str {
        "Does nothing"
    }

    /// JSON schema for the arguments: a bare object with no declared properties.
    fn parameters_schema(&self) -> serde_json::Value {
        let schema = serde_json::json!({"type": "object"});
        schema
    }

    /// Ignores `_args` and returns a successful result with empty output and
    /// no error.
    async fn execute(&self, _args: serde_json::Value) -> Result<ToolResult> {
        let outcome = ToolResult {
            success: true,
            output: String::new(),
            error: None,
        };
        Ok(outcome)
    }
}
/// Creates the memory handle used by the agent-turn benchmarks.
///
/// The configuration is the default one with `backend` set to `"none"`
/// (presumably a backend that persists nothing — confirm against
/// `memory::create_memory`). `/tmp` is passed as the directory argument.
///
/// Panics if `memory::create_memory` returns an error.
fn make_memory() -> Arc<dyn Memory> {
    let workspace = std::path::Path::new("/tmp");
    let config = MemoryConfig {
        backend: "none".into(),
        ..Default::default()
    };
    let backend = memory::create_memory(&config, workspace, None).unwrap();
    Arc::from(backend)
}
/// Creates a memory handle configured with the `"sqlite"` backend, passing
/// `dir` as the directory argument to `memory::create_memory`.
///
/// All other settings come from `MemoryConfig::default()`.
///
/// Panics if `memory::create_memory` returns an error.
fn make_sqlite_memory(dir: &std::path::Path) -> Arc<dyn Memory> {
    let config = MemoryConfig {
        backend: "sqlite".into(),
        ..Default::default()
    };
    let backend = memory::create_memory(&config, dir, None).unwrap();
    Arc::from(backend)
}
/// Returns a shared `NoopObserver` as a trait object for the agent builder.
fn make_observer() -> Arc<dyn Observer> {
    let observer: Arc<dyn Observer> = Arc::new(NoopObserver {});
    observer
}
// ─────────────────────────────────────────────────────────────────────────────
// Benchmark: XML tool-call parsing
// ─────────────────────────────────────────────────────────────────────────────
/// Benchmarks `XmlToolDispatcher::parse_response` on assistant text that
/// embeds `<tool_call>` blocks.
///
/// Two cases are registered, in this order:
/// - `xml_parse_single_tool_call`: prose surrounding one call.
/// - `xml_parse_multi_tool_call`: three back-to-back calls with no prose.
///
/// Both responses carry an empty `tool_calls` list, so the only calls
/// present are the ones embedded in the text.
fn bench_xml_parsing(c: &mut Criterion) {
    // The payload lines stay at column 0: the raw strings are the benchmark
    // input and must not gain leading whitespace.
    const ONE_CALL: &str = r#"Here is my analysis.
<tool_call>
{"name": "search", "arguments": {"query": "zeroclaw architecture"}}
</tool_call>
Let me know if you need more."#;
    const THREE_CALLS: &str = r#"<tool_call>
{"name": "read_file", "arguments": {"path": "src/main.rs"}}
</tool_call>
<tool_call>
{"name": "search", "arguments": {"query": "config"}}
</tool_call>
<tool_call>
{"name": "list_dir", "arguments": {"path": "src/"}}
</tool_call>"#;

    // Wraps raw assistant text in a response without native tool calls.
    let as_response = |body: &str| ChatResponse {
        text: Some(body.into()),
        tool_calls: Vec::new(),
    };

    let xml = XmlToolDispatcher;
    let cases = [
        ("xml_parse_single_tool_call", as_response(ONE_CALL)),
        ("xml_parse_multi_tool_call", as_response(THREE_CALLS)),
    ];

    for (label, response) in &cases {
        c.bench_function(*label, |b| {
            b.iter(|| xml.parse_response(black_box(response)))
        });
    }
}
// ─────────────────────────────────────────────────────────────────────────────
// Benchmark: Native tool-call parsing
// ─────────────────────────────────────────────────────────────────────────────
/// Benchmarks `NativeToolDispatcher::parse_response` on a response that
/// carries two structured tool calls (`search` and `read_file`) alongside a
/// short text part.
///
/// Registered as `native_parse_tool_calls`.
fn bench_native_parsing(c: &mut Criterion) {
    // Small constructor so the fixture below reads as a table of calls.
    let call = |id: &str, name: &str, arguments: &str| ToolCall {
        id: id.into(),
        name: name.into(),
        arguments: arguments.into(),
    };

    let structured = ChatResponse {
        text: Some("I'll help you.".into()),
        tool_calls: vec![
            call("tc1", "search", r#"{"query": "zeroclaw"}"#),
            call("tc2", "read_file", r#"{"path": "src/main.rs"}"#),
        ],
    };

    let native = NativeToolDispatcher;
    c.bench_function("native_parse_tool_calls", |b| {
        b.iter(|| native.parse_response(black_box(&structured)))
    });
}
// ─────────────────────────────────────────────────────────────────────────────
// Benchmark: Memory store + recall (SQLite)
// ─────────────────────────────────────────────────────────────────────────────
/// Benchmarks store, recall and count on a SQLite-backed memory living in a
/// fresh temporary directory.
///
/// The store is seeded with 100 `Core` entries (`key_0` .. `key_99`) before
/// any measurement. Three benchmarks are then registered:
/// - `memory_store_single`: one `store` per iteration under `bench_key_{idx}`,
///   with `idx` counting up from 1000.
/// - `memory_recall_top10`: `recall("zeroclaw agent", 10, None)`.
/// - `memory_count`: `count()`.
///
/// Every async call is driven to completion with `Runtime::block_on`, so the
/// measured time includes that call. Any setup or operation error panics.
fn bench_memory_operations(c: &mut Criterion) {
    // Declaration order matters for drop order: the memory handle goes away
    // before the temp dir, and the temp dir before the runtime.
    let runtime = tokio::runtime::Runtime::new().unwrap();
    let scratch = tempfile::TempDir::new().unwrap();
    let sqlite = make_sqlite_memory(scratch.path());

    // Seed entries so the recall benchmark has something to search.
    runtime.block_on(async {
        for n in 0..100 {
            let key = format!("key_{n}");
            let body = format!("Content entry number {n} about zeroclaw agent runtime");
            sqlite
                .store(&key, &body, MemoryCategory::Core, None)
                .await
                .unwrap();
        }
    });

    c.bench_function("memory_store_single", |b| {
        // The counter is re-initialised each time this closure is invoked.
        let mut next_idx: usize = 1000;
        b.iter(|| {
            let idx = next_idx;
            next_idx = next_idx.wrapping_add(1);
            let key = format!("bench_key_{idx}");
            runtime.block_on(async {
                sqlite
                    .store(
                        &key,
                        "Benchmark content for store operation",
                        MemoryCategory::Daily,
                        None,
                    )
                    .await
                    .unwrap();
            });
        });
    });

    c.bench_function("memory_recall_top10", |b| {
        b.iter(|| {
            runtime
                .block_on(sqlite.recall(black_box("zeroclaw agent"), 10, None))
                .unwrap()
        });
    });

    c.bench_function("memory_count", |b| {
        b.iter(|| runtime.block_on(sqlite.count()).unwrap());
    });
}
// ─────────────────────────────────────────────────────────────────────────────
// Benchmark: Full agent turn cycle
// ─────────────────────────────────────────────────────────────────────────────
fn bench_agent_turn(c: &mut Criterion) {
let rt = tokio::runtime::Runtime::new().unwrap();
c.bench_function("agent_turn_text_only", |b| {
b.iter(|| {
rt.block_on(async {
let provider = Box::new(BenchProvider::text_only("benchmark response"));
let mut agent = Agent::builder()
.provider(provider)
.tools(vec![Box::new(NoopTool) as Box<dyn Tool>])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(NativeToolDispatcher))
.workspace_dir(std::path::PathBuf::from("/tmp"))
.build()
.unwrap();
agent.turn(black_box("hello")).await.unwrap()
})
});
});
c.bench_function("agent_turn_with_tool_call", |b| {
b.iter(|| {
rt.block_on(async {
let provider = Box::new(BenchProvider::with_tool_then_text());
let mut agent = Agent::builder()
.provider(provider)
.tools(vec![Box::new(NoopTool) as Box<dyn Tool>])
.memory(make_memory())
.observer(make_observer())
.tool_dispatcher(Box::new(NativeToolDispatcher))
.workspace_dir(std::path::PathBuf::from("/tmp"))
.build()
.unwrap();
agent.turn(black_box("run tool")).await.unwrap()
})
});
});
}
// Registers every benchmark function in one group named `benches`, run with
// Criterion's default configuration, and generates the harness entry point.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets =
        bench_xml_parsing,
        bench_native_parsing,
        bench_memory_operations,
        bench_agent_turn,
}
criterion_main!(benches);