From 8724884b00daef317c2c088aa451c0b2d2d2a577 Mon Sep 17 00:00:00 2001 From: Alex Gorevski Date: Tue, 17 Feb 2026 12:15:19 -0800 Subject: [PATCH] feat(ci): add Criterion performance benchmarks for hot paths (#638) Add benchmarks using Criterion for: - XML tool-call parsing (single and multi-call) - Native tool-call parsing - SQLite memory store/recall/count operations - Full agent turn cycle (text-only and with tool call) Add CI workflow (.github/workflows/benchmarks.yml) that: - Runs benchmarks on push to main and on PRs - Uploads Criterion results as artifacts - Posts benchmark summary as PR comment for regression visibility Ref: https://github.com/zeroclaw-labs/zeroclaw/issues/618 (item 7) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/benchmarks.yml | 103 ++++++++++ Cargo.lock | 230 +++++++++++++++++++++- Cargo.toml | 5 + benches/agent_benchmarks.rs | 314 +++++++++++++++++++++++++++++++ 4 files changed, 650 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/benchmarks.yml create mode 100644 benches/agent_benchmarks.rs diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 0000000..4380ba8 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,103 @@ +name: Performance Benchmarks + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +concurrency: + group: bench-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + +env: + CARGO_TERM_COLOR: always + +jobs: + benchmarks: + name: Criterion Benchmarks + runs-on: blacksmith-2vcpu-ubuntu-2404 + timeout-minutes: 30 + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable + with: + toolchain: 1.92.0 + - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2 
+ + - name: Run benchmarks + run: cargo bench --locked 2>&1 | tee benchmark_output.txt + + - name: Upload benchmark results + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: benchmark-results + path: | + target/criterion/ + benchmark_output.txt + retention-days: 30 + + - name: Post benchmark summary on PR + if: github.event_name == 'pull_request' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const fs = require('fs'); + const output = fs.readFileSync('benchmark_output.txt', 'utf8'); + + // Extract Criterion result lines + const lines = output.split('\n').filter(l => + l.includes('time:') || l.includes('change:') || l.includes('Performance') + ); + + if (lines.length === 0) { + core.info('No benchmark results to post.'); + return; + } + + const body = [ + '## 📊 Benchmark Results', + '', + '```', + lines.join('\n'), + '```', + '', + '
<details><summary>Full output</summary>', + '', + '```', + output.substring(0, 60000), + '```', + '</details>
', + ].join('\n'); + + // Find and update or create comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + }); + + const marker = '## 📊 Benchmark Results'; + const existing = comments.find(c => c.body && c.body.startsWith(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + body, + }); + } diff --git a/Cargo.lock b/Cargo.lock index b6b9ff9..61289ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,6 +74,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.21" @@ -390,6 +396,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cbc" version = "0.1.2" @@ -485,6 +497,33 @@ dependencies = [ "stacker", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -670,6 +709,44 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "cron" version = "0.12.1" @@ -681,12 +758,37 @@ dependencies = [ "once_cell", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 
+[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -1300,6 +1402,17 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hash32" version = "0.3.1" @@ -1823,12 +1936,32 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi 0.5.2", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -2348,6 +2481,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -2553,6 
+2692,34 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "polling" version = "3.11.0" @@ -2643,7 +2810,7 @@ dependencies = [ "futures-lite", "hidapi", "ihex", - "itertools", + "itertools 0.14.0", "jep106", "nusb 0.1.14", "object 0.37.3", @@ -2723,7 +2890,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", "syn", @@ -2886,6 +3053,26 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -3159,6 +3346,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -3659,6 +3855,16 @@ dependencies = [ "zerovec 0.11.5", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -4210,6 +4416,16 @@ version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -4429,6 +4645,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ 
-4903,6 +5128,7 @@ dependencies = [ "chrono-tz", "clap", "console 0.15.11", + "criterion", "cron", "dialoguer", "directories", diff --git a/Cargo.toml b/Cargo.toml index 81a22b7..e10b2ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -171,3 +171,8 @@ panic = "abort" [dev-dependencies] tokio-test = "0.4" tempfile = "3.14" +criterion = { version = "0.5", features = ["async_tokio"] } + +[[bench]] +name = "agent_benchmarks" +harness = false diff --git a/benches/agent_benchmarks.rs b/benches/agent_benchmarks.rs new file mode 100644 index 0000000..4a6c676 --- /dev/null +++ b/benches/agent_benchmarks.rs @@ -0,0 +1,314 @@ +//! Performance benchmarks for ZeroClaw hot paths. +//! +//! Benchmarks cover: +//! - Tool dispatch (XML parsing, native parsing) +//! - Memory store/recall cycles (SQLite backend) +//! - Agent turn cycle (full orchestration loop) +//! +//! Run: `cargo bench` +//! +//! Ref: https://github.com/zeroclaw-labs/zeroclaw/issues/618 (item 7) + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use std::sync::{Arc, Mutex}; + +use zeroclaw::agent::agent::Agent; +use zeroclaw::agent::dispatcher::{NativeToolDispatcher, XmlToolDispatcher, ToolDispatcher}; +use zeroclaw::config::MemoryConfig; +use zeroclaw::memory; +use zeroclaw::memory::{Memory, MemoryCategory}; +use zeroclaw::observability::{NoopObserver, Observer}; +use zeroclaw::providers::{ChatRequest, ChatResponse, Provider, ToolCall}; +use zeroclaw::tools::{Tool, ToolResult}; + +use anyhow::Result; +use async_trait::async_trait; + +// ───────────────────────────────────────────────────────────────────────────── +// Mock infrastructure (mirrors test mocks, kept local for benchmark isolation) +// ───────────────────────────────────────────────────────────────────────────── + +struct BenchProvider { + responses: Mutex>, +} + +impl BenchProvider { + fn text_only(text: &str) -> Self { + Self { + responses: Mutex::new(vec![ChatResponse { + text: Some(text.into()), + tool_calls: vec![], + }]), + } 
+ } + + fn with_tool_then_text() -> Self { + Self { + responses: Mutex::new(vec![ + ChatResponse { + text: Some(String::new()), + tool_calls: vec![ToolCall { + id: "tc1".into(), + name: "noop".into(), + arguments: "{}".into(), + }], + }, + ChatResponse { + text: Some("done".into()), + tool_calls: vec![], + }, + ]), + } + } +} + +#[async_trait] +impl Provider for BenchProvider { + async fn chat_with_system( + &self, + _system_prompt: Option<&str>, + _message: &str, + _model: &str, + _temperature: f64, + ) -> Result { + Ok("fallback".into()) + } + + async fn chat( + &self, + _request: ChatRequest<'_>, + _model: &str, + _temperature: f64, + ) -> Result { + let mut guard = self.responses.lock().unwrap(); + if guard.is_empty() { + return Ok(ChatResponse { + text: Some("done".into()), + tool_calls: vec![], + }); + } + Ok(guard.remove(0)) + } +} + +struct NoopTool; + +#[async_trait] +impl Tool for NoopTool { + fn name(&self) -> &str { + "noop" + } + fn description(&self) -> &str { + "Does nothing" + } + fn parameters_schema(&self) -> serde_json::Value { + serde_json::json!({"type": "object"}) + } + async fn execute(&self, _args: serde_json::Value) -> Result { + Ok(ToolResult { + success: true, + output: String::new(), + error: None, + }) + } +} + +fn make_memory() -> Arc { + let cfg = MemoryConfig { + backend: "none".into(), + ..MemoryConfig::default() + }; + Arc::from(memory::create_memory(&cfg, std::path::Path::new("/tmp"), None).unwrap()) +} + +fn make_sqlite_memory(dir: &std::path::Path) -> Arc { + let cfg = MemoryConfig { + backend: "sqlite".into(), + ..MemoryConfig::default() + }; + Arc::from(memory::create_memory(&cfg, dir, None).unwrap()) +} + +fn make_observer() -> Arc { + Arc::from(NoopObserver {}) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Benchmark: XML tool-call parsing +// ───────────────────────────────────────────────────────────────────────────── + +fn bench_xml_parsing(c: &mut Criterion) { + let dispatcher 
= XmlToolDispatcher; + + let single_tool = ChatResponse { + text: Some( + r#"Here is my analysis. + +{"name": "search", "arguments": {"query": "zeroclaw architecture"}} + +Let me know if you need more."# + .into(), + ), + tool_calls: vec![], + }; + + let multi_tool = ChatResponse { + text: Some( + r#" +{"name": "read_file", "arguments": {"path": "src/main.rs"}} + + +{"name": "search", "arguments": {"query": "config"}} + + +{"name": "list_dir", "arguments": {"path": "src/"}} +"# + .into(), + ), + tool_calls: vec![], + }; + + c.bench_function("xml_parse_single_tool_call", |b| { + b.iter(|| dispatcher.parse_response(black_box(&single_tool))) + }); + + c.bench_function("xml_parse_multi_tool_call", |b| { + b.iter(|| dispatcher.parse_response(black_box(&multi_tool))) + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Benchmark: Native tool-call parsing +// ───────────────────────────────────────────────────────────────────────────── + +fn bench_native_parsing(c: &mut Criterion) { + let dispatcher = NativeToolDispatcher; + + let response = ChatResponse { + text: Some("I'll help you.".into()), + tool_calls: vec![ + ToolCall { + id: "tc1".into(), + name: "search".into(), + arguments: r#"{"query": "zeroclaw"}"#.into(), + }, + ToolCall { + id: "tc2".into(), + name: "read_file".into(), + arguments: r#"{"path": "src/main.rs"}"#.into(), + }, + ], + }; + + c.bench_function("native_parse_tool_calls", |b| { + b.iter(|| dispatcher.parse_response(black_box(&response))) + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Benchmark: Memory store + recall (SQLite) +// ───────────────────────────────────────────────────────────────────────────── + +fn bench_memory_operations(c: &mut Criterion) { + let rt = tokio::runtime::Runtime::new().unwrap(); + let tmp = tempfile::TempDir::new().unwrap(); + let mem = make_sqlite_memory(tmp.path()); + + // Seed with entries for recall benchmarks + 
rt.block_on(async { + for i in 0..100 { + mem.store( + &format!("key_{i}"), + &format!("Content entry number {i} about zeroclaw agent runtime"), + MemoryCategory::Core, + None, + ) + .await + .unwrap(); + } + }); + + c.bench_function("memory_store_single", |b| { + let counter = std::sync::atomic::AtomicUsize::new(1000); + b.iter(|| { + let idx = counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + rt.block_on(async { + mem.store( + &format!("bench_key_{idx}"), + "Benchmark content for store operation", + MemoryCategory::Daily, + None, + ) + .await + .unwrap(); + }); + }); + }); + + c.bench_function("memory_recall_top10", |b| { + b.iter(|| { + rt.block_on(async { + mem.recall(black_box("zeroclaw agent"), 10, None) + .await + .unwrap() + }) + }); + }); + + c.bench_function("memory_count", |b| { + b.iter(|| rt.block_on(async { mem.count().await.unwrap() })); + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Benchmark: Full agent turn cycle +// ───────────────────────────────────────────────────────────────────────────── + +fn bench_agent_turn(c: &mut Criterion) { + let rt = tokio::runtime::Runtime::new().unwrap(); + + c.bench_function("agent_turn_text_only", |b| { + b.iter(|| { + rt.block_on(async { + let provider = Box::new(BenchProvider::text_only("benchmark response")); + let mut agent = Agent::builder() + .provider(provider) + .tools(vec![Box::new(NoopTool) as Box]) + .memory(make_memory()) + .observer(make_observer()) + .tool_dispatcher(Box::new(NativeToolDispatcher)) + .workspace_dir(std::path::PathBuf::from("/tmp")) + .build() + .unwrap(); + agent.turn(black_box("hello")).await.unwrap() + }) + }); + }); + + c.bench_function("agent_turn_with_tool_call", |b| { + b.iter(|| { + rt.block_on(async { + let provider = Box::new(BenchProvider::with_tool_then_text()); + let mut agent = Agent::builder() + .provider(provider) + .tools(vec![Box::new(NoopTool) as Box]) + .memory(make_memory()) + 
.observer(make_observer()) + .tool_dispatcher(Box::new(NativeToolDispatcher)) + .workspace_dir(std::path::PathBuf::from("/tmp")) + .build() + .unwrap(); + agent.turn(black_box("run tool")).await.unwrap() + }) + }); + }); +} + +criterion_group!( + benches, + bench_xml_parsing, + bench_native_parsing, + bench_memory_operations, + bench_agent_turn, +); +criterion_main!(benches);