From b0d4a1297b8f54265a0e94ed66f168f6c1604c8b Mon Sep 17 00:00:00 2001 From: stawky Date: Mon, 16 Feb 2026 19:30:30 +0800 Subject: [PATCH] feat(doctor): add enhanced diagnostics and config validation - Expand with grouped health report output - Add semantic config checks (provider/model/temp/routes/channels) - Add workspace checks (existence, write probe, disk availability) - Preserve daemon/scheduler/channel freshness diagnostics - Add environment checks (git/curl/shell/home) - Add unit tests for provider validation and config edge cases Also fix upstream signature drift to keep build green: - channels: pass provider_name to agent_turn - channels: pass workspace_dir to all_tools_with_runtime - daemon: pass verbose flag to agent::run --- src/doctor/mod.rs | 709 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 609 insertions(+), 100 deletions(-) diff --git a/src/doctor/mod.rs b/src/doctor/mod.rs index f4f3b99..9b7a95d 100644 --- a/src/doctor/mod.rs +++ b/src/doctor/mod.rs @@ -1,28 +1,429 @@ use crate::config::Config; -use anyhow::{Context, Result}; +use anyhow::Result; use chrono::{DateTime, Utc}; +use std::path::Path; const DAEMON_STALE_SECONDS: i64 = 30; const SCHEDULER_STALE_SECONDS: i64 = 120; const CHANNEL_STALE_SECONDS: i64 = 300; -pub fn run(config: &Config) -> Result<()> { - let state_file = crate::daemon::state_file_path(config); - if !state_file.exists() { - println!("đŸŠē ZeroClaw Doctor"); - println!(" ❌ daemon state file not found: {}", state_file.display()); - println!(" 💡 Start daemon with: zeroclaw daemon"); - return Ok(()); +/// Known built-in provider names (must stay in sync with `create_provider`). +const KNOWN_PROVIDERS: &[&str] = &[ + "openrouter", + "anthropic", + "openai", + "ollama", + "gemini", + "google", + "google-gemini", + "venice", + "vercel", + "vercel-ai", + "cloudflare", + "cloudflare-ai", + "moonshot", + "kimi", + "synthetic", + "opencode", + "opencode-zen", + "zai", + "z.ai", + "glm", + "zhipu", + "minimax", + "bedrock", + "aws-bedrock", + "qianfan", + "baidu", + "groq", + "mistral", + "xai", + "grok", + "deepseek", + "together", + "together-ai", + "fireworks", + "fireworks-ai", + "perplexity", + "cohere", + "copilot", + "github-copilot", +]; + +// ── Diagnostic item ────────────────────────────────────────────── + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Severity { + Ok, + Warn, + Error, +} + +struct DiagItem { + severity: Severity, + category: &'static str, + message: String, +} + +impl DiagItem { + fn ok(category: &'static str, msg: impl Into) -> Self { + Self { + severity: Severity::Ok, + category, + message: msg.into(), + } + } + fn warn(category: &'static str, msg: impl Into) -> Self { + Self { + severity: Severity::Warn, + category, + message: msg.into(), + } + } + fn error(category: &'static str, msg: impl Into) -> Self { + Self { + severity: Severity::Error, + category, + message: msg.into(), + } } - let raw = std::fs::read_to_string(&state_file) - .with_context(|| format!("Failed to read {}", state_file.display()))?; - let snapshot: serde_json::Value = serde_json::from_str(&raw) - .with_context(|| format!("Failed to parse {}", state_file.display()))?; + fn icon(&self) -> &'static str { + match self.severity { + Severity::Ok => "✅", + Severity::Warn => "âš ī¸ ", + Severity::Error => "❌", + } + } +} - println!("đŸŠē ZeroClaw Doctor"); - println!(" State file: {}", state_file.display()); +// ── Public entry point ─────────────────────────────────────────── +pub fn run(config: &Config) -> Result<()> { + let mut items: Vec = Vec::new(); + + check_config_semantics(config, &mut items); + check_workspace(config, &mut items); + check_daemon_state(config, &mut items); + check_environment(&mut items); + + // Print report + println!("đŸŠē ZeroClaw Doctor (enhanced)"); + println!(); + + let mut current_cat = ""; + for item in &items { + if item.category != current_cat { + current_cat = item.category; + println!(" [{current_cat}]"); + } + println!(" {} {}", item.icon(), item.message); + } + + let errors = items + .iter() + .filter(|i| i.severity == Severity::Error) + .count(); + let warns = items + .iter() + .filter(|i| i.severity == Severity::Warn) + .count(); + let oks = items.iter().filter(|i| i.severity == Severity::Ok).count(); + + println!(); + println!(" Summary: {oks} ok, {warns} warnings, {errors} errors"); + + if errors > 0 { + println!(" 💡 Fix the errors above, then run `zeroclaw doctor` again."); + } + + Ok(()) +} + +// ── Config semantic validation ─────────────────────────────────── + +fn check_config_semantics(config: &Config, items: &mut Vec) { + let cat = "config"; + + // Config file exists + if config.config_path.exists() { + items.push(DiagItem::ok( + cat, + format!("config file: {}", config.config_path.display()), + )); + } else { + items.push(DiagItem::error( + cat, + format!("config file not found: {}", config.config_path.display()), + )); + } + + // Provider validity + if let Some(ref provider) = config.default_provider { + if is_known_provider(provider) { + items.push(DiagItem::ok( + cat, + format!("provider \"{provider}\" is valid"), + )); + } else { + items.push(DiagItem::error( + cat, + format!( + "unknown provider \"{provider}\". Use a known name or \"custom:\" / \"anthropic-custom:\"" + ), + )); + } + } else { + items.push(DiagItem::error(cat, "no default_provider configured")); + } + + // API key presence + if config.default_provider.as_deref() != Some("ollama") { + if config.api_key.is_some() { + items.push(DiagItem::ok(cat, "API key configured")); + } else { + items.push(DiagItem::warn( + cat, + "no api_key set (may rely on env vars or provider defaults)", + )); + } + } + + // Model configured + if config.default_model.is_some() { + items.push(DiagItem::ok( + cat, + format!( + "default model: {}", + config.default_model.as_deref().unwrap_or("?") + ), + )); + } else { + items.push(DiagItem::warn(cat, "no default_model configured")); + } + + // Temperature range + if config.default_temperature >= 0.0 && config.default_temperature <= 2.0 { + items.push(DiagItem::ok( + cat, + format!( + "temperature {:.1} (valid range 0.0–2.0)", + config.default_temperature + ), + )); + } else { + items.push(DiagItem::error( + cat, + format!( + "temperature {:.1} is out of range (expected 0.0–2.0)", + config.default_temperature + ), + )); + } + + // Gateway port range + let port = config.gateway.port; + if port > 0 { + items.push(DiagItem::ok(cat, format!("gateway port: {port}"))); + } else { + items.push(DiagItem::error(cat, "gateway port is 0 (invalid)")); + } + + // Reliability: fallback providers + for fb in &config.reliability.fallback_providers { + if !is_known_provider(fb) { + items.push(DiagItem::warn( + cat, + format!("fallback provider \"{fb}\" is not a known provider name"), + )); + } + } + + // Model routes validation + for route in &config.model_routes { + if route.hint.is_empty() { + items.push(DiagItem::warn(cat, "model route with empty hint")); + } + if !is_known_provider(&route.provider) { + items.push(DiagItem::warn( + cat, + format!( + "model route \"{}\" references unknown provider \"{}\"", + route.hint, route.provider + ), + )); + } + if route.model.is_empty() { + items.push(DiagItem::warn( + cat, + format!("model route \"{}\" has empty model", route.hint), + )); + } + } + + // Channel: at least one configured + let cc = &config.channels_config; + let has_channel = cc.telegram.is_some() + || cc.discord.is_some() + || cc.slack.is_some() + || cc.imessage.is_some() + || cc.matrix.is_some() + || cc.whatsapp.is_some() + || cc.email.is_some() + || cc.irc.is_some() + || cc.lark.is_some() + || cc.webhook.is_some(); + + if has_channel { + items.push(DiagItem::ok(cat, "at least one channel configured")); + } else { + items.push(DiagItem::warn( + cat, + "no channels configured — run `zeroclaw onboard` to set one up", + )); + } + + // Delegate agents: provider validity + for (name, agent) in &config.agents { + if !is_known_provider(&agent.provider) { + items.push(DiagItem::warn( + cat, + format!( + "agent \"{name}\" uses unknown provider \"{}\"", + agent.provider + ), + )); + } + } +} + +fn is_known_provider(name: &str) -> bool { + KNOWN_PROVIDERS.contains(&name) + || name.starts_with("custom:") + || name.starts_with("anthropic-custom:") +} + +// ── Workspace integrity ────────────────────────────────────────── + +fn check_workspace(config: &Config, items: &mut Vec) { + let cat = "workspace"; + let ws = &config.workspace_dir; + + if ws.exists() { + items.push(DiagItem::ok( + cat, + format!("directory exists: {}", ws.display()), + )); + } else { + items.push(DiagItem::error( + cat, + format!("directory missing: {}", ws.display()), + )); + return; + } + + // Writable check + let probe = ws.join(".zeroclaw_doctor_probe"); + match std::fs::write(&probe, b"probe") { + Ok(()) => { + let _ = std::fs::remove_file(&probe); + items.push(DiagItem::ok(cat, "directory is writable")); + } + Err(e) => { + items.push(DiagItem::error( + cat, + format!("directory is not writable: {e}"), + )); + } + } + + // Disk space (best-effort via `df`) + if let Some(avail_mb) = disk_available_mb(ws) { + if avail_mb >= 100 { + items.push(DiagItem::ok( + cat, + format!("disk space: {avail_mb} MB available"), + )); + } else { + items.push(DiagItem::warn( + cat, + format!("low disk space: only {avail_mb} MB available"), + )); + } + } + + // Key workspace files + check_file_exists(ws, "SOUL.md", false, cat, items); + check_file_exists(ws, "AGENTS.md", false, cat, items); +} + +fn check_file_exists( + base: &Path, + name: &str, + required: bool, + cat: &'static str, + items: &mut Vec, +) { + let path = base.join(name); + if path.exists() { + items.push(DiagItem::ok(cat, format!("{name} present"))); + } else if required { + items.push(DiagItem::error(cat, format!("{name} missing"))); + } else { + items.push(DiagItem::warn(cat, format!("{name} not found (optional)"))); + } +} + +fn disk_available_mb(path: &Path) -> Option { + let output = std::process::Command::new("df") + .arg("-m") + .arg(path) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let stdout = String::from_utf8_lossy(&output.stdout); + // Second line, 4th column is "Available" in `df -m` + let line = stdout.lines().nth(1)?; + let avail = line.split_whitespace().nth(3)?; + avail.parse::().ok() +} + +// ── Daemon state (original logic, preserved) ───────────────────── + +fn check_daemon_state(config: &Config, items: &mut Vec) { + let cat = "daemon"; + let state_file = crate::daemon::state_file_path(config); + + if !state_file.exists() { + items.push(DiagItem::error( + cat, + format!( + "state file not found: {} — is the daemon running?", + state_file.display() + ), + )); + return; + } + + let raw = match std::fs::read_to_string(&state_file) { + Ok(r) => r, + Err(e) => { + items.push(DiagItem::error(cat, format!("cannot read state file: {e}"))); + return; + } + }; + + let snapshot: serde_json::Value = match serde_json::from_str(&raw) { + Ok(v) => v, + Err(e) => { + items.push(DiagItem::error(cat, format!("invalid state JSON: {e}"))); + return; + } + }; + + // Daemon heartbeat freshness let updated_at = snapshot .get("updated_at") .and_then(serde_json::Value::as_str) @@ -33,28 +434,32 @@ pub fn run(config: &Config) -> Result<()> { .signed_duration_since(ts.with_timezone(&Utc)) .num_seconds(); if age <= DAEMON_STALE_SECONDS { - println!(" ✅ daemon heartbeat fresh ({age}s ago)"); + items.push(DiagItem::ok(cat, format!("heartbeat fresh ({age}s ago)"))); } else { - println!(" ❌ daemon heartbeat stale ({age}s ago)"); + items.push(DiagItem::error( + cat, + format!("heartbeat stale ({age}s ago)"), + )); } } else { - println!(" ❌ invalid daemon timestamp: {updated_at}"); + items.push(DiagItem::error( + cat, + format!("invalid daemon timestamp: {updated_at}"), + )); } - let mut channel_count = 0_u32; - let mut stale_channels = 0_u32; - + // Components if let Some(components) = snapshot .get("components") .and_then(serde_json::Value::as_object) { + // Scheduler if let Some(scheduler) = components.get("scheduler") { let scheduler_ok = scheduler .get("status") .and_then(serde_json::Value::as_str) .is_some_and(|s| s == "ok"); - - let scheduler_last_ok = scheduler + let scheduler_age = scheduler .get("last_ok") .and_then(serde_json::Value::as_str) .and_then(parse_rfc3339) @@ -62,22 +467,28 @@ pub fn run(config: &Config) -> Result<()> { Utc::now().signed_duration_since(dt).num_seconds() }); - if scheduler_ok && scheduler_last_ok <= SCHEDULER_STALE_SECONDS { - println!(" ✅ scheduler healthy (last ok {scheduler_last_ok}s ago)"); + if scheduler_ok && scheduler_age <= SCHEDULER_STALE_SECONDS { + items.push(DiagItem::ok( + cat, + format!("scheduler healthy (last ok {scheduler_age}s ago)"), + )); } else { - println!( - " ❌ scheduler unhealthy/stale (status_ok={scheduler_ok}, age={scheduler_last_ok}s)" - ); + items.push(DiagItem::error( + cat, + format!("scheduler unhealthy (ok={scheduler_ok}, age={scheduler_age}s)"), + )); } } else { - println!(" ❌ scheduler component missing"); + items.push(DiagItem::warn(cat, "scheduler component not tracked yet")); } + // Channels + let mut channel_count = 0u32; + let mut stale = 0u32; for (name, component) in components { if !name.starts_with("channel:") { continue; } - channel_count += 1; let status_ok = component .get("status") @@ -92,23 +503,88 @@ pub fn run(config: &Config) -> Result<()> { }); if status_ok && age <= CHANNEL_STALE_SECONDS { - println!(" ✅ {name} fresh (last ok {age}s ago)"); + items.push(DiagItem::ok(cat, format!("{name} fresh ({age}s ago)"))); } else { - stale_channels += 1; - println!(" ❌ {name} stale/unhealthy (status_ok={status_ok}, age={age}s)"); + stale += 1; + items.push(DiagItem::error( + cat, + format!("{name} stale (ok={status_ok}, age={age}s)"), + )); } } - } - if channel_count == 0 { - println!(" â„šī¸ no channel components tracked in state yet"); - } else { - println!(" Channel summary: {channel_count} total, {stale_channels} stale"); + if channel_count == 0 { + items.push(DiagItem::warn(cat, "no channel components tracked yet")); + } else if stale > 0 { + items.push(DiagItem::warn( + cat, + format!("{channel_count} channels, {stale} stale"), + )); + } } - - Ok(()) } +// ── Environment checks ─────────────────────────────────────────── + +fn check_environment(items: &mut Vec) { + let cat = "environment"; + + // git + check_command_available("git", &["--version"], cat, items); + + // Shell + let shell = std::env::var("SHELL").unwrap_or_default(); + if !shell.is_empty() { + items.push(DiagItem::ok(cat, format!("shell: {shell}"))); + } else { + items.push(DiagItem::warn(cat, "$SHELL not set")); + } + + // HOME + if std::env::var("HOME").is_ok() || std::env::var("USERPROFILE").is_ok() { + items.push(DiagItem::ok(cat, "home directory env set")); + } else { + items.push(DiagItem::error( + cat, + "neither $HOME nor $USERPROFILE is set", + )); + } + + // Optional tools + check_command_available("curl", &["--version"], cat, items); +} + +fn check_command_available(cmd: &str, args: &[&str], cat: &'static str, items: &mut Vec) { + match std::process::Command::new(cmd) + .args(args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + { + Ok(output) if output.status.success() => { + let ver = String::from_utf8_lossy(&output.stdout); + let first_line = ver.lines().next().unwrap_or("").trim(); + let display = if first_line.len() > 60 { + format!("{}â€Ļ", &first_line[..60]) + } else { + first_line.to_string() + }; + items.push(DiagItem::ok(cat, format!("{cmd}: {display}"))); + } + Ok(_) => { + items.push(DiagItem::warn( + cat, + format!("{cmd} found but returned non-zero"), + )); + } + Err(_) => { + items.push(DiagItem::warn(cat, format!("{cmd} not found in PATH"))); + } + } +} + +// ── Helpers ────────────────────────────────────────────────────── + fn parse_rfc3339(raw: &str) -> Option> { DateTime::parse_from_rfc3339(raw) .ok() @@ -118,85 +594,118 @@ fn parse_rfc3339(raw: &str) -> Option> { #[cfg(test)] mod tests { use super::*; - use crate::config::Config; - use serde_json::json; - use tempfile::TempDir; - fn test_config(tmp: &TempDir) -> Config { + #[test] + fn known_providers_recognized() { + assert!(is_known_provider("openrouter")); + assert!(is_known_provider("anthropic")); + assert!(is_known_provider("ollama")); + assert!(is_known_provider("gemini")); + assert!(is_known_provider("custom:https://example.com")); + assert!(is_known_provider("anthropic-custom:https://example.com")); + assert!(!is_known_provider("nonexistent-provider")); + assert!(!is_known_provider("")); + } + + #[test] + fn diag_item_icons() { + assert_eq!(DiagItem::ok("t", "m").icon(), "✅"); + assert_eq!(DiagItem::warn("t", "m").icon(), "âš ī¸ "); + assert_eq!(DiagItem::error("t", "m").icon(), "❌"); + } + + #[test] + fn config_validation_catches_bad_temperature() { let mut config = Config::default(); - config.workspace_dir = tmp.path().join("workspace"); - config.config_path = tmp.path().join("config.toml"); - config + config.default_temperature = 5.0; + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let temp_item = items.iter().find(|i| i.message.contains("temperature")); + assert!(temp_item.is_some()); + assert_eq!(temp_item.unwrap().severity, Severity::Error); } #[test] - fn parse_rfc3339_accepts_valid_timestamp() { - let parsed = parse_rfc3339("2025-01-02T03:04:05Z"); - assert!(parsed.is_some()); + fn config_validation_accepts_valid_temperature() { + let mut config = Config::default(); + config.default_temperature = 0.7; + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let temp_item = items.iter().find(|i| i.message.contains("temperature")); + assert!(temp_item.is_some()); + assert_eq!(temp_item.unwrap().severity, Severity::Ok); } #[test] - fn parse_rfc3339_rejects_invalid_timestamp() { - let parsed = parse_rfc3339("not-a-timestamp"); - assert!(parsed.is_none()); + fn config_validation_warns_no_channels() { + let config = Config::default(); + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let ch_item = items.iter().find(|i| i.message.contains("channel")); + assert!(ch_item.is_some()); + assert_eq!(ch_item.unwrap().severity, Severity::Warn); } #[test] - fn run_returns_ok_when_state_file_missing() { - let tmp = TempDir::new().unwrap(); - let config = test_config(&tmp); - - let result = run(&config); - - assert!(result.is_ok()); + fn config_validation_catches_unknown_provider() { + let mut config = Config::default(); + config.default_provider = Some("totally-fake".into()); + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let prov_item = items + .iter() + .find(|i| i.message.contains("unknown provider")); + assert!(prov_item.is_some()); + assert_eq!(prov_item.unwrap().severity, Severity::Error); } #[test] - fn run_returns_error_for_invalid_json_state_file() { - let tmp = TempDir::new().unwrap(); - let config = test_config(&tmp); - let state_file = crate::daemon::state_file_path(&config); - - std::fs::write(&state_file, "not-json").unwrap(); - - let result = run(&config); - - assert!(result.is_err()); - let error_text = result.unwrap_err().to_string(); - assert!(error_text.contains("Failed to parse")); + fn config_validation_accepts_custom_provider() { + let mut config = Config::default(); + config.default_provider = Some("custom:https://my-api.com".into()); + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let prov_item = items.iter().find(|i| i.message.contains("is valid")); + assert!(prov_item.is_some()); + assert_eq!(prov_item.unwrap().severity, Severity::Ok); } #[test] - fn run_accepts_well_formed_state_snapshot() { - let tmp = TempDir::new().unwrap(); - let config = test_config(&tmp); - let state_file = crate::daemon::state_file_path(&config); + fn config_validation_warns_bad_fallback() { + let mut config = Config::default(); + config.reliability.fallback_providers = vec!["fake-provider".into()]; + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let fb_item = items + .iter() + .find(|i| i.message.contains("fallback provider")); + assert!(fb_item.is_some()); + assert_eq!(fb_item.unwrap().severity, Severity::Warn); + } - let now = Utc::now().to_rfc3339(); - let snapshot = json!({ - "updated_at": now, - "components": { - "scheduler": { - "status": "ok", - "last_ok": now, - "last_error": null, - "updated_at": now, - "restart_count": 0 - }, - "channel:discord": { - "status": "ok", - "last_ok": now, - "last_error": null, - "updated_at": now, - "restart_count": 0 - } - } - }); + #[test] + fn config_validation_warns_empty_model_route() { + let mut config = Config::default(); + config.model_routes = vec![crate::config::ModelRouteConfig { + hint: "fast".into(), + provider: "groq".into(), + model: String::new(), + api_key: None, + }]; + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let route_item = items.iter().find(|i| i.message.contains("empty model")); + assert!(route_item.is_some()); + assert_eq!(route_item.unwrap().severity, Severity::Warn); + } - std::fs::write(&state_file, serde_json::to_vec_pretty(&snapshot).unwrap()).unwrap(); - - let result = run(&config); - - assert!(result.is_ok()); + #[test] + fn environment_check_finds_git() { + let mut items = Vec::new(); + check_environment(&mut items); + let git_item = items.iter().find(|i| i.message.starts_with("git:")); + // git should be available in any CI/dev environment + assert!(git_item.is_some()); + assert_eq!(git_item.unwrap().severity, Severity::Ok); } }