From a0b277b21e246e83f8f1b21e0ace7bbf194c293b Mon Sep 17 00:00:00 2001 From: Chummy Date: Wed, 18 Feb 2026 15:20:08 +0800 Subject: [PATCH] fix(web-search): harden config handling and trim unrelated CI edit --- .github/workflows/pr-auto-response.yml | 6 +- src/config/mod.rs | 1 - src/config/schema.rs | 121 +++++++++++++++++++++++++ src/tools/web_search_tool.rs | 54 ++++++++--- 4 files changed, 164 insertions(+), 18 deletions(-) diff --git a/.github/workflows/pr-auto-response.yml b/.github/workflows/pr-auto-response.yml index 08afd4b..df4e304 100644 --- a/.github/workflows/pr-auto-response.yml +++ b/.github/workflows/pr-auto-response.yml @@ -40,8 +40,8 @@ jobs: - name: Greet first-time contributors uses: actions/first-interaction@a1db7729b356323c7988c20ed6f0d33fe31297be # v1 with: - repo_token: ${{ secrets.GITHUB_TOKEN }} - issue_message: | + repo-token: ${{ secrets.GITHUB_TOKEN }} + issue-message: | Thanks for opening this issue. Before maintainers triage it, please confirm: @@ -50,7 +50,7 @@ jobs: - Sensitive values are redacted This helps us keep issue throughput high and response latency low. - pr_message: | + pr-message: | Thanks for contributing to ZeroClaw. For faster review, please ensure: diff --git a/src/config/mod.rs b/src/config/mod.rs index 2f7409f..bfe18e7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -9,7 +9,6 @@ pub use schema::{ LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig, PeripheralBoardConfig, PeripheralsConfig, QueryClassificationConfig, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig, - WebSearchConfig, WebhookConfig, SecretsConfig, SecurityConfig, SlackConfig, TelegramConfig, TunnelConfig, WebSearchConfig, WebhookConfig, }; diff --git a/src/config/schema.rs b/src/config/schema.rs index f1569e8..5e9d20a 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -2078,6 +2078,12 @@ impl Config { "config.browser.computer_use.api_key", )?; + decrypt_optional_secret( + &store, + &mut config.web_search.brave_api_key, + "config.web_search.brave_api_key", + )?; + for agent in config.agents.values_mut() { decrypt_optional_secret(&store, &mut agent.api_key, "config.agents.*.api_key")?; } @@ -2182,6 +2188,55 @@ impl Config { } } } + + // Web search enabled: ZEROCLAW_WEB_SEARCH_ENABLED or WEB_SEARCH_ENABLED + if let Ok(enabled) = std::env::var("ZEROCLAW_WEB_SEARCH_ENABLED") + .or_else(|_| std::env::var("WEB_SEARCH_ENABLED")) + { + self.web_search.enabled = enabled == "1" || enabled.eq_ignore_ascii_case("true"); + } + + // Web search provider: ZEROCLAW_WEB_SEARCH_PROVIDER or WEB_SEARCH_PROVIDER + if let Ok(provider) = std::env::var("ZEROCLAW_WEB_SEARCH_PROVIDER") + .or_else(|_| std::env::var("WEB_SEARCH_PROVIDER")) + { + let provider = provider.trim(); + if !provider.is_empty() { + self.web_search.provider = provider.to_string(); + } + } + + // Brave API key: ZEROCLAW_BRAVE_API_KEY or BRAVE_API_KEY + if let Ok(api_key) = + std::env::var("ZEROCLAW_BRAVE_API_KEY").or_else(|_| std::env::var("BRAVE_API_KEY")) + { + let api_key = api_key.trim(); + if !api_key.is_empty() { + self.web_search.brave_api_key = Some(api_key.to_string()); + } + } + + // Web search max results: ZEROCLAW_WEB_SEARCH_MAX_RESULTS or WEB_SEARCH_MAX_RESULTS + if let Ok(max_results) = std::env::var("ZEROCLAW_WEB_SEARCH_MAX_RESULTS") + .or_else(|_| std::env::var("WEB_SEARCH_MAX_RESULTS")) + { + if let Ok(max_results) = max_results.parse::() { + if (1..=10).contains(&max_results) { + self.web_search.max_results = max_results; + } + } + } + + // Web search timeout: ZEROCLAW_WEB_SEARCH_TIMEOUT_SECS or WEB_SEARCH_TIMEOUT_SECS + if let Ok(timeout_secs) = std::env::var("ZEROCLAW_WEB_SEARCH_TIMEOUT_SECS") + .or_else(|_| std::env::var("WEB_SEARCH_TIMEOUT_SECS")) + { + if let Ok(timeout_secs) = timeout_secs.parse::() { + if timeout_secs > 0 { + self.web_search.timeout_secs = timeout_secs; + } + } + } } pub fn save(&self) -> Result<()> { @@ -2206,6 +2261,12 @@ impl Config { "config.browser.computer_use.api_key", )?; + encrypt_optional_secret( + &store, + &mut config_to_save.web_search.brave_api_key, + "config.web_search.brave_api_key", + )?; + for agent in config_to_save.agents.values_mut() { encrypt_optional_secret(&store, &mut agent.api_key, "config.agents.*.api_key")?; } @@ -2469,6 +2530,7 @@ default_temperature = 0.7 secrets: SecretsConfig::default(), browser: BrowserConfig::default(), http_request: HttpRequestConfig::default(), + web_search: WebSearchConfig::default(), agent: AgentConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), @@ -2579,6 +2641,7 @@ tool_dispatcher = "xml" secrets: SecretsConfig::default(), browser: BrowserConfig::default(), http_request: HttpRequestConfig::default(), + web_search: WebSearchConfig::default(), agent: AgentConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), @@ -2619,6 +2682,7 @@ tool_dispatcher = "xml" config.api_key = Some("root-credential".into()); config.composio.api_key = Some("composio-credential".into()); config.browser.computer_use.api_key = Some("browser-credential".into()); + config.web_search.brave_api_key = Some("brave-credential".into()); config.agents.insert( "worker".into(), @@ -2660,6 +2724,15 @@ tool_dispatcher = "xml" "browser-credential" ); + let web_search_encrypted = stored.web_search.brave_api_key.as_deref().unwrap(); + assert!(crate::security::SecretStore::is_encrypted( + web_search_encrypted + )); + assert_eq!( + store.decrypt(web_search_encrypted).unwrap(), + "brave-credential" + ); + let worker = stored.agents.get("worker").unwrap(); let worker_encrypted = worker.api_key.as_deref().unwrap(); assert!(crate::security::SecretStore::is_encrypted(worker_encrypted)); @@ -3761,6 +3834,54 @@ default_model = "legacy-model" std::env::remove_var("PORT"); } + #[test] + fn env_override_web_search_config() { + let _env_guard = env_override_test_guard(); + let mut config = Config::default(); + + std::env::set_var("WEB_SEARCH_ENABLED", "false"); + std::env::set_var("WEB_SEARCH_PROVIDER", "brave"); + std::env::set_var("WEB_SEARCH_MAX_RESULTS", "7"); + std::env::set_var("WEB_SEARCH_TIMEOUT_SECS", "20"); + std::env::set_var("BRAVE_API_KEY", "brave-test-key"); + + config.apply_env_overrides(); + + assert!(!config.web_search.enabled); + assert_eq!(config.web_search.provider, "brave"); + assert_eq!(config.web_search.max_results, 7); + assert_eq!(config.web_search.timeout_secs, 20); + assert_eq!( + config.web_search.brave_api_key.as_deref(), + Some("brave-test-key") + ); + + std::env::remove_var("WEB_SEARCH_ENABLED"); + std::env::remove_var("WEB_SEARCH_PROVIDER"); + std::env::remove_var("WEB_SEARCH_MAX_RESULTS"); + std::env::remove_var("WEB_SEARCH_TIMEOUT_SECS"); + std::env::remove_var("BRAVE_API_KEY"); + } + + #[test] + fn env_override_web_search_invalid_values_ignored() { + let _env_guard = env_override_test_guard(); + let mut config = Config::default(); + let original_max_results = config.web_search.max_results; + let original_timeout = config.web_search.timeout_secs; + + std::env::set_var("WEB_SEARCH_MAX_RESULTS", "99"); + std::env::set_var("WEB_SEARCH_TIMEOUT_SECS", "0"); + + config.apply_env_overrides(); + + assert_eq!(config.web_search.max_results, original_max_results); + assert_eq!(config.web_search.timeout_secs, original_timeout); + + std::env::remove_var("WEB_SEARCH_MAX_RESULTS"); + std::env::remove_var("WEB_SEARCH_TIMEOUT_SECS"); + } + #[test] fn gateway_config_default_values() { let g = GatewayConfig::default(); diff --git a/src/tools/web_search_tool.rs b/src/tools/web_search_tool.rs index d9df6b6..fa3b750 100644 --- a/src/tools/web_search_tool.rs +++ b/src/tools/web_search_tool.rs @@ -21,10 +21,10 @@ impl WebSearchTool { timeout_secs: u64, ) -> Self { Self { - provider, + provider: provider.trim().to_lowercase(), brave_api_key, - max_results, - timeout_secs, + max_results: max_results.clamp(1, 10), + timeout_secs: timeout_secs.max(1), } } @@ -79,18 +79,9 @@ impl WebSearchTool { for i in 0..count { let caps = &link_matches[i]; - let mut url_str = caps[1].to_string(); + let url_str = decode_ddg_redirect_url(&caps[1]); let title = strip_tags(&caps[2]); - // Decode DDG redirect URL - if url_str.contains("uddg=") { - if let Ok(decoded) = urlencoding::decode(&url_str) { - if let Some(idx) = decoded.find("uddg=") { - url_str = decoded[idx + 5..].to_string(); - } - } - } - lines.push(format!("{}. {}", i + 1, title.trim())); lines.push(format!(" {}", url_str.trim())); @@ -173,6 +164,18 @@ impl WebSearchTool { } } +fn decode_ddg_redirect_url(raw_url: &str) -> String { + if let Some(index) = raw_url.find("uddg=") { + let encoded = &raw_url[index + 5..]; + let encoded = encoded.split('&').next().unwrap_or(encoded); + if let Ok(decoded) = urlencoding::decode(encoded) { + return decoded.into_owned(); + } + } + + raw_url.to_string() +} + fn strip_tags(content: &str) -> String { let re = Regex::new(r"<[^>]+>").unwrap(); re.replace_all(content, "").to_string() @@ -213,7 +216,7 @@ impl Tool for WebSearchTool { tracing::info!("Searching web for: {}", query); - let result = match self.provider.to_lowercase().as_str() { + let result = match self.provider.as_str() { "duckduckgo" | "ddg" => self.search_duckduckgo(query).await?, "brave" => self.search_brave(query).await?, _ => anyhow::bail!("Unknown search provider: {}", self.provider), @@ -278,6 +281,29 @@ mod tests { assert!(result.contains("https://example.com")); } + #[test] + fn test_parse_duckduckgo_results_decodes_redirect_url() { + let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15); + let html = r#" + Example Title + This is a description + "#; + let result = tool.parse_duckduckgo_results(html, "test").unwrap(); + assert!(result.contains("https://example.com/path?a=1")); + assert!(!result.contains("rut=test")); + } + + #[test] + fn test_constructor_clamps_web_search_limits() { + let tool = WebSearchTool::new("duckduckgo".to_string(), None, 0, 0); + let html = r#" + Example Title + This is a description + "#; + let result = tool.parse_duckduckgo_results(html, "test").unwrap(); + assert!(result.contains("Example Title")); + } + #[tokio::test] async fn test_execute_missing_query() { let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);