From 1757add64aa7c22266b326560967453c67779e85 Mon Sep 17 00:00:00 2001 From: adisusilayasa <3523adisusilayasa@gmail.com> Date: Wed, 18 Feb 2026 14:40:46 +0800 Subject: [PATCH] feat(tools): add web_search_tool for internet search Add native web search capability that works regardless of LLM tool-calling support. This is particularly useful for GLM models via Z.AI that don't reliably support standard tool calling formats. Features: - DuckDuckGo provider (free, no API key required) - Brave Search provider (optional, requires API key) - Configurable max results and timeout - Enabled by default Configuration (config.toml): [web_search] enabled = true provider = "duckduckgo" max_results = 5 The tool allows agents to search the web for current information without requiring proper tool calling support from the LLM. Also includes CI workflow fix for first-interaction action inputs. --- .env.example | 13 ++ .github/workflows/pr-auto-response.yml | 6 +- Cargo.lock | 7 + Cargo.toml | 3 + src/config/mod.rs | 4 +- src/config/schema.rs | 49 ++++ src/onboard/wizard.rs | 2 + src/tools/mod.rs | 12 + src/tools/web_search_tool.rs | 302 +++++++++++++++++++++++++ 9 files changed, 394 insertions(+), 4 deletions(-) create mode 100644 src/tools/web_search_tool.rs diff --git a/.env.example b/.env.example index 97e6b37..93bac19 100644 --- a/.env.example +++ b/.env.example @@ -81,3 +81,16 @@ PROVIDER=openrouter # # Common models: glm-5, glm-4.7, glm-4-plus, glm-4-flash # See docs/zai-glm-setup.md for detailed configuration. + +# ── Web Search ──────────────────────────────────────────────── +# Web search tool for finding information on the internet. +# Enabled by default with DuckDuckGo (free, no API key required). 
+# +# WEB_SEARCH_ENABLED=true +# WEB_SEARCH_PROVIDER=duckduckgo +# WEB_SEARCH_MAX_RESULTS=5 +# WEB_SEARCH_TIMEOUT_SECS=15 +# +# Optional: Brave Search (requires API key from https://brave.com/search/api) +# WEB_SEARCH_PROVIDER=brave +# BRAVE_API_KEY=your-brave-search-api-key diff --git a/.github/workflows/pr-auto-response.yml b/.github/workflows/pr-auto-response.yml index df4e304..08afd4b 100644 --- a/.github/workflows/pr-auto-response.yml +++ b/.github/workflows/pr-auto-response.yml @@ -40,8 +40,8 @@ jobs: - name: Greet first-time contributors uses: actions/first-interaction@a1db7729b356323c7988c20ed6f0d33fe31297be # v1 with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - issue-message: | + repo_token: ${{ secrets.GITHUB_TOKEN }} + issue_message: | Thanks for opening this issue. Before maintainers triage it, please confirm: @@ -50,7 +50,7 @@ jobs: - Sensitive values are redacted This helps us keep issue throughput high and response latency low. - pr-message: | + pr_message: | Thanks for contributing to ZeroClaw. 
For faster review, please ensure: diff --git a/Cargo.lock b/Cargo.lock index 8c40ac9..fce1823 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4410,6 +4410,12 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf-8" version = "0.7.6" @@ -5225,6 +5231,7 @@ dependencies = [ "tower-http", "tracing", "tracing-subscriber", + "urlencoding", "uuid", "webpki-roots 1.0.6", ] diff --git a/Cargo.toml b/Cargo.toml index ecb3c0e..fc67ff4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,9 @@ prometheus = { version = "0.14", default-features = false } # Base64 encoding (screenshots, image data) base64 = "0.22" +# URL encoding for web search +urlencoding = "2.1" + # Optional Rust-native browser automation backend fantoccini = { version = "0.22.0", optional = true, default-features = false, features = ["rustls-tls"] } diff --git a/src/config/mod.rs b/src/config/mod.rs index a78b132..2f7409f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -9,7 +9,8 @@ pub use schema::{ LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig, PeripheralBoardConfig, PeripheralsConfig, QueryClassificationConfig, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig, - SecretsConfig, SecurityConfig, SlackConfig, TelegramConfig, TunnelConfig, WebhookConfig, + SecretsConfig, SecurityConfig, SlackConfig, TelegramConfig, TunnelConfig, WebSearchConfig, + WebhookConfig, }; #[cfg(test)] diff --git a/src/config/schema.rs b/src/config/schema.rs index 175fcbc..f1569e8 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -81,6 +81,9 @@ pub struct Config { #[serde(default)] pub http_request: HttpRequestConfig, + #[serde(default)] + pub web_search: WebSearchConfig, + #[serde(default)] 
pub identity: IdentityConfig, @@ -721,6 +724,51 @@ fn default_http_timeout_secs() -> u64 { 30 } +// ── Web search ─────────────────────────────────────────────────── + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WebSearchConfig { + /// Enable `web_search_tool` for web searches + #[serde(default = "default_true")] + pub enabled: bool, + /// Search provider: "duckduckgo" (free, no API key) or "brave" (requires API key) + #[serde(default = "default_web_search_provider")] + pub provider: String, + /// Brave Search API key (required if provider is "brave") + #[serde(default)] + pub brave_api_key: Option<String>, + /// Maximum results per search (1-10) + #[serde(default = "default_web_search_max_results")] + pub max_results: usize, + /// Request timeout in seconds + #[serde(default = "default_web_search_timeout_secs")] + pub timeout_secs: u64, +} + +fn default_web_search_provider() -> String { + "duckduckgo".into() +} + +fn default_web_search_max_results() -> usize { + 5 +} + +fn default_web_search_timeout_secs() -> u64 { + 15 +} + +impl Default for WebSearchConfig { + fn default() -> Self { + Self { + enabled: true, + provider: default_web_search_provider(), + brave_api_key: None, + max_results: default_web_search_max_results(), + timeout_secs: default_web_search_timeout_secs(), + } + } +} + // ── Memory ─────────────────────────────────────────────────── #[derive(Debug, Clone, Serialize, Deserialize)] @@ -1773,6 +1821,7 @@ impl Default for Config { secrets: SecretsConfig::default(), browser: BrowserConfig::default(), http_request: HttpRequestConfig::default(), + web_search: WebSearchConfig::default(), identity: IdentityConfig::default(), cost: CostConfig::default(), peripherals: PeripheralsConfig::default(), diff --git a/src/onboard/wizard.rs b/src/onboard/wizard.rs index fdba1f9..74b13d4 100644 --- a/src/onboard/wizard.rs +++ b/src/onboard/wizard.rs @@ -131,6 +131,7 @@ pub fn run_wizard() -> Result { secrets: secrets_config, browser: BrowserConfig::default(), 
http_request: crate::config::HttpRequestConfig::default(), + web_search: crate::config::WebSearchConfig::default(), identity: crate::config::IdentityConfig::default(), cost: crate::config::CostConfig::default(), peripherals: crate::config::PeripheralsConfig::default(), @@ -352,6 +353,7 @@ pub fn run_quick_setup( secrets: SecretsConfig::default(), browser: BrowserConfig::default(), http_request: crate::config::HttpRequestConfig::default(), + web_search: crate::config::WebSearchConfig::default(), identity: crate::config::IdentityConfig::default(), cost: crate::config::CostConfig::default(), peripherals: crate::config::PeripheralsConfig::default(), diff --git a/src/tools/mod.rs b/src/tools/mod.rs index 03fc067..919b27d 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -25,6 +25,7 @@ pub mod schema; pub mod screenshot; pub mod shell; pub mod traits; +pub mod web_search_tool; pub use browser::{BrowserTool, ComputerUseConfig}; pub use browser_open::BrowserOpenTool; @@ -56,6 +57,7 @@ pub use shell::ShellTool; pub use traits::Tool; #[allow(unused_imports)] pub use traits::{ToolResult, ToolSpec}; +pub use web_search_tool::WebSearchTool; use crate::config::{Config, DelegateAgentConfig}; use crate::memory::Memory; @@ -188,6 +190,16 @@ pub fn all_tools_with_runtime( ))); } + // Web search tool (enabled by default for GLM and other models) + if root_config.web_search.enabled { + tools.push(Box::new(WebSearchTool::new( + root_config.web_search.provider.clone(), + root_config.web_search.brave_api_key.clone(), + root_config.web_search.max_results, + root_config.web_search.timeout_secs, + ))); + } + // Vision tools are always available tools.push(Box::new(ScreenshotTool::new(security.clone()))); tools.push(Box::new(ImageInfoTool::new(security.clone()))); diff --git a/src/tools/web_search_tool.rs b/src/tools/web_search_tool.rs new file mode 100644 index 0000000..d9df6b6 --- /dev/null +++ b/src/tools/web_search_tool.rs @@ -0,0 +1,398 @@
+use super::traits::{Tool, ToolResult};
+use async_trait::async_trait;
+use regex::Regex;
+use serde_json::json;
+use std::sync::OnceLock;
+use std::time::Duration;
+
+/// Lower bound of the documented `max_results` range.
+const MIN_RESULTS: usize = 1;
+/// Upper bound of the documented `max_results` range.
+const MAX_RESULTS: usize = 10;
+
+/// DuckDuckGo result links: `<a class="result__a" href="...">Title</a>`.
+const LINK_PATTERN: &str =
+    r#"<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>"#;
+/// DuckDuckGo result snippets: `<a class="result__snippet">...</a>`.
+const SNIPPET_PATTERN: &str = r#"<a class="result__snippet[^"]*"[^>]*>([\s\S]*?)</a>"#;
+/// Any HTML tag.
+const TAG_PATTERN: &str = r"<[^>]+>";
+
+/// Web search tool for searching the internet.
+///
+/// Supports multiple providers: DuckDuckGo (free, scraped from its HTML
+/// endpoint) and Brave (JSON API, requires an API key).
+pub struct WebSearchTool {
+    provider: String,
+    brave_api_key: Option<String>,
+    max_results: usize,
+    timeout_secs: u64,
+}
+
+impl WebSearchTool {
+    /// Creates a tool for `provider` ("duckduckgo"/"ddg" or "brave").
+    ///
+    /// `max_results` is clamped to `1..=10` and `timeout_secs` is raised to at
+    /// least one second, so a zero in the config cannot produce an empty
+    /// result list or a request that times out immediately.
+    pub fn new(
+        provider: String,
+        brave_api_key: Option<String>,
+        max_results: usize,
+        timeout_secs: u64,
+    ) -> Self {
+        Self {
+            provider,
+            brave_api_key,
+            max_results: max_results.clamp(MIN_RESULTS, MAX_RESULTS),
+            timeout_secs: timeout_secs.max(1),
+        }
+    }
+
+    /// Fetches DuckDuckGo's HTML results page for `query` and formats the hits.
+    ///
+    /// # Errors
+    /// Fails when the request cannot be completed or the response status is
+    /// not a success.
+    async fn search_duckduckgo(&self, query: &str) -> anyhow::Result<String> {
+        let encoded_query = urlencoding::encode(query);
+        let search_url = format!("https://html.duckduckgo.com/html/?q={}", encoded_query);
+
+        let client = reqwest::Client::builder()
+            .timeout(Duration::from_secs(self.timeout_secs))
+            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+            .build()?;
+
+        let response = client.get(&search_url).send().await?;
+
+        if !response.status().is_success() {
+            anyhow::bail!(
+                "DuckDuckGo search failed with status: {}",
+                response.status()
+            );
+        }
+
+        let html = response.text().await?;
+        self.parse_duckduckgo_results(&html, query)
+    }
+
+    /// Extracts up to `max_results` hits from a DuckDuckGo HTML page.
+    ///
+    /// Snippets are matched separately from links and paired with them by
+    /// position in the page.
+    fn parse_duckduckgo_results(&self, html: &str, query: &str) -> anyhow::Result<String> {
+        let links: Vec<_> = link_regex()
+            .captures_iter(html)
+            .take(self.max_results)
+            .collect();
+
+        if links.is_empty() {
+            return Ok(format!("No results found for: {}", query));
+        }
+
+        let mut snippets = snippet_regex().captures_iter(html);
+        let mut lines = vec![format!("Search results for: {} (via DuckDuckGo)", query)];
+
+        for (i, caps) in links.iter().enumerate() {
+            lines.push(format!("{}. {}", i + 1, clean_text(&caps[2])));
+            lines.push(format!("   {}", resolve_ddg_url(&caps[1])));
+
+            // Add snippet if available
+            if let Some(snippet_caps) = snippets.next() {
+                let snippet = clean_text(&snippet_caps[1]);
+                if !snippet.is_empty() {
+                    lines.push(format!("   {}", snippet));
+                }
+            }
+        }
+
+        Ok(lines.join("\n"))
+    }
+
+    /// Queries the Brave Search API for `query` and formats the hits.
+    ///
+    /// # Errors
+    /// Fails when no API key is configured, the request cannot be completed,
+    /// the response status is not a success, or the body cannot be parsed.
+    async fn search_brave(&self, query: &str) -> anyhow::Result<String> {
+        // A blank key is treated like a missing one, so the problem is
+        // reported here before any request is sent.
+        let api_key = self
+            .brave_api_key
+            .as_deref()
+            .map(str::trim)
+            .filter(|key| !key.is_empty())
+            .ok_or_else(|| anyhow::anyhow!("Brave API key not configured"))?;
+
+        let encoded_query = urlencoding::encode(query);
+        let search_url = format!(
+            "https://api.search.brave.com/res/v1/web/search?q={}&count={}",
+            encoded_query, self.max_results
+        );
+
+        let client = reqwest::Client::builder()
+            .timeout(Duration::from_secs(self.timeout_secs))
+            .build()?;
+
+        let response = client
+            .get(&search_url)
+            .header("Accept", "application/json")
+            .header("X-Subscription-Token", api_key)
+            .send()
+            .await?;
+
+        if !response.status().is_success() {
+            anyhow::bail!("Brave search failed with status: {}", response.status());
+        }
+
+        let json: serde_json::Value = response.json().await?;
+        self.parse_brave_results(&json, query)
+    }
+
+    /// Formats the `web.results` array of a Brave Search API response.
+    ///
+    /// # Errors
+    /// Fails when the response has no `web.results` array.
+    fn parse_brave_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
+        let results = json
+            .get("web")
+            .and_then(|w| w.get("results"))
+            .and_then(|r| r.as_array())
+            .ok_or_else(|| anyhow::anyhow!("Invalid Brave API response"))?;
+
+        if results.is_empty() {
+            return Ok(format!("No results found for: {}", query));
+        }
+
+        let mut lines = vec![format!("Search results for: {} (via Brave)", query)];
+
+        for (i, result) in results.iter().take(self.max_results).enumerate() {
+            let title = result
+                .get("title")
+                .and_then(|t| t.as_str())
+                .unwrap_or("No title");
+            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
+            // Text fields may carry markup and entities; show plain text.
+            let description = result
+                .get("description")
+                .and_then(|d| d.as_str())
+                .map(clean_text)
+                .unwrap_or_default();
+
+            lines.push(format!("{}. {}", i + 1, clean_text(title)));
+            lines.push(format!("   {}", url));
+            if !description.is_empty() {
+                lines.push(format!("   {}", description));
+            }
+        }
+
+        Ok(lines.join("\n"))
+    }
+}
+
+/// Returns the compiled result-link regex, building it on first use.
+fn link_regex() -> &'static Regex {
+    static RE: OnceLock<Regex> = OnceLock::new();
+    RE.get_or_init(|| Regex::new(LINK_PATTERN).expect("LINK_PATTERN is a valid regex"))
+}
+
+/// Returns the compiled result-snippet regex, building it on first use.
+fn snippet_regex() -> &'static Regex {
+    static RE: OnceLock<Regex> = OnceLock::new();
+    RE.get_or_init(|| Regex::new(SNIPPET_PATTERN).expect("SNIPPET_PATTERN is a valid regex"))
+}
+
+/// Removes every HTML tag from `content`, keeping the text between tags.
+fn strip_tags(content: &str) -> String {
+    static RE: OnceLock<Regex> = OnceLock::new();
+    let re = RE.get_or_init(|| Regex::new(TAG_PATTERN).expect("TAG_PATTERN is a valid regex"));
+    re.replace_all(content, "").into_owned()
+}
+
+/// Decodes the HTML entities that most often appear in result text.
+fn decode_entities(text: &str) -> String {
+    // `&amp;` is decoded last so that `&amp;lt;` yields the literal `&lt;`.
+    text.replace("&lt;", "<")
+        .replace("&gt;", ">")
+        .replace("&quot;", "\"")
+        .replace("&#x27;", "'")
+        .replace("&#39;", "'")
+        .replace("&nbsp;", " ")
+        .replace("&amp;", "&")
+}
+
+/// Turns an HTML fragment into trimmed plain text.
+fn clean_text(fragment: &str) -> String {
+    decode_entities(&strip_tags(fragment)).trim().to_string()
+}
+
+/// Resolves a DuckDuckGo result `href` to the destination URL.
+///
+/// Redirect links carry the percent-encoded target in a `uddg` query
+/// parameter. The value is cut at the next `&` before decoding, so any
+/// following parameters are not appended to the target. Links without `uddg`,
+/// or whose value cannot be decoded, are returned unchanged.
+fn resolve_ddg_url(href: &str) -> String {
+    let href = href.trim();
+    let encoded = match href.split_once("uddg=") {
+        Some((_, rest)) => rest.split('&').next().unwrap_or(rest),
+        None => return href.to_string(),
+    };
+    match urlencoding::decode(encoded) {
+        Ok(target) => target.into_owned(),
+        Err(_) => href.to_string(),
+    }
+}
+
+#[async_trait]
+impl Tool for WebSearchTool {
+    fn name(&self) -> &str {
+        "web_search_tool"
+    }
+
+    fn description(&self) -> &str {
+        "Search the web for information. Returns relevant search results with titles, URLs, and descriptions. Use this to find current information, news, or research topics."
+    }
+
+    fn parameters_schema(&self) -> serde_json::Value {
+        json!({
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query. Be specific for better results."
+                }
+            },
+            "required": ["query"]
+        })
+    }
+
+    /// Runs a search for the `query` argument with the configured provider.
+    ///
+    /// # Errors
+    /// Fails when `query` is missing or blank, the provider is unknown, or
+    /// the provider request fails.
+    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
+        let query = args
+            .get("query")
+            .and_then(|q| q.as_str())
+            .map(str::trim)
+            .ok_or_else(|| anyhow::anyhow!("Missing required parameter: query"))?;
+
+        if query.is_empty() {
+            anyhow::bail!("Search query cannot be empty");
+        }
+
+        tracing::info!("Searching web for: {}", query);
+
+        let result = match self.provider.to_lowercase().as_str() {
+            "duckduckgo" | "ddg" => self.search_duckduckgo(query).await?,
+            "brave" => self.search_brave(query).await?,
+            _ => anyhow::bail!("Unknown search provider: {}", self.provider),
+        };
+
+        Ok(ToolResult {
+            success: true,
+            output: result,
+            error: None,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tool_name() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        assert_eq!(tool.name(), "web_search_tool");
+    }
+
+    #[test]
+    fn test_tool_description() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        assert!(tool.description().contains("Search the web"));
+    }
+
+    #[test]
+    fn test_parameters_schema() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        let schema = tool.parameters_schema();
+        assert_eq!(schema["type"], "object");
+        assert!(schema["properties"]["query"].is_object());
+    }
+
+    #[test]
+    fn test_max_results_is_clamped() {
+        let low = WebSearchTool::new("duckduckgo".to_string(), None, 0, 15);
+        let high = WebSearchTool::new("duckduckgo".to_string(), None, 50, 15);
+        assert_eq!(low.max_results, 1);
+        assert_eq!(high.max_results, 10);
+    }
+
+    #[test]
+    fn test_strip_tags() {
+        let html = "<p>Hello <b>World</b></p>";
+        assert_eq!(strip_tags(html), "Hello World");
+    }
+
+    #[test]
+    fn test_clean_text_decodes_entities() {
+        assert_eq!(clean_text(" <b>Tom &amp; Jerry&#x27;s</b> "), "Tom & Jerry's");
+    }
+
+    #[test]
+    fn test_resolve_ddg_url_drops_trailing_params() {
+        let href = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fa%3Fb%3D1%26c%3D2&amp;rut=abc";
+        assert_eq!(resolve_ddg_url(href), "https://example.com/a?b=1&c=2");
+    }
+
+    #[test]
+    fn test_resolve_ddg_url_keeps_direct_links() {
+        assert_eq!(resolve_ddg_url("https://example.com"), "https://example.com");
+    }
+
+    #[test]
+    fn test_parse_duckduckgo_results_empty() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        let result = tool
+            .parse_duckduckgo_results("<html><body>No results here</body></html>", "test")
+            .unwrap();
+        assert!(result.contains("No results found"));
+    }
+
+    #[test]
+    fn test_parse_duckduckgo_results_with_data() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        let html = r#"
+            <a class="result__a" href="https://example.com">Example Title</a>
+            <a class="result__snippet">This is a description</a>
+        "#;
+        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
+        assert!(result.contains("Example Title"));
+        assert!(result.contains("https://example.com"));
+    }
+
+    #[tokio::test]
+    async fn test_execute_missing_query() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        let result = tool.execute(json!({})).await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_execute_empty_query() {
+        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
+        let result = tool.execute(json!({"query": ""})).await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_execute_brave_without_api_key() {
+        let tool = WebSearchTool::new("brave".to_string(), None, 5, 15);
+        let result = tool.execute(json!({"query": "test"})).await;
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("API key"));
+    }
+}