From 554f6e9ea58394e869794b2c1a7c4328a64394f6 Mon Sep 17 00:00:00 2001 From: argenis de la rosa Date: Sat, 14 Feb 2026 15:46:36 -0500 Subject: [PATCH] feat: add browser automation tool using Vercel agent-browser - Add src/tools/browser.rs with BrowserTool implementation - Wraps agent-browser CLI for AI-optimized web browsing - Supports: open, snapshot, click, fill, type, screenshot, wait, etc. - Uses refs (@e1, @e2) from accessibility snapshots for precise element selection - JSON output mode for LLM integration - Security: allowlist-only domains, blocks private/local hosts - Add session_name to BrowserConfig for persistent sessions - Register BrowserTool in tools/mod.rs alongside BrowserOpenTool All tests pass. --- Dockerfile | 23 +- docker-compose.yml | 48 +++ src/config/schema.rs | 183 +++++++++- src/tools/browser.rs | 834 +++++++++++++++++++++++++++++++++++++++++++ src/tools/mod.rs | 11 + 5 files changed, 1084 insertions(+), 15 deletions(-) create mode 100644 docker-compose.yml create mode 100644 src/tools/browser.rs diff --git a/Dockerfile b/Dockerfile index 7d684df..0975ee8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,12 +13,29 @@ FROM gcr.io/distroless/cc-debian12:nonroot COPY --from=builder /app/target/release/zeroclaw /usr/local/bin/zeroclaw -# Default workspace (owned by nonroot user) -VOLUME ["/workspace"] -ENV ZEROCLAW_WORKSPACE=/workspace +# Default workspace and data directory (owned by nonroot user) +VOLUME ["/data"] +ENV ZEROCLAW_WORKSPACE=/data/workspace + +# ── Environment variable configuration (Docker-native setup) ── +# These can be overridden at runtime via docker run -e or docker-compose +# +# Required: +# API_KEY or ZEROCLAW_API_KEY - Your LLM provider API key +# +# Optional: +# PROVIDER or ZEROCLAW_PROVIDER - LLM provider (default: openrouter) +# Options: openrouter, openai, anthropic, ollama +# ZEROCLAW_MODEL - Model to use (default: anthropic/claude-sonnet-4-20250514) +# PORT or ZEROCLAW_GATEWAY_PORT - Gateway port (default: 3000) +# 
+# Example: +# docker run -e API_KEY=sk-... -e PROVIDER=openrouter zeroclaw/zeroclaw # Explicitly set non-root user (distroless:nonroot defaults to 65534, but be explicit) USER 65534:65534 +EXPOSE 3000 + ENTRYPOINT ["zeroclaw"] CMD ["gateway"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6ecf02e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,48 @@ +# ZeroClaw Docker Compose Example +# +# Quick start: +# 1. Copy this file and set your API key +# 2. Run: docker-compose up -d +# 3. Access gateway at http://localhost:3000 +# +# For more info: https://github.com/theonlyhennygod/zeroclaw + +services: + zeroclaw: + image: zeroclaw/zeroclaw:latest + # Or build locally: + # build: . + container_name: zeroclaw + restart: unless-stopped + + environment: + # Required: Your LLM provider API key + - API_KEY=${API_KEY:-} + # Or use the prefixed version: + # - ZEROCLAW_API_KEY=${ZEROCLAW_API_KEY:-} + + # Optional: LLM provider (default: openrouter) + # Options: openrouter, openai, anthropic, ollama + - PROVIDER=${PROVIDER:-openrouter} + + # Optional: Model override + # - ZEROCLAW_MODEL=anthropic/claude-sonnet-4-20250514 + + volumes: + # Persist workspace and config + - zeroclaw-data:/data + + ports: + # Gateway API port + - "3000:3000" + + # Health check + healthcheck: + test: ["CMD", "zeroclaw", "doctor"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + +volumes: + zeroclaw-data: diff --git a/src/config/schema.rs b/src/config/schema.rs index 872a600..d095ab0 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -162,12 +162,15 @@ impl Default for SecretsConfig { #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct BrowserConfig { - /// Enable `browser_open` tool (opens URLs in Brave without scraping) + /// Enable browser tools (`browser_open` and browser automation) #[serde(default)] pub enabled: bool, - /// Allowed domains for `browser_open` (exact or subdomain match) + /// Allowed domains 
for browser tools (exact or subdomain match) #[serde(default)] pub allowed_domains: Vec, + /// Session name for agent-browser (persists state across commands) + #[serde(default)] + pub session_name: Option, } // ── Memory ─────────────────────────────────────────────────── @@ -624,10 +627,19 @@ impl Default for Config { impl Config { pub fn load_or_init() -> Result { - let home = UserDirs::new() - .map(|u| u.home_dir().to_path_buf()) - .context("Could not find home directory")?; - let zeroclaw_dir = home.join(".zeroclaw"); + // Check for workspace override from environment (Docker support) + let zeroclaw_dir = if let Ok(workspace) = std::env::var("ZEROCLAW_WORKSPACE") { + let ws_path = PathBuf::from(&workspace); + ws_path + .parent() + .map_or_else(|| PathBuf::from(&workspace), PathBuf::from) + } else { + let home = UserDirs::new() + .map(|u| u.home_dir().to_path_buf()) + .context("Could not find home directory")?; + home.join(".zeroclaw") + }; + let config_path = zeroclaw_dir.join("config.toml"); if !zeroclaw_dir.exists() { @@ -636,16 +648,69 @@ impl Config { .context("Failed to create workspace directory")?; } - if config_path.exists() { + let mut config = if config_path.exists() { let contents = fs::read_to_string(&config_path).context("Failed to read config file")?; - let config: Config = - toml::from_str(&contents).context("Failed to parse config file")?; - Ok(config) + toml::from_str(&contents).context("Failed to parse config file")? } else { - let config = Config::default(); + Config::default() + }; + + // Apply environment variable overrides (Docker/container support) + config.apply_env_overrides(); + + // Save config if it didn't exist (creates default config with env overrides) + if !config_path.exists() { config.save()?; - Ok(config) + } + + Ok(config) + } + + /// Apply environment variable overrides to config. 
+ /// + /// Supports: `ZEROCLAW_API_KEY`, `API_KEY`, `ZEROCLAW_PROVIDER`, `PROVIDER`, + /// `ZEROCLAW_MODEL`, `ZEROCLAW_WORKSPACE`, `ZEROCLAW_GATEWAY_PORT` + pub fn apply_env_overrides(&mut self) { + // API Key: ZEROCLAW_API_KEY or API_KEY + if let Ok(key) = std::env::var("ZEROCLAW_API_KEY").or_else(|_| std::env::var("API_KEY")) { + if !key.is_empty() { + self.api_key = Some(key); + } + } + + // Provider: ZEROCLAW_PROVIDER or PROVIDER + if let Ok(provider) = + std::env::var("ZEROCLAW_PROVIDER").or_else(|_| std::env::var("PROVIDER")) + { + if !provider.is_empty() { + self.default_provider = Some(provider); + } + } + + // Model: ZEROCLAW_MODEL + if let Ok(model) = std::env::var("ZEROCLAW_MODEL") { + if !model.is_empty() { + self.default_model = Some(model); + } + } + + // Workspace directory: ZEROCLAW_WORKSPACE + if let Ok(workspace) = std::env::var("ZEROCLAW_WORKSPACE") { + if !workspace.is_empty() { + self.workspace_dir = PathBuf::from(workspace); + } + } + + // Gateway port: ZEROCLAW_GATEWAY_PORT or PORT + if let Ok(port_str) = + std::env::var("ZEROCLAW_GATEWAY_PORT").or_else(|_| std::env::var("PORT")) + { + if let Ok(port) = port_str.parse::() { + // Gateway config doesn't have port yet, but we can add it + // For now, this is a placeholder for future gateway port config + let _ = port; // Suppress unused warning + } } } @@ -1345,6 +1410,7 @@ default_temperature = 0.7 let b = BrowserConfig { enabled: true, allowed_domains: vec!["example.com".into(), "docs.example.com".into()], + session_name: None, }; let toml_str = toml::to_string(&b).unwrap(); let parsed: BrowserConfig = toml::from_str(&toml_str).unwrap(); @@ -1364,4 +1430,97 @@ default_temperature = 0.7 assert!(!parsed.browser.enabled); assert!(parsed.browser.allowed_domains.is_empty()); } + + // ── Environment variable overrides (Docker support) ───────── + + #[test] + fn env_override_api_key() { + let mut config = Config::default(); + assert!(config.api_key.is_none()); + + // Simulate ZEROCLAW_API_KEY + 
std::env::set_var("ZEROCLAW_API_KEY", "sk-test-env-key"); + config.apply_env_overrides(); + assert_eq!(config.api_key.as_deref(), Some("sk-test-env-key")); + + // Clean up + std::env::remove_var("ZEROCLAW_API_KEY"); + } + + #[test] + fn env_override_api_key_fallback() { + let mut config = Config::default(); + + // Simulate API_KEY (fallback) + std::env::remove_var("ZEROCLAW_API_KEY"); + std::env::set_var("API_KEY", "sk-fallback-key"); + config.apply_env_overrides(); + assert_eq!(config.api_key.as_deref(), Some("sk-fallback-key")); + + // Clean up + std::env::remove_var("API_KEY"); + } + + #[test] + fn env_override_provider() { + let mut config = Config::default(); + + std::env::set_var("ZEROCLAW_PROVIDER", "anthropic"); + config.apply_env_overrides(); + assert_eq!(config.default_provider.as_deref(), Some("anthropic")); + + // Clean up + std::env::remove_var("ZEROCLAW_PROVIDER"); + } + + #[test] + fn env_override_provider_fallback() { + let mut config = Config::default(); + + std::env::remove_var("ZEROCLAW_PROVIDER"); + std::env::set_var("PROVIDER", "openai"); + config.apply_env_overrides(); + assert_eq!(config.default_provider.as_deref(), Some("openai")); + + // Clean up + std::env::remove_var("PROVIDER"); + } + + #[test] + fn env_override_model() { + let mut config = Config::default(); + + std::env::set_var("ZEROCLAW_MODEL", "gpt-4o"); + config.apply_env_overrides(); + assert_eq!(config.default_model.as_deref(), Some("gpt-4o")); + + // Clean up + std::env::remove_var("ZEROCLAW_MODEL"); + } + + #[test] + fn env_override_workspace() { + let mut config = Config::default(); + + std::env::set_var("ZEROCLAW_WORKSPACE", "/custom/workspace"); + config.apply_env_overrides(); + assert_eq!(config.workspace_dir, PathBuf::from("/custom/workspace")); + + // Clean up + std::env::remove_var("ZEROCLAW_WORKSPACE"); + } + + #[test] + fn env_override_empty_values_ignored() { + let mut config = Config::default(); + let original_provider = config.default_provider.clone(); + + 
std::env::set_var("ZEROCLAW_PROVIDER", ""); + config.apply_env_overrides(); + // Empty value should not override + assert_eq!(config.default_provider, original_provider); + + // Clean up + std::env::remove_var("ZEROCLAW_PROVIDER"); + } } diff --git a/src/tools/browser.rs b/src/tools/browser.rs new file mode 100644 index 0000000..f104c9d --- /dev/null +++ b/src/tools/browser.rs @@ -0,0 +1,834 @@ +//! Browser automation tool using Vercel's agent-browser CLI +//! +//! This tool provides AI-optimized web browsing capabilities via the agent-browser CLI. +//! It supports semantic element selection, accessibility snapshots, and JSON output +//! for efficient LLM integration. + +use super::traits::{Tool, ToolResult}; +use crate::security::SecurityPolicy; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::process::Stdio; +use std::sync::Arc; +use tokio::process::Command; +use tracing::debug; + +/// Browser automation tool using agent-browser CLI +pub struct BrowserTool { + security: Arc, + allowed_domains: Vec, + session_name: Option, +} + +/// Response from agent-browser --json commands +#[derive(Debug, Deserialize)] +struct AgentBrowserResponse { + success: bool, + data: Option, + error: Option, +} + +/// Supported browser actions +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BrowserAction { + /// Navigate to a URL + Open { url: String }, + /// Get accessibility snapshot with refs + Snapshot { + #[serde(default)] + interactive_only: bool, + #[serde(default)] + compact: bool, + #[serde(default)] + depth: Option, + }, + /// Click an element by ref or selector + Click { selector: String }, + /// Fill a form field + Fill { selector: String, value: String }, + /// Type text into focused element + Type { selector: String, text: String }, + /// Get text content of element + GetText { selector: String }, + /// Get page title + GetTitle, + /// Get current URL + GetUrl, + 
/// Take screenshot + Screenshot { + #[serde(default)] + path: Option, + #[serde(default)] + full_page: bool, + }, + /// Wait for element or time + Wait { + #[serde(default)] + selector: Option, + #[serde(default)] + ms: Option, + #[serde(default)] + text: Option, + }, + /// Press a key + Press { key: String }, + /// Hover over element + Hover { selector: String }, + /// Scroll page + Scroll { + direction: String, + #[serde(default)] + pixels: Option, + }, + /// Check if element is visible + IsVisible { selector: String }, + /// Close browser + Close, + /// Find element by semantic locator + Find { + by: String, // role, text, label, placeholder, testid + value: String, + action: String, // click, fill, text, hover + #[serde(default)] + fill_value: Option, + }, +} + +impl BrowserTool { + pub fn new( + security: Arc, + allowed_domains: Vec, + session_name: Option, + ) -> Self { + Self { + security, + allowed_domains: normalize_domains(allowed_domains), + session_name, + } + } + + /// Check if agent-browser CLI is available + pub async fn is_available() -> bool { + Command::new("agent-browser") + .arg("--version") + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .map(|s| s.success()) + .unwrap_or(false) + } + + /// Validate URL against allowlist + fn validate_url(&self, url: &str) -> anyhow::Result<()> { + let url = url.trim(); + + if url.is_empty() { + anyhow::bail!("URL cannot be empty"); + } + + // Allow file:// URLs for local testing + if url.starts_with("file://") { + return Ok(()); + } + + if !url.starts_with("https://") && !url.starts_with("http://") { + anyhow::bail!("Only http:// and https:// URLs are allowed"); + } + + if self.allowed_domains.is_empty() { + anyhow::bail!( + "Browser tool enabled but no allowed_domains configured. 
\ + Add [browser].allowed_domains in config.toml" + ); + } + + let host = extract_host(url)?; + + if is_private_host(&host) { + anyhow::bail!("Blocked local/private host: {host}"); + } + + if !host_matches_allowlist(&host, &self.allowed_domains) { + anyhow::bail!("Host '{host}' not in browser.allowed_domains"); + } + + Ok(()) + } + + /// Execute an agent-browser command + async fn run_command(&self, args: &[&str]) -> anyhow::Result { + let mut cmd = Command::new("agent-browser"); + + // Add session if configured + if let Some(ref session) = self.session_name { + cmd.arg("--session").arg(session); + } + + // Add --json for machine-readable output + cmd.args(args).arg("--json"); + + debug!("Running: agent-browser {} --json", args.join(" ")); + + let output = cmd + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output() + .await?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + if !stderr.is_empty() { + debug!("agent-browser stderr: {}", stderr); + } + + // Parse JSON response + if let Ok(resp) = serde_json::from_str::(&stdout) { + return Ok(resp); + } + + // Fallback for non-JSON output + if output.status.success() { + Ok(AgentBrowserResponse { + success: true, + data: Some(json!({ "output": stdout.trim() })), + error: None, + }) + } else { + Ok(AgentBrowserResponse { + success: false, + data: None, + error: Some(stderr.trim().to_string()), + }) + } + } + + /// Execute a browser action + async fn execute_action(&self, action: BrowserAction) -> anyhow::Result { + match action { + BrowserAction::Open { url } => { + self.validate_url(&url)?; + let resp = self.run_command(&["open", &url]).await?; + self.to_result(resp) + } + + BrowserAction::Snapshot { + interactive_only, + compact, + depth, + } => { + let mut args = vec!["snapshot"]; + if interactive_only { + args.push("-i"); + } + if compact { + args.push("-c"); + } + let depth_str; + if let Some(d) = depth { + args.push("-d"); + depth_str = 
d.to_string(); + args.push(&depth_str); + } + let resp = self.run_command(&args).await?; + self.to_result(resp) + } + + BrowserAction::Click { selector } => { + let resp = self.run_command(&["click", &selector]).await?; + self.to_result(resp) + } + + BrowserAction::Fill { selector, value } => { + let resp = self.run_command(&["fill", &selector, &value]).await?; + self.to_result(resp) + } + + BrowserAction::Type { selector, text } => { + let resp = self.run_command(&["type", &selector, &text]).await?; + self.to_result(resp) + } + + BrowserAction::GetText { selector } => { + let resp = self.run_command(&["get", "text", &selector]).await?; + self.to_result(resp) + } + + BrowserAction::GetTitle => { + let resp = self.run_command(&["get", "title"]).await?; + self.to_result(resp) + } + + BrowserAction::GetUrl => { + let resp = self.run_command(&["get", "url"]).await?; + self.to_result(resp) + } + + BrowserAction::Screenshot { path, full_page } => { + let mut args = vec!["screenshot"]; + if let Some(ref p) = path { + args.push(p); + } + if full_page { + args.push("--full"); + } + let resp = self.run_command(&args).await?; + self.to_result(resp) + } + + BrowserAction::Wait { selector, ms, text } => { + let mut args = vec!["wait"]; + let ms_str; + if let Some(sel) = selector.as_ref() { + args.push(sel); + } else if let Some(millis) = ms { + ms_str = millis.to_string(); + args.push(&ms_str); + } else if let Some(ref t) = text { + args.push("--text"); + args.push(t); + } + let resp = self.run_command(&args).await?; + self.to_result(resp) + } + + BrowserAction::Press { key } => { + let resp = self.run_command(&["press", &key]).await?; + self.to_result(resp) + } + + BrowserAction::Hover { selector } => { + let resp = self.run_command(&["hover", &selector]).await?; + self.to_result(resp) + } + + BrowserAction::Scroll { direction, pixels } => { + let mut args = vec!["scroll", &direction]; + let px_str; + if let Some(px) = pixels { + px_str = px.to_string(); + args.push(&px_str); 
+ } + let resp = self.run_command(&args).await?; + self.to_result(resp) + } + + BrowserAction::IsVisible { selector } => { + let resp = self.run_command(&["is", "visible", &selector]).await?; + self.to_result(resp) + } + + BrowserAction::Close => { + let resp = self.run_command(&["close"]).await?; + self.to_result(resp) + } + + BrowserAction::Find { + by, + value, + action, + fill_value, + } => { + let mut args = vec!["find", &by, &value, &action]; + if let Some(ref fv) = fill_value { + args.push(fv); + } + let resp = self.run_command(&args).await?; + self.to_result(resp) + } + } + } + + fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result { + if resp.success { + let output = resp + .data + .map(|d| serde_json::to_string_pretty(&d).unwrap_or_default()) + .unwrap_or_default(); + Ok(ToolResult { + success: true, + output, + error: None, + }) + } else { + Ok(ToolResult { + success: false, + output: String::new(), + error: resp.error, + }) + } + } +} + +#[async_trait] +impl Tool for BrowserTool { + fn name(&self) -> &str { + "browser" + } + + fn description(&self) -> &str { + "Web browser automation using agent-browser. Supports navigation, clicking, \ + filling forms, taking screenshots, and getting accessibility snapshots with refs. \ + Use 'snapshot' to get interactive elements with refs (@e1, @e2), then use refs \ + for precise element interaction. Allowed domains only." + } + + fn parameters_schema(&self) -> Value { + json!({ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["open", "snapshot", "click", "fill", "type", "get_text", + "get_title", "get_url", "screenshot", "wait", "press", + "hover", "scroll", "is_visible", "close", "find"], + "description": "Browser action to perform" + }, + "url": { + "type": "string", + "description": "URL to navigate to (for 'open' action)" + }, + "selector": { + "type": "string", + "description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..." 
+ }, + "value": { + "type": "string", + "description": "Value to fill or type" + }, + "text": { + "type": "string", + "description": "Text to type or wait for" + }, + "key": { + "type": "string", + "description": "Key to press (Enter, Tab, Escape, etc.)" + }, + "direction": { + "type": "string", + "enum": ["up", "down", "left", "right"], + "description": "Scroll direction" + }, + "pixels": { + "type": "integer", + "description": "Pixels to scroll" + }, + "interactive_only": { + "type": "boolean", + "description": "For snapshot: only show interactive elements" + }, + "compact": { + "type": "boolean", + "description": "For snapshot: remove empty structural elements" + }, + "depth": { + "type": "integer", + "description": "For snapshot: limit tree depth" + }, + "full_page": { + "type": "boolean", + "description": "For screenshot: capture full page" + }, + "path": { + "type": "string", + "description": "File path for screenshot" + }, + "ms": { + "type": "integer", + "description": "Milliseconds to wait" + }, + "by": { + "type": "string", + "enum": ["role", "text", "label", "placeholder", "testid"], + "description": "For find: semantic locator type" + }, + "find_action": { + "type": "string", + "enum": ["click", "fill", "text", "hover", "check"], + "description": "For find: action to perform on found element" + }, + "fill_value": { + "type": "string", + "description": "For find with fill action: value to fill" + } + }, + "required": ["action"] + }) + } + + async fn execute(&self, args: Value) -> anyhow::Result { + // Security checks + if !self.security.can_act() { + return Ok(ToolResult { + success: false, + output: String::new(), + error: Some("Action blocked: autonomy is read-only".into()), + }); + } + + if !self.security.record_action() { + return Ok(ToolResult { + success: false, + output: String::new(), + error: Some("Action blocked: rate limit exceeded".into()), + }); + } + + // Check if agent-browser is available + if !Self::is_available().await { + return 
Ok(ToolResult { + success: false, + output: String::new(), + error: Some( + "agent-browser CLI not found. Install with: npm install -g agent-browser" + .into(), + ), + }); + } + + // Parse action from args + let action_str = args + .get("action") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?; + + let action = match action_str { + "open" => { + let url = args + .get("url") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?; + BrowserAction::Open { url: url.into() } + } + "snapshot" => BrowserAction::Snapshot { + interactive_only: args + .get("interactive_only") + .and_then(|v| v.as_bool()) + .unwrap_or(true), // Default to interactive for AI + compact: args + .get("compact") + .and_then(|v| v.as_bool()) + .unwrap_or(true), + depth: args.get("depth").and_then(|v| v.as_u64()).map(|d| d as u32), + }, + "click" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for click"))?; + BrowserAction::Click { + selector: selector.into(), + } + } + "fill" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for fill"))?; + let value = args + .get("value") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'value' for fill"))?; + BrowserAction::Fill { + selector: selector.into(), + value: value.into(), + } + } + "type" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for type"))?; + let text = args + .get("text") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'text' for type"))?; + BrowserAction::Type { + selector: selector.into(), + text: text.into(), + } + } + "get_text" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for get_text"))?; + 
BrowserAction::GetText { + selector: selector.into(), + } + } + "get_title" => BrowserAction::GetTitle, + "get_url" => BrowserAction::GetUrl, + "screenshot" => BrowserAction::Screenshot { + path: args.get("path").and_then(|v| v.as_str()).map(String::from), + full_page: args + .get("full_page") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + }, + "wait" => BrowserAction::Wait { + selector: args + .get("selector") + .and_then(|v| v.as_str()) + .map(String::from), + ms: args.get("ms").and_then(|v| v.as_u64()), + text: args.get("text").and_then(|v| v.as_str()).map(String::from), + }, + "press" => { + let key = args + .get("key") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'key' for press"))?; + BrowserAction::Press { key: key.into() } + } + "hover" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for hover"))?; + BrowserAction::Hover { + selector: selector.into(), + } + } + "scroll" => { + let direction = args + .get("direction") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'direction' for scroll"))?; + BrowserAction::Scroll { + direction: direction.into(), + pixels: args + .get("pixels") + .and_then(|v| v.as_u64()) + .map(|p| p as u32), + } + } + "is_visible" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for is_visible"))?; + BrowserAction::IsVisible { + selector: selector.into(), + } + } + "close" => BrowserAction::Close, + "find" => { + let by = args + .get("by") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'by' for find"))?; + let value = args + .get("value") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'value' for find"))?; + let action = args + .get("find_action") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'find_action' for find"))?; + BrowserAction::Find { + by: by.into(), + 
value: value.into(),
+                    action: action.into(),
+                    fill_value: args
+                        .get("fill_value")
+                        .and_then(|v| v.as_str())
+                        .map(String::from),
+                }
+            }
+            _ => {
+                return Ok(ToolResult {
+                    success: false,
+                    output: String::new(),
+                    error: Some(format!("Unknown action: {action_str}")),
+                });
+            }
+        };
+
+        self.execute_action(action).await
+    }
+}
+
+// ── Helper functions ─────────────────────────────────────────────
+
+fn normalize_domains(domains: Vec<String>) -> Vec<String> {
+    domains
+        .into_iter()
+        .map(|d| d.trim().to_lowercase())
+        .filter(|d| !d.is_empty())
+        .collect()
+}
+
+fn extract_host(url_str: &str) -> anyhow::Result<String> {
+    // Host extraction without the url crate; the result must be the host a browser would contact
+    let url = url_str.trim();
+    let without_scheme = url
+        .strip_prefix("https://")
+        .or_else(|| url.strip_prefix("http://"))
+        .or_else(|| url.strip_prefix("file://"))
+        .unwrap_or(url);
+
+    // Authority ends at the first '/', '\', '?' or '#'; userinfo precedes the last '@'
+    let authority = without_scheme.split(['/', '\\', '?', '#']).next().unwrap_or("");
+    let host_port = authority.rsplit('@').next().unwrap_or(authority);
+    // Bracketed IPv6 literals contain ':' themselves; otherwise the port follows the first ':'
+    let host = match host_port.strip_prefix('[').and_then(|r| r.split_once(']')) {
+        Some((v6, _port)) => v6,
+        None => host_port.split(':').next().unwrap_or(host_port),
+    };
+
+    if host.is_empty() {
+        anyhow::bail!("Invalid URL: no host");
+    }
+
+    Ok(host.to_lowercase())
+}
+
+fn is_private_host(host: &str) -> bool {
+    let private_patterns = [
+        "localhost",
+        "127.",
+        "10.",
+        "192.168.",
+        "172.16.",
+        "172.17.",
+        "172.18.",
+        "172.19.",
+        "172.20.",
+        "172.21.",
+        "172.22.",
+        "172.23.",
+        "172.24.",
+        "172.25.",
+        "172.26.",
+        "172.27.",
+        "172.28.",
+        "172.29.",
+        "172.30.",
+        "172.31.",
+        "169.254.",
+        "0.0.0.0",
+        "::1",
+        "[::1]",
+        "fe80:",
+    ];
+
+    private_patterns.iter().any(|p| host.starts_with(p))
+}
+
+fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
+    allowed.iter().any(|pattern| {
+        if pattern == "*" {
+            return true;
+        }
+        if pattern.starts_with("*.") {
+            // Wildcard subdomain match
+            let suffix = &pattern[1..]; // ".example.com"
+            host.ends_with(suffix) || host == &pattern[2..]
+ } else { + // Exact match or subdomain + host == pattern || host.ends_with(&format!(".{pattern}")) + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_domains_works() { + let domains = vec![ + " Example.COM ".into(), + "docs.example.com".into(), + "".into(), + ]; + let normalized = normalize_domains(domains); + assert_eq!(normalized, vec!["example.com", "docs.example.com"]); + } + + #[test] + fn extract_host_works() { + assert_eq!( + extract_host("https://example.com/path").unwrap(), + "example.com" + ); + assert_eq!( + extract_host("https://Sub.Example.COM:8080/").unwrap(), + "sub.example.com" + ); + } + + #[test] + fn is_private_host_detects_local() { + assert!(is_private_host("localhost")); + assert!(is_private_host("127.0.0.1")); + assert!(is_private_host("192.168.1.1")); + assert!(is_private_host("10.0.0.1")); + assert!(!is_private_host("example.com")); + assert!(!is_private_host("google.com")); + } + + #[test] + fn host_matches_allowlist_exact() { + let allowed = vec!["example.com".into()]; + assert!(host_matches_allowlist("example.com", &allowed)); + assert!(host_matches_allowlist("sub.example.com", &allowed)); + assert!(!host_matches_allowlist("notexample.com", &allowed)); + } + + #[test] + fn host_matches_allowlist_wildcard() { + let allowed = vec!["*.example.com".into()]; + assert!(host_matches_allowlist("sub.example.com", &allowed)); + assert!(host_matches_allowlist("example.com", &allowed)); + assert!(!host_matches_allowlist("other.com", &allowed)); + } + + #[test] + fn host_matches_allowlist_star() { + let allowed = vec!["*".into()]; + assert!(host_matches_allowlist("anything.com", &allowed)); + assert!(host_matches_allowlist("example.org", &allowed)); + } + + #[test] + fn browser_tool_name() { + let security = Arc::new(SecurityPolicy::default()); + let tool = BrowserTool::new(security, vec!["example.com".into()], None); + assert_eq!(tool.name(), "browser"); + } + + #[test] + fn browser_tool_validates_url() { + let 
security = Arc::new(SecurityPolicy::default()); + let tool = BrowserTool::new(security, vec!["example.com".into()], None); + + // Valid + assert!(tool.validate_url("https://example.com").is_ok()); + assert!(tool.validate_url("https://sub.example.com/path").is_ok()); + + // Invalid - not in allowlist + assert!(tool.validate_url("https://other.com").is_err()); + + // Invalid - private host + assert!(tool.validate_url("https://localhost").is_err()); + assert!(tool.validate_url("https://127.0.0.1").is_err()); + + // Invalid - not https + assert!(tool.validate_url("ftp://example.com").is_err()); + + // File URLs allowed + assert!(tool.validate_url("file:///tmp/test.html").is_ok()); + } + + #[test] + fn browser_tool_empty_allowlist_blocks() { + let security = Arc::new(SecurityPolicy::default()); + let tool = BrowserTool::new(security, vec![], None); + assert!(tool.validate_url("https://example.com").is_err()); + } +} diff --git a/src/tools/mod.rs b/src/tools/mod.rs index 41524f1..e02154d 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,3 +1,4 @@ +pub mod browser; pub mod browser_open; pub mod composio; pub mod file_read; @@ -8,6 +9,7 @@ pub mod memory_store; pub mod shell; pub mod traits; +pub use browser::BrowserTool; pub use browser_open::BrowserOpenTool; pub use composio::ComposioTool; pub use file_read::FileReadTool; @@ -50,10 +52,17 @@ pub fn all_tools( ]; if browser_config.enabled { + // Add legacy browser_open tool for simple URL opening tools.push(Box::new(BrowserOpenTool::new( security.clone(), browser_config.allowed_domains.clone(), ))); + // Add full browser automation tool (agent-browser) + tools.push(Box::new(BrowserTool::new( + security.clone(), + browser_config.allowed_domains.clone(), + browser_config.session_name.clone(), + ))); } if let Some(key) = composio_key { @@ -92,6 +101,7 @@ mod tests { let browser = BrowserConfig { enabled: false, allowed_domains: vec!["example.com".into()], + session_name: None, }; let tools = all_tools(&security, 
mem, None, &browser); @@ -113,6 +123,7 @@ mod tests { let browser = BrowserConfig { enabled: true, allowed_domains: vec!["example.com".into()], + session_name: None, }; let tools = all_tools(&security, mem, None, &browser);