//! Browser automation tool with pluggable backends. //! //! By default this uses Vercel's `agent-browser` CLI for automation. //! Optionally, a Rust-native backend can be enabled at build time via //! `--features browser-native` and selected through config. //! Computer-use (OS-level) actions are supported via an optional sidecar endpoint. use super::traits::{Tool, ToolResult}; use crate::security::SecurityPolicy; use anyhow::Context; use async_trait::async_trait; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::net::ToSocketAddrs; use std::process::Stdio; use std::sync::Arc; use std::time::Duration; use tokio::process::Command; use tracing::debug; /// Computer-use sidecar settings. #[derive(Clone)] pub struct ComputerUseConfig { pub endpoint: String, pub api_key: Option, pub timeout_ms: u64, pub allow_remote_endpoint: bool, pub window_allowlist: Vec, pub max_coordinate_x: Option, pub max_coordinate_y: Option, } impl std::fmt::Debug for ComputerUseConfig { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ComputerUseConfig") .field("endpoint", &self.endpoint) .field("api_key", &self.api_key.as_ref().map(|_| "[REDACTED]")) .field("timeout_ms", &self.timeout_ms) .field("allow_remote_endpoint", &self.allow_remote_endpoint) .field("window_allowlist", &self.window_allowlist) .field("max_coordinate_x", &self.max_coordinate_x) .field("max_coordinate_y", &self.max_coordinate_y) .finish() } } impl Default for ComputerUseConfig { fn default() -> Self { Self { endpoint: "http://127.0.0.1:8787/v1/actions".into(), api_key: None, timeout_ms: 15_000, allow_remote_endpoint: false, window_allowlist: Vec::new(), max_coordinate_x: None, max_coordinate_y: None, } } } /// Browser automation tool using pluggable backends. 
pub struct BrowserTool {
    // Policy consulted before every action (autonomy level and rate limit).
    security: Arc<SecurityPolicy>,
    // Domain allowlist enforced for navigation; normalized at construction.
    allowed_domains: Vec<String>,
    // Optional session name, passed to agent-browser as `--session`.
    session_name: Option<String>,
    // Raw backend name from config; parsed with `BrowserBackendKind::parse`.
    backend: String,
    native_headless: bool,
    native_webdriver_url: String,
    native_chrome_path: Option<String>,
    computer_use: ComputerUseConfig,
    #[cfg(feature = "browser-native")]
    native_state: tokio::sync::Mutex<native_backend::NativeBrowserState>,
}

/// Backend selection as written in configuration (`Auto` defers the choice).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BrowserBackendKind {
    AgentBrowser,
    RustNative,
    ComputerUse,
    Auto,
}

/// Concrete backend chosen for a request; unlike [`BrowserBackendKind`] it has no `Auto`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ResolvedBackend {
    AgentBrowser,
    RustNative,
    ComputerUse,
}

impl BrowserBackendKind {
    /// Parses a backend name from configuration.
    ///
    /// The input is trimmed, ASCII-lowercased and has `-` replaced by `_`
    /// before matching, so `"Agent-Browser"` and `" rust_native "` are accepted.
    ///
    /// # Errors
    /// Returns an error naming the accepted values when `raw` matches none of them.
    fn parse(raw: &str) -> anyhow::Result<Self> {
        let key = raw.trim().to_ascii_lowercase().replace('-', "_");
        match key.as_str() {
            "agent_browser" | "agentbrowser" => Ok(Self::AgentBrowser),
            "rust_native" | "native" => Ok(Self::RustNative),
            "computer_use" | "computeruse" => Ok(Self::ComputerUse),
            "auto" => Ok(Self::Auto),
            _ => anyhow::bail!(
                "Unsupported browser backend '{raw}'. Use 'agent_browser', 'rust_native', 'computer_use', or 'auto'"
            ),
        }
    }

    /// Canonical configuration spelling of this backend kind.
    fn as_str(self) -> &'static str {
        match self {
            Self::AgentBrowser => "agent_browser",
            Self::RustNative => "rust_native",
            Self::ComputerUse => "computer_use",
            Self::Auto => "auto",
        }
    }
}

/// Response from agent-browser --json commands
#[derive(Debug, Deserialize)]
struct AgentBrowserResponse {
    success: bool,
    data: Option<Value>,
    error: Option<String>,
}

/// Response format from computer-use sidecar.
#[derive(Debug, Deserialize)] struct ComputerUseResponse { #[serde(default)] success: Option, #[serde(default)] data: Option, #[serde(default)] error: Option, } /// Supported browser actions #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum BrowserAction { /// Navigate to a URL Open { url: String }, /// Get accessibility snapshot with refs Snapshot { #[serde(default)] interactive_only: bool, #[serde(default)] compact: bool, #[serde(default)] depth: Option, }, /// Click an element by ref or selector Click { selector: String }, /// Fill a form field Fill { selector: String, value: String }, /// Type text into focused element Type { selector: String, text: String }, /// Get text content of element GetText { selector: String }, /// Get page title GetTitle, /// Get current URL GetUrl, /// Take screenshot Screenshot { #[serde(default)] path: Option, #[serde(default)] full_page: bool, }, /// Wait for element or time Wait { #[serde(default)] selector: Option, #[serde(default)] ms: Option, #[serde(default)] text: Option, }, /// Press a key Press { key: String }, /// Hover over element Hover { selector: String }, /// Scroll page Scroll { direction: String, #[serde(default)] pixels: Option, }, /// Check if element is visible IsVisible { selector: String }, /// Close browser Close, /// Find element by semantic locator Find { by: String, // role, text, label, placeholder, testid value: String, action: String, // click, fill, text, hover #[serde(default)] fill_value: Option, }, } impl BrowserTool { pub fn new( security: Arc, allowed_domains: Vec, session_name: Option, ) -> Self { Self::new_with_backend( security, allowed_domains, session_name, "agent_browser".into(), true, "http://127.0.0.1:9515".into(), None, ComputerUseConfig::default(), ) } #[allow(clippy::too_many_arguments)] pub fn new_with_backend( security: Arc, allowed_domains: Vec, session_name: Option, backend: String, native_headless: bool, native_webdriver_url: String, 
native_chrome_path: Option, computer_use: ComputerUseConfig, ) -> Self { Self { security, allowed_domains: normalize_domains(allowed_domains), session_name, backend, native_headless, native_webdriver_url, native_chrome_path, computer_use, #[cfg(feature = "browser-native")] native_state: tokio::sync::Mutex::new(native_backend::NativeBrowserState::default()), } } /// Check if agent-browser CLI is available pub async fn is_agent_browser_available() -> bool { Command::new("agent-browser") .arg("--version") .stdout(Stdio::null()) .stderr(Stdio::null()) .status() .await .map(|s| s.success()) .unwrap_or(false) } /// Backward-compatible alias. pub async fn is_available() -> bool { Self::is_agent_browser_available().await } fn configured_backend(&self) -> anyhow::Result { BrowserBackendKind::parse(&self.backend) } fn rust_native_compiled() -> bool { cfg!(feature = "browser-native") } fn rust_native_available(&self) -> bool { #[cfg(feature = "browser-native")] { native_backend::NativeBrowserState::is_available( self.native_headless, &self.native_webdriver_url, self.native_chrome_path.as_deref(), ) } #[cfg(not(feature = "browser-native"))] { false } } fn computer_use_endpoint_url(&self) -> anyhow::Result { if self.computer_use.timeout_ms == 0 { anyhow::bail!("browser.computer_use.timeout_ms must be > 0"); } let endpoint = self.computer_use.endpoint.trim(); if endpoint.is_empty() { anyhow::bail!("browser.computer_use.endpoint cannot be empty"); } let parsed = reqwest::Url::parse(endpoint).map_err(|_| { anyhow::anyhow!( "Invalid browser.computer_use.endpoint: '{endpoint}'. 
Expected http(s) URL" ) })?; let scheme = parsed.scheme(); if scheme != "http" && scheme != "https" { anyhow::bail!("browser.computer_use.endpoint must use http:// or https://"); } let host = parsed .host_str() .ok_or_else(|| anyhow::anyhow!("browser.computer_use.endpoint must include host"))?; let host_is_private = is_private_host(host); if !self.computer_use.allow_remote_endpoint && !host_is_private { anyhow::bail!( "browser.computer_use.endpoint host '{host}' is public. Set browser.computer_use.allow_remote_endpoint=true to allow it" ); } if self.computer_use.allow_remote_endpoint && !host_is_private && scheme != "https" { anyhow::bail!( "browser.computer_use.endpoint must use https:// when allow_remote_endpoint=true and host is public" ); } Ok(parsed) } fn computer_use_available(&self) -> anyhow::Result { let endpoint = self.computer_use_endpoint_url()?; Ok(endpoint_reachable(&endpoint, Duration::from_millis(500))) } async fn resolve_backend(&self) -> anyhow::Result { let configured = self.configured_backend()?; match configured { BrowserBackendKind::AgentBrowser => { if Self::is_agent_browser_available().await { Ok(ResolvedBackend::AgentBrowser) } else { anyhow::bail!( "browser.backend='{}' but agent-browser CLI is unavailable. Install with: npm install -g agent-browser", configured.as_str() ) } } BrowserBackendKind::RustNative => { if !Self::rust_native_compiled() { anyhow::bail!( "browser.backend='rust_native' requires build feature 'browser-native'" ); } if !self.rust_native_available() { anyhow::bail!( "Rust-native browser backend is enabled but WebDriver endpoint is unreachable. Set browser.native_webdriver_url and start a compatible driver" ); } Ok(ResolvedBackend::RustNative) } BrowserBackendKind::ComputerUse => { if !self.computer_use_available()? { anyhow::bail!( "browser.backend='computer_use' but sidecar endpoint is unreachable. 
Check browser.computer_use.endpoint and sidecar status" ); } Ok(ResolvedBackend::ComputerUse) } BrowserBackendKind::Auto => { if Self::rust_native_compiled() && self.rust_native_available() { return Ok(ResolvedBackend::RustNative); } if Self::is_agent_browser_available().await { return Ok(ResolvedBackend::AgentBrowser); } let computer_use_err = match self.computer_use_available() { Ok(true) => return Ok(ResolvedBackend::ComputerUse), Ok(false) => None, Err(err) => Some(err.to_string()), }; if Self::rust_native_compiled() { if let Some(err) = computer_use_err { anyhow::bail!( "browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use invalid: {err})" ); } anyhow::bail!( "browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use sidecar unreachable)" ) } if let Some(err) = computer_use_err { anyhow::bail!( "browser.backend='auto' needs agent-browser CLI, browser-native, or valid computer-use sidecar (error: {err})" ); } anyhow::bail!( "browser.backend='auto' needs agent-browser CLI, browser-native, or computer-use sidecar" ) } } } /// Validate URL against allowlist fn validate_url(&self, url: &str) -> anyhow::Result<()> { let url = url.trim(); if url.is_empty() { anyhow::bail!("URL cannot be empty"); } // Block file:// URLs — browser file access bypasses all SSRF and // domain-allowlist controls and can exfiltrate arbitrary local files. if url.starts_with("file://") { anyhow::bail!("file:// URLs are not allowed in browser automation"); } if !url.starts_with("https://") && !url.starts_with("http://") { anyhow::bail!("Only http:// and https:// URLs are allowed"); } if self.allowed_domains.is_empty() { anyhow::bail!( "Browser tool enabled but no allowed_domains configured. 
\ Add [browser].allowed_domains in config.toml" ); } let host = extract_host(url)?; if is_private_host(&host) { anyhow::bail!("Blocked local/private host: {host}"); } if !host_matches_allowlist(&host, &self.allowed_domains) { anyhow::bail!("Host '{host}' not in browser.allowed_domains"); } Ok(()) } /// Execute an agent-browser command async fn run_command(&self, args: &[&str]) -> anyhow::Result { let mut cmd = Command::new("agent-browser"); // Add session if configured if let Some(ref session) = self.session_name { cmd.arg("--session").arg(session); } // Add --json for machine-readable output cmd.args(args).arg("--json"); debug!("Running: agent-browser {} --json", args.join(" ")); let output = cmd .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output() .await?; let stdout = String::from_utf8_lossy(&output.stdout); let stderr = String::from_utf8_lossy(&output.stderr); if !stderr.is_empty() { debug!("agent-browser stderr: {}", stderr); } // Parse JSON response if let Ok(resp) = serde_json::from_str::(&stdout) { return Ok(resp); } // Fallback for non-JSON output if output.status.success() { Ok(AgentBrowserResponse { success: true, data: Some(json!({ "output": stdout.trim() })), error: None, }) } else { Ok(AgentBrowserResponse { success: false, data: None, error: Some(stderr.trim().to_string()), }) } } /// Execute a browser action via agent-browser CLI #[allow(clippy::too_many_lines)] async fn execute_agent_browser_action( &self, action: BrowserAction, ) -> anyhow::Result { match action { BrowserAction::Open { url } => { self.validate_url(&url)?; let resp = self.run_command(&["open", &url]).await?; self.to_result(resp) } BrowserAction::Snapshot { interactive_only, compact, depth, } => { let mut args = vec!["snapshot"]; if interactive_only { args.push("-i"); } if compact { args.push("-c"); } let depth_str; if let Some(d) = depth { args.push("-d"); depth_str = d.to_string(); args.push(&depth_str); } let resp = self.run_command(&args).await?; self.to_result(resp) } 
BrowserAction::Click { selector } => { let resp = self.run_command(&["click", &selector]).await?; self.to_result(resp) } BrowserAction::Fill { selector, value } => { let resp = self.run_command(&["fill", &selector, &value]).await?; self.to_result(resp) } BrowserAction::Type { selector, text } => { let resp = self.run_command(&["type", &selector, &text]).await?; self.to_result(resp) } BrowserAction::GetText { selector } => { let resp = self.run_command(&["get", "text", &selector]).await?; self.to_result(resp) } BrowserAction::GetTitle => { let resp = self.run_command(&["get", "title"]).await?; self.to_result(resp) } BrowserAction::GetUrl => { let resp = self.run_command(&["get", "url"]).await?; self.to_result(resp) } BrowserAction::Screenshot { path, full_page } => { let mut args = vec!["screenshot"]; if let Some(ref p) = path { args.push(p); } if full_page { args.push("--full"); } let resp = self.run_command(&args).await?; self.to_result(resp) } BrowserAction::Wait { selector, ms, text } => { let mut args = vec!["wait"]; let ms_str; if let Some(sel) = selector.as_ref() { args.push(sel); } else if let Some(millis) = ms { ms_str = millis.to_string(); args.push(&ms_str); } else if let Some(ref t) = text { args.push("--text"); args.push(t); } let resp = self.run_command(&args).await?; self.to_result(resp) } BrowserAction::Press { key } => { let resp = self.run_command(&["press", &key]).await?; self.to_result(resp) } BrowserAction::Hover { selector } => { let resp = self.run_command(&["hover", &selector]).await?; self.to_result(resp) } BrowserAction::Scroll { direction, pixels } => { let mut args = vec!["scroll", &direction]; let px_str; if let Some(px) = pixels { px_str = px.to_string(); args.push(&px_str); } let resp = self.run_command(&args).await?; self.to_result(resp) } BrowserAction::IsVisible { selector } => { let resp = self.run_command(&["is", "visible", &selector]).await?; self.to_result(resp) } BrowserAction::Close => { let resp = 
self.run_command(&["close"]).await?; self.to_result(resp) } BrowserAction::Find { by, value, action, fill_value, } => { let mut args = vec!["find", &by, &value, &action]; if let Some(ref fv) = fill_value { args.push(fv); } let resp = self.run_command(&args).await?; self.to_result(resp) } } } #[allow(clippy::unused_async)] async fn execute_rust_native_action( &self, action: BrowserAction, ) -> anyhow::Result { #[cfg(feature = "browser-native")] { let mut state = self.native_state.lock().await; let output = state .execute_action( action, self.native_headless, &self.native_webdriver_url, self.native_chrome_path.as_deref(), ) .await?; Ok(ToolResult { success: true, output: serde_json::to_string_pretty(&output).unwrap_or_default(), error: None, }) } #[cfg(not(feature = "browser-native"))] { let _ = action; anyhow::bail!( "Rust-native browser backend is not compiled. Rebuild with --features browser-native" ) } } fn validate_coordinate(&self, key: &str, value: i64, max: Option) -> anyhow::Result<()> { if value < 0 { anyhow::bail!("'{key}' must be >= 0") } if let Some(limit) = max { if limit < 0 { anyhow::bail!("Configured coordinate limit for '{key}' must be >= 0") } if value > limit { anyhow::bail!("'{key}'={value} exceeds configured limit {limit}") } } Ok(()) } fn read_required_i64( &self, params: &serde_json::Map, key: &str, ) -> anyhow::Result { params .get(key) .and_then(Value::as_i64) .ok_or_else(|| anyhow::anyhow!("Missing or invalid '{key}' parameter")) } fn validate_computer_use_action( &self, action: &str, params: &serde_json::Map, ) -> anyhow::Result<()> { match action { "open" => { let url = params .get("url") .and_then(Value::as_str) .ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?; self.validate_url(url)?; } "mouse_move" | "mouse_click" => { let x = self.read_required_i64(params, "x")?; let y = self.read_required_i64(params, "y")?; self.validate_coordinate("x", x, self.computer_use.max_coordinate_x)?; self.validate_coordinate("y", y, 
self.computer_use.max_coordinate_y)?; } "mouse_drag" => { let from_x = self.read_required_i64(params, "from_x")?; let from_y = self.read_required_i64(params, "from_y")?; let to_x = self.read_required_i64(params, "to_x")?; let to_y = self.read_required_i64(params, "to_y")?; self.validate_coordinate("from_x", from_x, self.computer_use.max_coordinate_x)?; self.validate_coordinate("to_x", to_x, self.computer_use.max_coordinate_x)?; self.validate_coordinate("from_y", from_y, self.computer_use.max_coordinate_y)?; self.validate_coordinate("to_y", to_y, self.computer_use.max_coordinate_y)?; } _ => {} } Ok(()) } async fn execute_computer_use_action( &self, action: &str, args: &Value, ) -> anyhow::Result { let endpoint = self.computer_use_endpoint_url()?; let mut params = args .as_object() .cloned() .ok_or_else(|| anyhow::anyhow!("browser args must be a JSON object"))?; params.remove("action"); self.validate_computer_use_action(action, ¶ms)?; let payload = json!({ "action": action, "params": params, "policy": { "allowed_domains": self.allowed_domains, "window_allowlist": self.computer_use.window_allowlist, "max_coordinate_x": self.computer_use.max_coordinate_x, "max_coordinate_y": self.computer_use.max_coordinate_y, }, "metadata": { "session_name": self.session_name, "source": "zeroclaw.browser", "version": env!("CARGO_PKG_VERSION"), } }); let client = crate::config::build_runtime_proxy_client("tool.browser"); let mut request = client .post(endpoint) .timeout(Duration::from_millis(self.computer_use.timeout_ms)) .json(&payload); if let Some(api_key) = self.computer_use.api_key.as_deref() { let token = api_key.trim(); if !token.is_empty() { request = request.bearer_auth(token); } } let response = request.send().await.with_context(|| { format!( "Failed to call computer-use sidecar at {}", self.computer_use.endpoint ) })?; let status = response.status(); let body = response .text() .await .context("Failed to read computer-use sidecar response body")?; if let Ok(parsed) = 
serde_json::from_str::(&body) { if status.is_success() && parsed.success.unwrap_or(true) { let output = parsed .data .map(|data| serde_json::to_string_pretty(&data).unwrap_or_default()) .unwrap_or_else(|| { serde_json::to_string_pretty(&json!({ "backend": "computer_use", "action": action, "ok": true, })) .unwrap_or_default() }); return Ok(ToolResult { success: true, output, error: None, }); } let error = parsed.error.or_else(|| { if status.is_success() && parsed.success == Some(false) { Some("computer-use sidecar returned success=false".to_string()) } else { Some(format!( "computer-use sidecar request failed with status {status}" )) } }); return Ok(ToolResult { success: false, output: String::new(), error, }); } if status.is_success() { return Ok(ToolResult { success: true, output: body, error: None, }); } Ok(ToolResult { success: false, output: String::new(), error: Some(format!( "computer-use sidecar request failed with status {status}: {}", body.trim() )), }) } async fn execute_action( &self, action: BrowserAction, backend: ResolvedBackend, ) -> anyhow::Result { match backend { ResolvedBackend::AgentBrowser => self.execute_agent_browser_action(action).await, ResolvedBackend::RustNative => self.execute_rust_native_action(action).await, ResolvedBackend::ComputerUse => anyhow::bail!( "Internal error: computer_use backend must be handled before BrowserAction parsing" ), } } #[allow(clippy::unnecessary_wraps, clippy::unused_self)] fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result { if resp.success { let output = resp .data .map(|d| serde_json::to_string_pretty(&d).unwrap_or_default()) .unwrap_or_default(); Ok(ToolResult { success: true, output, error: None, }) } else { Ok(ToolResult { success: false, output: String::new(), error: resp.error, }) } } } #[async_trait] impl Tool for BrowserTool { fn name(&self) -> &str { "browser" } fn description(&self) -> &str { concat!( "Web/browser automation with pluggable backends (agent-browser, rust-native, 
computer_use). ", "Supports DOM actions plus optional OS-level actions (mouse_move, mouse_click, mouse_drag, ", "key_type, key_press, screen_capture) through a computer-use sidecar. Use 'snapshot' to map ", "interactive elements to refs (@e1, @e2). Enforces browser.allowed_domains for open actions." ) } fn parameters_schema(&self) -> Value { json!({ "type": "object", "properties": { "action": { "type": "string", "enum": ["open", "snapshot", "click", "fill", "type", "get_text", "get_title", "get_url", "screenshot", "wait", "press", "hover", "scroll", "is_visible", "close", "find", "mouse_move", "mouse_click", "mouse_drag", "key_type", "key_press", "screen_capture"], "description": "Browser action to perform (OS-level actions require backend=computer_use)" }, "url": { "type": "string", "description": "URL to navigate to (for 'open' action)" }, "selector": { "type": "string", "description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..." }, "value": { "type": "string", "description": "Value to fill or type" }, "text": { "type": "string", "description": "Text to type or wait for" }, "key": { "type": "string", "description": "Key to press (Enter, Tab, Escape, etc.)" }, "x": { "type": "integer", "description": "Screen X coordinate (computer_use: mouse_move/mouse_click)" }, "y": { "type": "integer", "description": "Screen Y coordinate (computer_use: mouse_move/mouse_click)" }, "from_x": { "type": "integer", "description": "Drag source X coordinate (computer_use: mouse_drag)" }, "from_y": { "type": "integer", "description": "Drag source Y coordinate (computer_use: mouse_drag)" }, "to_x": { "type": "integer", "description": "Drag target X coordinate (computer_use: mouse_drag)" }, "to_y": { "type": "integer", "description": "Drag target Y coordinate (computer_use: mouse_drag)" }, "button": { "type": "string", "enum": ["left", "right", "middle"], "description": "Mouse button for computer_use mouse_click" }, "direction": { "type": "string", "enum": ["up", 
"down", "left", "right"], "description": "Scroll direction" }, "pixels": { "type": "integer", "description": "Pixels to scroll" }, "interactive_only": { "type": "boolean", "description": "For snapshot: only show interactive elements" }, "compact": { "type": "boolean", "description": "For snapshot: remove empty structural elements" }, "depth": { "type": "integer", "description": "For snapshot: limit tree depth" }, "full_page": { "type": "boolean", "description": "For screenshot: capture full page" }, "path": { "type": "string", "description": "File path for screenshot" }, "ms": { "type": "integer", "description": "Milliseconds to wait" }, "by": { "type": "string", "enum": ["role", "text", "label", "placeholder", "testid"], "description": "For find: semantic locator type" }, "find_action": { "type": "string", "enum": ["click", "fill", "text", "hover", "check"], "description": "For find: action to perform on found element" }, "fill_value": { "type": "string", "description": "For find with fill action: value to fill" } }, "required": ["action"] }) } async fn execute(&self, args: Value) -> anyhow::Result { // Security checks if !self.security.can_act() { return Ok(ToolResult { success: false, output: String::new(), error: Some("Action blocked: autonomy is read-only".into()), }); } if !self.security.record_action() { return Ok(ToolResult { success: false, output: String::new(), error: Some("Action blocked: rate limit exceeded".into()), }); } let backend = match self.resolve_backend().await { Ok(selected) => selected, Err(error) => { return Ok(ToolResult { success: false, output: String::new(), error: Some(error.to_string()), }); } }; // Parse action from args let action_str = args .get("action") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?; if !is_supported_browser_action(action_str) { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!("Unknown action: {action_str}")), }); } if backend == 
ResolvedBackend::ComputerUse { return self.execute_computer_use_action(action_str, &args).await; } if is_computer_use_only_action(action_str) { return Ok(ToolResult { success: false, output: String::new(), error: Some(unavailable_action_for_backend_error(action_str, backend)), }); } let action = match parse_browser_action(action_str, &args) { Ok(a) => a, Err(e) => { return Ok(ToolResult { success: false, output: String::new(), error: Some(e.to_string()), }); } }; self.execute_action(action, backend).await } } #[cfg(feature = "browser-native")] mod native_backend { use super::BrowserAction; use anyhow::{Context, Result}; use base64::Engine; use fantoccini::actions::{InputSource, MouseActions, PointerAction}; use fantoccini::key::Key; use fantoccini::{Client, ClientBuilder, Locator}; use serde_json::{json, Map, Value}; use std::net::{TcpStream, ToSocketAddrs}; use std::time::Duration; #[derive(Default)] pub struct NativeBrowserState { client: Option, } impl NativeBrowserState { pub fn is_available( _headless: bool, webdriver_url: &str, _chrome_path: Option<&str>, ) -> bool { webdriver_endpoint_reachable(webdriver_url, Duration::from_millis(500)) } #[allow(clippy::too_many_lines)] pub async fn execute_action( &mut self, action: BrowserAction, headless: bool, webdriver_url: &str, chrome_path: Option<&str>, ) -> Result { match action { BrowserAction::Open { url } => { self.ensure_session(headless, webdriver_url, chrome_path) .await?; let client = self.active_client()?; client .goto(&url) .await .with_context(|| format!("Failed to open URL: {url}"))?; let current_url = client .current_url() .await .context("Failed to read current URL after navigation")?; Ok(json!({ "backend": "rust_native", "action": "open", "url": current_url.as_str(), })) } BrowserAction::Snapshot { interactive_only, compact, depth, } => { let client = self.active_client()?; let snapshot = client .execute( &snapshot_script(interactive_only, compact, depth.map(i64::from)), vec![], ) .await 
.context("Failed to evaluate snapshot script")?; Ok(json!({ "backend": "rust_native", "action": "snapshot", "data": snapshot, })) } BrowserAction::Click { selector } => { let client = self.active_client()?; find_element(client, &selector).await?.click().await?; Ok(json!({ "backend": "rust_native", "action": "click", "selector": selector, })) } BrowserAction::Fill { selector, value } => { let client = self.active_client()?; let element = find_element(client, &selector).await?; let _ = element.clear().await; element.send_keys(&value).await?; Ok(json!({ "backend": "rust_native", "action": "fill", "selector": selector, })) } BrowserAction::Type { selector, text } => { let client = self.active_client()?; find_element(client, &selector) .await? .send_keys(&text) .await?; Ok(json!({ "backend": "rust_native", "action": "type", "selector": selector, "typed": text.len(), })) } BrowserAction::GetText { selector } => { let client = self.active_client()?; let text = find_element(client, &selector).await?.text().await?; Ok(json!({ "backend": "rust_native", "action": "get_text", "selector": selector, "text": text, })) } BrowserAction::GetTitle => { let client = self.active_client()?; let title = client.title().await.context("Failed to read page title")?; Ok(json!({ "backend": "rust_native", "action": "get_title", "title": title, })) } BrowserAction::GetUrl => { let client = self.active_client()?; let url = client .current_url() .await .context("Failed to read current URL")?; Ok(json!({ "backend": "rust_native", "action": "get_url", "url": url.as_str(), })) } BrowserAction::Screenshot { path, full_page } => { let client = self.active_client()?; let png = client .screenshot() .await .context("Failed to capture screenshot")?; let mut payload = json!({ "backend": "rust_native", "action": "screenshot", "full_page": full_page, "bytes": png.len(), }); if let Some(path_str) = path { std::fs::write(&path_str, &png) .with_context(|| format!("Failed to write screenshot to {path_str}"))?; 
payload["path"] = Value::String(path_str); } else { payload["png_base64"] = Value::String(base64::engine::general_purpose::STANDARD.encode(&png)); } Ok(payload) } BrowserAction::Wait { selector, ms, text } => { let client = self.active_client()?; if let Some(sel) = selector.as_ref() { wait_for_selector(client, sel).await?; Ok(json!({ "backend": "rust_native", "action": "wait", "selector": sel, })) } else if let Some(duration_ms) = ms { tokio::time::sleep(Duration::from_millis(duration_ms)).await; Ok(json!({ "backend": "rust_native", "action": "wait", "ms": duration_ms, })) } else if let Some(needle) = text.as_ref() { let xpath = xpath_contains_text(needle); client .wait() .for_element(Locator::XPath(&xpath)) .await .with_context(|| { format!("Timed out waiting for text to appear: {needle}") })?; Ok(json!({ "backend": "rust_native", "action": "wait", "text": needle, })) } else { tokio::time::sleep(Duration::from_millis(250)).await; Ok(json!({ "backend": "rust_native", "action": "wait", "ms": 250, })) } } BrowserAction::Press { key } => { let client = self.active_client()?; let key_input = webdriver_key(&key); match client.active_element().await { Ok(element) => { element.send_keys(&key_input).await?; } Err(_) => { find_element(client, "body") .await? .send_keys(&key_input) .await?; } } Ok(json!({ "backend": "rust_native", "action": "press", "key": key, })) } BrowserAction::Hover { selector } => { let client = self.active_client()?; let element = find_element(client, &selector).await?; hover_element(client, &element).await?; Ok(json!({ "backend": "rust_native", "action": "hover", "selector": selector, })) } BrowserAction::Scroll { direction, pixels } => { let client = self.active_client()?; let amount = i64::from(pixels.unwrap_or(600)); let (dx, dy) = match direction.as_str() { "up" => (0, -amount), "down" => (0, amount), "left" => (-amount, 0), "right" => (amount, 0), _ => anyhow::bail!( "Unsupported scroll direction '{direction}'. 
Use up/down/left/right" ), }; let position = client .execute( "window.scrollBy(arguments[0], arguments[1]); return { x: window.scrollX, y: window.scrollY };", vec![json!(dx), json!(dy)], ) .await .context("Failed to execute scroll script")?; Ok(json!({ "backend": "rust_native", "action": "scroll", "position": position, })) } BrowserAction::IsVisible { selector } => { let client = self.active_client()?; let visible = find_element(client, &selector) .await? .is_displayed() .await?; Ok(json!({ "backend": "rust_native", "action": "is_visible", "selector": selector, "visible": visible, })) } BrowserAction::Close => { if let Some(client) = self.client.take() { let _ = client.close().await; } Ok(json!({ "backend": "rust_native", "action": "close", "closed": true, })) } BrowserAction::Find { by, value, action, fill_value, } => { let client = self.active_client()?; let selector = selector_for_find(&by, &value); let element = find_element(client, &selector).await?; let payload = match action.as_str() { "click" => { element.click().await?; json!({"result": "clicked"}) } "fill" => { let fill = fill_value.ok_or_else(|| { anyhow::anyhow!("find_action='fill' requires fill_value") })?; let _ = element.clear().await; element.send_keys(&fill).await?; json!({"result": "filled", "typed": fill.len()}) } "text" => { let text = element.text().await?; json!({"result": "text", "text": text}) } "hover" => { hover_element(client, &element).await?; json!({"result": "hovered"}) } "check" => { let checked_before = element_checked(&element).await?; if !checked_before { element.click().await?; } let checked_after = element_checked(&element).await?; json!({ "result": "checked", "checked_before": checked_before, "checked_after": checked_after, }) } _ => anyhow::bail!( "Unsupported find_action '{action}'. 
Use click/fill/text/hover/check" ), }; Ok(json!({ "backend": "rust_native", "action": "find", "by": by, "value": value, "selector": selector, "data": payload, })) } } } async fn ensure_session( &mut self, headless: bool, webdriver_url: &str, chrome_path: Option<&str>, ) -> Result<()> { if self.client.is_some() { return Ok(()); } let mut capabilities: Map = Map::new(); let mut chrome_options: Map = Map::new(); let mut args: Vec = Vec::new(); if headless { args.push(Value::String("--headless=new".to_string())); args.push(Value::String("--disable-gpu".to_string())); } if !args.is_empty() { chrome_options.insert("args".to_string(), Value::Array(args)); } if let Some(path) = chrome_path { let trimmed = path.trim(); if !trimmed.is_empty() { chrome_options.insert("binary".to_string(), Value::String(trimmed.to_string())); } } if !chrome_options.is_empty() { capabilities.insert( "goog:chromeOptions".to_string(), Value::Object(chrome_options), ); } let mut builder = ClientBuilder::rustls().context("Failed to initialize rustls connector")?; if !capabilities.is_empty() { builder.capabilities(capabilities); } let client = builder .connect(webdriver_url) .await .with_context(|| { format!( "Failed to connect to WebDriver at {webdriver_url}. Start chromedriver/geckodriver first" ) })?; self.client = Some(client); Ok(()) } fn active_client(&self) -> Result<&Client> { self.client.as_ref().ok_or_else(|| { anyhow::anyhow!("No active native browser session. 
Run browser action='open' first") }) } } fn webdriver_endpoint_reachable(webdriver_url: &str, timeout: Duration) -> bool { let parsed = match reqwest::Url::parse(webdriver_url) { Ok(url) => url, Err(_) => return false, }; if parsed.scheme() != "http" && parsed.scheme() != "https" { return false; } let host = match parsed.host_str() { Some(h) if !h.is_empty() => h, _ => return false, }; let port = parsed.port_or_known_default().unwrap_or(4444); let mut addrs = match (host, port).to_socket_addrs() { Ok(iter) => iter, Err(_) => return false, }; let addr = match addrs.next() { Some(a) => a, None => return false, }; TcpStream::connect_timeout(&addr, timeout).is_ok() } fn selector_for_find(by: &str, value: &str) -> String { let escaped = css_attr_escape(value); match by { "role" => format!(r#"[role=\"{escaped}\"]"#), "label" => format!("label={value}"), "placeholder" => format!(r#"[placeholder=\"{escaped}\"]"#), "testid" => format!(r#"[data-testid=\"{escaped}\"]"#), _ => format!("text={value}"), } } async fn wait_for_selector(client: &Client, selector: &str) -> Result<()> { match parse_selector(selector) { SelectorKind::Css(css) => { client .wait() .for_element(Locator::Css(&css)) .await .with_context(|| format!("Timed out waiting for selector '{selector}'"))?; } SelectorKind::XPath(xpath) => { client .wait() .for_element(Locator::XPath(&xpath)) .await .with_context(|| format!("Timed out waiting for selector '{selector}'"))?; } } Ok(()) } async fn find_element( client: &Client, selector: &str, ) -> Result { let element = match parse_selector(selector) { SelectorKind::Css(css) => client .find(Locator::Css(&css)) .await .with_context(|| format!("Failed to find element by CSS '{css}'"))?, SelectorKind::XPath(xpath) => client .find(Locator::XPath(&xpath)) .await .with_context(|| format!("Failed to find element by XPath '{xpath}'"))?, }; Ok(element) } async fn hover_element(client: &Client, element: &fantoccini::elements::Element) -> Result<()> { let actions = 
MouseActions::new("mouse".to_string()).then(PointerAction::MoveToElement {
                element: element.clone(),
                duration: Some(Duration::from_millis(150)),
                x: 0.0,
                y: 0.0,
            });

        client
            .perform_actions(actions)
            .await
            .context("Failed to perform hover action")?;
        // Best effort: a failure to release input state must not fail the hover.
        let _ = client.release_actions().await;
        Ok(())
    }

    /// Reads the element's `checked` property.
    ///
    /// Returns `true` when the property value is `true`, `checked`, or `1`
    /// (compared case-insensitively); an absent property counts as unchecked.
    async fn element_checked(element: &fantoccini::elements::Element) -> Result<bool> {
        let checked = element
            .prop("checked")
            .await
            .context("Failed to read checkbox checked property")?
            .unwrap_or_default()
            .to_ascii_lowercase();
        Ok(matches!(checked.as_str(), "true" | "checked" | "1"))
    }

    /// A selector resolved to the locator strategy WebDriver should use.
    enum SelectorKind {
        /// Plain CSS selector.
        Css(String),
        /// XPath expression.
        XPath(String),
    }

    /// Translates the tool's selector mini-language into CSS or XPath.
    ///
    /// * `text=<needle>` – any element whose normalised text contains the needle.
    /// * `label=<needle>` – the form control following a matching `<label>`,
    ///   an element with a matching `aria-label`, or the label itself.
    /// * `@<ref>` – the element tagged with that `data-zc-ref` by the
    ///   snapshot script.
    /// * anything else – used verbatim (trimmed) as a CSS selector.
    fn parse_selector(selector: &str) -> SelectorKind {
        let trimmed = selector.trim();

        if let Some(text_query) = trimmed.strip_prefix("text=") {
            return SelectorKind::XPath(xpath_contains_text(text_query));
        }

        if let Some(label_query) = trimmed.strip_prefix("label=") {
            let literal = xpath_literal(label_query);
            return SelectorKind::XPath(format!(
                "(//label[contains(normalize-space(.), {literal})]/following::*[self::input or self::textarea or self::select][1] | //*[@aria-label and contains(normalize-space(@aria-label), {literal})] | //label[contains(normalize-space(.), {literal})])"
            ));
        }

        if trimmed.starts_with('@') {
            let escaped = css_attr_escape(trimmed);
            // Raw string: quotes are written bare. A `\"` here would put a
            // literal backslash into the selector and make it invalid CSS.
            return SelectorKind::Css(format!(r#"[data-zc-ref="{escaped}"]"#));
        }

        SelectorKind::Css(trimmed.to_string())
    }

    /// Escapes `input` for use inside a double-quoted CSS attribute value:
    /// backslashes and double quotes are backslash-escaped, newlines become
    /// spaces.
    fn css_attr_escape(input: &str) -> String {
        input
            .replace('\\', "\\\\")
            .replace('"', "\\\"")
            .replace('\n', " ")
    }

    /// XPath matching any element whose whitespace-normalised text contains
    /// `text`.
    fn xpath_contains_text(text: &str) -> String {
        format!("//*[contains(normalize-space(.), {})]", xpath_literal(text))
    }

    /// Renders `input` as an XPath string literal.
    ///
    /// XPath 1.0 has no escape syntax, so the value is wrapped in whichever
    /// quote character it does not contain; when it contains both, it is
    /// assembled with `concat(...)` from double-quoted pieces joined by `'"'`.
    fn xpath_literal(input: &str) -> String {
        if !input.contains('"') {
            return format!("\"{input}\"");
        }
        if !input.contains('\'') {
            return format!("'{input}'");
        }

        let segments: Vec<&str> = input.split('"').collect();
        let mut parts: Vec<String> = Vec::new();
        for (index, part) in segments.iter().enumerate() {
            if !part.is_empty() {
                parts.push(format!("\"{part}\""));
            }
            if
index + 1 < segments.len() {
                parts.push("'\"'".to_string());
            }
        }

        if parts.is_empty() {
            "\"\"".to_string()
        } else {
            format!("concat({})", parts.join(","))
        }
    }

    /// Maps a key name to the text sent through WebDriver `send_keys`.
    ///
    /// Well-known names (`enter`, `tab`, `esc`, arrow keys, ...) are matched
    /// case-insensitively and translated to their WebDriver key codes. Any
    /// other input is sent as typed, trimmed but with its case preserved, so
    /// that pressing `A` does not silently become `a`.
    fn webdriver_key(key: &str) -> String {
        let trimmed = key.trim();
        match trimmed.to_ascii_lowercase().as_str() {
            "enter" => Key::Enter.to_string(),
            "return" => Key::Return.to_string(),
            "tab" => Key::Tab.to_string(),
            "escape" | "esc" => Key::Escape.to_string(),
            "backspace" => Key::Backspace.to_string(),
            "delete" => Key::Delete.to_string(),
            "space" => Key::Space.to_string(),
            "arrowup" | "up" => Key::Up.to_string(),
            "arrowdown" | "down" => Key::Down.to_string(),
            "arrowleft" | "left" => Key::Left.to_string(),
            "arrowright" | "right" => Key::Right.to_string(),
            "home" => Key::Home.to_string(),
            "end" => Key::End.to_string(),
            "pageup" => Key::PageUp.to_string(),
            "pagedown" => Key::PageDown.to_string(),
            _ => trimmed.to_string(),
        }
    }

    /// Builds the JavaScript that snapshots the visible DOM.
    ///
    /// The script walks the document from `<body>`, tags every reported
    /// element with a `data-zc-ref="@eN"` attribute, and evaluates to
    /// `{ title, url, count, nodes }`. `interactive_only` keeps only
    /// interactive elements, `compact` drops non-interactive elements without
    /// text, and `depth` (when set) stops the walk below that depth. The walk
    /// stops descending once 400 nodes have been collected.
    ///
    /// NOTE(review): the `depth` element type was inferred from usage; confirm
    /// it against the call site.
    /// NOTE(review): the script is an IIFE expression statement. WebDriver's
    /// Execute Script only returns a value that the script body `return`s, so
    /// confirm the call site prepends `return` (or otherwise captures it).
    fn snapshot_script(interactive_only: bool, compact: bool, depth: Option<i64>) -> String {
        let depth_literal = depth
            .map(|level| level.to_string())
            .unwrap_or_else(|| "null".to_string());

        format!(
            r#"(() => {{
  const interactiveOnly = {interactive_only};
  const compact = {compact};
  const maxDepth = {depth_literal};
  const nodes = [];
  const root = document.body || document.documentElement;
  let counter = 0;

  const isVisible = (el) => {{
    const style = window.getComputedStyle(el);
    if (style.display === 'none' || style.visibility === 'hidden' || Number(style.opacity || 1) === 0) {{
      return false;
    }}
    const rect = el.getBoundingClientRect();
    return rect.width > 0 && rect.height > 0;
  }};

  const isInteractive = (el) => {{
    if (el.matches('a,button,input,select,textarea,summary,[role],*[tabindex]')) return true;
    return typeof el.onclick === 'function';
  }};

  const describe = (el, depth) => {{
    const interactive = isInteractive(el);
    const text = (el.innerText || el.textContent || '').trim().replace(/\s+/g, ' ').slice(0, 140);
    if (interactiveOnly && !interactive) return;
    if (compact && !interactive && !text) return;

    const ref = '@e' + (++counter);
    el.setAttribute('data-zc-ref', ref);
    nodes.push({{
      ref,
      depth,
      tag: el.tagName.toLowerCase(),
      id: el.id || null,
      role: el.getAttribute('role'),
      text,
      interactive,
    }});
  }};

  const walk = (el, depth) => {{
    if (!(el instanceof Element)) return;
    if (maxDepth !== null && depth > maxDepth) return;

    if (isVisible(el)) {{
      describe(el, depth);
    }}

    for (const child of el.children) {{
      walk(child, depth + 1);
      if (nodes.length >= 400) return;
    }}
  }};

  if (root) walk(root, 0);

  return {{
    title: document.title,
    url: window.location.href,
    count: nodes.length,
    nodes,
  }};
}})();"#
        )
    }
}

// ── Action parsing ──────────────────────────────────────────────

/// Parse a JSON `args` object into a typed `BrowserAction`.
///
/// `action_str` selects the variant; `args` supplies its fields. Optional
/// fields fall back to their defaults (`interactive_only` and `compact` to
/// `true`, `full_page` to `false`), and numeric fields wider than `u32` are
/// clamped to `u32::MAX`.
///
/// # Errors
/// Returns `Missing '<key>' for <action>` when a mandatory string argument is
/// absent or not a string, and `Unsupported browser action: <name>` for an
/// action this parser does not know.
fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<BrowserAction> {
    /// Mandatory string argument; `action` is only used in the error text.
    fn required(args: &Value, key: &str, action: &str) -> anyhow::Result<String> {
        args.get(key)
            .and_then(Value::as_str)
            .map(String::from)
            .ok_or_else(|| anyhow::anyhow!("Missing '{key}' for {action}"))
    }

    /// Optional string argument; non-string values are treated as absent.
    fn optional(args: &Value, key: &str) -> Option<String> {
        args.get(key).and_then(Value::as_str).map(String::from)
    }

    /// Boolean argument with a default for absent or non-boolean values.
    fn flag(args: &Value, key: &str, default: bool) -> bool {
        args.get(key).and_then(Value::as_bool).unwrap_or(default)
    }

    /// Optional unsigned argument, clamped to `u32::MAX` instead of failing.
    fn clamped_u32(args: &Value, key: &str) -> Option<u32> {
        args.get(key)
            .and_then(Value::as_u64)
            .map(|n| u32::try_from(n).unwrap_or(u32::MAX))
    }

    match action_str {
        "open" => Ok(BrowserAction::Open {
            url: required(args, "url", "open action")?,
        }),
        "snapshot" => Ok(BrowserAction::Snapshot {
            interactive_only: flag(args, "interactive_only", true),
            compact: flag(args, "compact", true),
            depth: clamped_u32(args, "depth"),
        }),
        "click" => Ok(BrowserAction::Click {
            selector: required(args, "selector", "click")?,
        }),
        "fill" => Ok(BrowserAction::Fill {
            selector: required(args, "selector", "fill")?,
            value: required(args, "value", "fill")?,
        }),
        "type" => Ok(BrowserAction::Type {
            selector: required(args, "selector", "type")?,
            text: required(args, "text", "type")?,
        }),
        "get_text" => Ok(BrowserAction::GetText {
            selector: required(args, "selector", "get_text")?,
        }),
        "get_title" => Ok(BrowserAction::GetTitle),
        "get_url" => Ok(BrowserAction::GetUrl),
        "screenshot" => Ok(BrowserAction::Screenshot {
            path: optional(args, "path"),
            full_page: flag(args, "full_page", false),
        }),
        "wait" => Ok(BrowserAction::Wait {
            selector: optional(args, "selector"),
            ms: args.get("ms").and_then(Value::as_u64),
            text: optional(args, "text"),
        }),
        "press" => Ok(BrowserAction::Press {
            key: required(args, "key", "press")?,
        }),
        "hover" => Ok(BrowserAction::Hover {
            selector: required(args, "selector", "hover")?,
        }),
        "scroll" => Ok(BrowserAction::Scroll {
            direction: required(args, "direction", "scroll")?,
            pixels: clamped_u32(args, "pixels"),
        }),
        "is_visible" => Ok(BrowserAction::IsVisible {
            selector: required(args, "selector", "is_visible")?,
        }),
        "close" => Ok(BrowserAction::Close),
        "find" => Ok(BrowserAction::Find {
            by: required(args, "by", "find")?,
            value: required(args, "value", "find")?,
            action: required(args, "find_action", "find")?,
            fill_value: optional(args, "fill_value"),
        }),
        other => anyhow::bail!("Unsupported browser action: {other}"),
    }
}

// ── Helper functions ─────────────────────────────────────────────

/// Returns `true` when `action` is a name the browser tool understands,
/// including the actions that only the computer-use backend provides.
fn is_supported_browser_action(action: &str) -> bool {
    is_computer_use_only_action(action)
        || matches!(
            action,
            "open"
                | "snapshot"
                | "click"
                | "fill"
                | "type"
                | "get_text"
                | "get_title"
                | "get_url"
                | "screenshot"
                | "wait"
                | "press"
                | "hover"
                | "scroll"
                | "is_visible"
                | "close"
                | "find"
        )
}

/// Returns `true` for the OS-level actions that exist only on the
/// computer-use backend.
fn is_computer_use_only_action(action: &str) -> bool {
    matches!(
        action,
        "mouse_move" | "mouse_click" | "mouse_drag" | "key_type" | "key_press" | "screen_capture"
    )
}

/// Configuration-style name of a resolved backend.
fn backend_name(backend: ResolvedBackend) -> &'static str {
    match backend {
        ResolvedBackend::AgentBrowser => "agent_browser",
        ResolvedBackend::RustNative => "rust_native",
        ResolvedBackend::ComputerUse => "computer_use",
    }
}

/// Error text for an action the given backend cannot perform.
fn unavailable_action_for_backend_error(action: &str, backend: ResolvedBackend) -> String {
    format!(
        "Action '{action}' is unavailable for backend '{}'",
        backend_name(backend)
    )
}

/// Trims and lower-cases every domain, dropping entries that end up empty.
fn normalize_domains(domains: Vec<String>) -> Vec<String> {
    domains
        .into_iter()
        .map(|d| d.trim().to_lowercase())
        .filter(|d| !d.is_empty())
        .collect()
}

/// Reports whether a TCP connection to `endpoint`'s host and port succeeds
/// within `timeout`. URLs without a host or a resolvable port yield `false`.
fn endpoint_reachable(endpoint: &reqwest::Url, timeout: Duration) -> bool {
    let host = match endpoint.host_str() {
        Some(host) if !host.is_empty() => host,
        _ => return false,
    };

    let port = match endpoint.port_or_known_default() {
        Some(port) => port,
        None => return false,
    };

    let mut addrs = match (host, port).to_socket_addrs() {
        Ok(addrs) => addrs,
        Err(_) => return false,
};

    let addr = match addrs.next() {
        Some(addr) => addr,
        None => return false,
    };

    std::net::TcpStream::connect_timeout(&addr, timeout).is_ok()
}

/// Extracts the lower-cased host from a URL string without a URL parser.
///
/// A leading `https://`, `http://`, or `file://` is removed. The authority
/// is whatever precedes the first `/`, `?`, `#`, or `\`. Userinfo
/// (`user:pass@`) and a trailing `:port` are dropped. Bracketed IPv6 literals
/// are returned with their brackets, e.g. `[::1]`.
///
/// The query/fragment/userinfo handling matters for security: the result is
/// what gets checked against the allowlist, so it must be the host a browser
/// would actually connect to. Otherwise `https://evil.test#.example.com` or
/// `https://example.com:x@evil.test/` would be judged by the wrong name.
///
/// # Errors
/// Returns `Invalid URL: no host` when no host remains (for example for
/// `file:///path`).
fn extract_host(url_str: &str) -> anyhow::Result<String> {
    // Simple host extraction without url crate
    let url = url_str.trim();
    let without_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .or_else(|| url.strip_prefix("file://"))
        .unwrap_or(url);

    // The authority ends at the first path, query, or fragment delimiter.
    // Browsers treat `\` like `/` in http(s) URLs, so it terminates it too.
    let authority = without_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#' | '\\'))
        .next()
        .unwrap_or(without_scheme);

    // Userinfo runs up to the last `@`; the real host comes after it.
    let host_port = authority.rsplit('@').next().unwrap_or(authority);

    // Extract host — handle bracketed IPv6 addresses like [::1]:8080
    let host = if host_port.starts_with('[') {
        // IPv6: take everything up to and including the closing ']'
        host_port.find(']').map_or(host_port, |i| &host_port[..=i])
    } else {
        // IPv4 or hostname: take everything before the port separator
        host_port.split(':').next().unwrap_or(host_port)
    };

    if host.is_empty() {
        anyhow::bail!("Invalid URL: no host");
    }

    Ok(host.to_lowercase())
}

/// Returns `true` when `host` names this machine or a non-public network.
///
/// Recognised as private:
/// * `localhost` and any `*.localhost` name,
/// * names under the mDNS `.local` TLD,
/// * IPv4/IPv6 literals (optionally bracketed) that are not globally
///   routable, including IPv4 written in the legacy forms browsers still
///   accept (`2130706433`, `0x7f.0.0.1`, `0177.0.0.1`, `127.1`).
///
/// A single trailing dot (fully-qualified form) is ignored. Any other
/// hostname is considered public.
fn is_private_host(host: &str) -> bool {
    /// Decodes the legacy IPv4 spellings accepted by browsers: one to four
    /// dot-separated numbers, each decimal, octal (leading `0`), or hex
    /// (leading `0x`), where the last number fills all remaining bytes.
    /// Returns `None` when `host` is not such a number sequence.
    fn parse_legacy_ipv4(host: &str) -> Option<std::net::Ipv4Addr> {
        let labels: Vec<&str> = host.split('.').collect();
        if labels.len() > 4 {
            return None;
        }

        let mut numbers: Vec<u64> = Vec::with_capacity(labels.len());
        for label in labels {
            let hex = label
                .strip_prefix("0x")
                .or_else(|| label.strip_prefix("0X"));
            let (digits, radix) = match hex {
                Some(rest) => (rest, 16),
                None if label.len() > 1 && label.starts_with('0') => (&label[1..], 8),
                None => (label, 10),
            };

            if digits.is_empty() {
                // A bare "0x" counts as zero; an empty label is not a number.
                if radix == 10 {
                    return None;
                }
                numbers.push(0);
                continue;
            }
            // `from_str_radix` would also accept a leading `+`; insist on digits.
            if !digits.chars().all(|c| c.is_digit(radix)) {
                return None;
            }
            numbers.push(u64::from_str_radix(digits, radix).ok()?);
        }

        let (&last, leading) = numbers.split_last()?;
        if leading.iter().any(|&n| n > 255) {
            return None;
        }
        // The final number occupies every byte the leading ones did not claim.
        let free_bits = 8 * (4 - leading.len());
        if last >> free_bits != 0 {
            return None;
        }

        let value = leading
            .iter()
            .enumerate()
            .fold(last, |acc, (i, &n)| acc | (n << (8 * (3 - i))));
        u32::try_from(value).ok().map(std::net::Ipv4Addr::from)
    }

    // Strip brackets from IPv6 addresses like [::1]
    let bare = host
        .strip_prefix('[')
        .and_then(|h| h.strip_suffix(']'))
        .unwrap_or(host);
    // "localhost." resolves exactly like "localhost".
    let bare = bare.strip_suffix('.').unwrap_or(bare);

    if bare == "localhost" || bare.ends_with(".localhost") {
        return true;
    }

    // .local TLD (mDNS)
    if bare
        .rsplit('.')
        .next()
        .is_some_and(|label| label == "local")
    {
        return true;
    }

    // Standard textual IP addresses, including IPv4-mapped IPv6.
    if let Ok(ip) = bare.parse::<std::net::IpAddr>() {
        return match ip {
            std::net::IpAddr::V4(v4) => is_non_global_v4(v4),
            std::net::IpAddr::V6(v6) => is_non_global_v6(v6),
        };
    }

    // The std parser only understands dotted-decimal IPv4, but browsers also
    // resolve integer, hex, octal, and shortened forms; classify those by the
    // address they really denote.
    parse_legacy_ipv4(bare).is_some_and(is_non_global_v4)
}

/// Returns `true` for any IPv4 address that is not globally routable.
///
/// On top of the standard-library predicates (loopback, RFC 1918 private,
/// link-local, unspecified, broadcast, multicast) this rejects the
/// shared address space, the reserved `240.0.0.0/4` block, the IETF
/// protocol-assignment block, the three documentation networks, and the
/// benchmarking range. Each special block is matched on its exact prefix so
/// that publicly routable neighbours (e.g. `192.0.78.0/24`) are not flagged.
fn is_non_global_v4(v4: std::net::Ipv4Addr) -> bool {
    let [a, b, c, _] = v4.octets();
    v4.is_loopback()
        || v4.is_private()
        || v4.is_link_local()
        || v4.is_unspecified()
        || v4.is_broadcast()
        || v4.is_multicast()
        // Shared address space (100.64.0.0/10)
        || (a == 100 && (64..=127).contains(&b))
        // Reserved (240.0.0.0/4)
        || a >= 240
        // IETF protocol assignments (192.0.0.0/24)
        || (a == 192 && b == 0 && c == 0)
        // Documentation (192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24)
        || (a == 192 && b == 0 && c == 2)
        || (a == 198 && b == 51 && c == 100)
        || (a == 203 && b == 0 && c == 113)
        // Benchmarking (198.18.0.0/15)
        || (a == 198 && (18..=19).contains(&b))
}

/// Returns `true` for any IPv6 address that is not globally routable.
///
/// Covers loopback, unspecified, multicast, unique-local, and link-local
/// addresses, plus IPv4-mapped addresses whose embedded IPv4 address is
/// itself non-global.
fn is_non_global_v6(v6: std::net::Ipv6Addr) -> bool {
    let segs = v6.segments();
    v6.is_loopback()
        || v6.is_unspecified()
        || v6.is_multicast()
        // Unique-local (fc00::/7) — IPv6 equivalent of RFC 1918
        || (segs[0] & 0xfe00) == 0xfc00
        // Link-local (fe80::/10)
        || (segs[0] & 0xffc0) == 0xfe80
        // IPv4-mapped addresses
        || v6.to_ipv4_mapped().is_some_and(is_non_global_v4)
}

/// Checks `host` against the allowlist patterns.
///
/// * `*` matches every host.
/// * `*.example.com` and `example.com` both match `example.com` itself and
///   any of its subdomains. Matching is on whole labels, so
///   `notexample.com` does not match `example.com`.
///
/// Comparison is exact (case-sensitive); callers are expected to pass
/// already-normalised values.
fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
    allowed.iter().any(|pattern| {
        if pattern == "*" {
            return true;
        }

        // A leading "*." is optional: both spellings mean "domain or subdomain".
        let base = pattern.strip_prefix("*.").unwrap_or(pattern);
        host == base
            || host
                .strip_suffix(base)
                .is_some_and(|prefix| prefix.ends_with('.'))
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize_domains_works() {
        let domains = vec![
            " Example.COM ".into(),
            "docs.example.com".into(),
            String::new(),
        ];
        let normalized = normalize_domains(domains);
        assert_eq!(normalized, vec!["example.com", "docs.example.com"]);
    }

    #[test]
    fn extract_host_works() {
        assert_eq!(
            extract_host("https://example.com/path").unwrap(),
            "example.com"
        );
        assert_eq!(
            extract_host("https://Sub.Example.COM:8080/").unwrap(),
            "sub.example.com"
        );
    }

    #[test]
    fn extract_host_handles_ipv6() {
        // IPv6 with brackets (required for URLs with ports)
        assert_eq!(extract_host("https://[::1]/path").unwrap(), "[::1]");

        // IPv6 with brackets and port
        assert_eq!(
            extract_host("https://[2001:db8::1]:8080/path").unwrap(),
            "[2001:db8::1]"
        );

        // IPv6 with brackets, trailing slash
        assert_eq!(extract_host("https://[fe80::1]/").unwrap(), "[fe80::1]");
    }

    #[test]
    fn is_private_host_detects_local() {
        assert!(is_private_host("localhost"));
        assert!(is_private_host("app.localhost"));
        assert!(is_private_host("printer.local"));
        assert!(is_private_host("127.0.0.1"));
        assert!(is_private_host("192.168.1.1"));
        assert!(is_private_host("10.0.0.1"));
        assert!(!is_private_host("example.com"));
        assert!(!is_private_host("google.com"));
    }

    #[test]
    fn is_private_host_blocks_multicast_and_reserved() {
        assert!(is_private_host("224.0.0.1")); // multicast
        assert!(is_private_host("255.255.255.255")); // broadcast
        assert!(is_private_host("100.64.0.1")); // shared address space
        assert!(is_private_host("240.0.0.1")); // reserved
        assert!(is_private_host("192.0.2.1")); // documentation
        assert!(is_private_host("198.51.100.1")); // documentation
        assert!(is_private_host("203.0.113.1")); // documentation
        assert!(is_private_host("198.18.0.1")); // benchmarking
    }

    #[test]
    fn is_non_global_v4_matches_special_ranges_exactly() {
        use std::net::Ipv4Addr;

        // The special-purpose /24 blocks stay non-global...
        assert!(is_non_global_v4(Ipv4Addr::new(192, 0, 0, 1)));
        assert!(is_non_global_v4(Ipv4Addr::new(192, 0, 2, 1)));
        assert!(is_non_global_v4(Ipv4Addr::new(198, 51, 100, 1)));
        assert!(is_non_global_v4(Ipv4Addr::new(203, 0, 113, 1)));

        // ...while publicly routable neighbours in the same /16 are not flagged.
        assert!(!is_non_global_v4(Ipv4Addr::new(192, 0, 78, 1)));
        assert!(!is_non_global_v4(Ipv4Addr::new(198, 51, 101, 1)));
        assert!(!is_non_global_v4(Ipv4Addr::new(203, 0, 114, 1)));
    }

    #[test]
    fn is_private_host_catches_ipv6() {
        assert!(is_private_host("::1"));
        assert!(is_private_host("[::1]"));
        assert!(is_private_host("0.0.0.0"));
    }

    #[test]
    fn is_private_host_catches_mapped_ipv4() {
        // IPv4-mapped IPv6 addresses
        assert!(is_private_host("::ffff:127.0.0.1"));
        assert!(is_private_host("::ffff:10.0.0.1"));
        assert!(is_private_host("::ffff:192.168.1.1"));
    }

    #[test]
    fn is_private_host_catches_ipv6_private_ranges() {
        // Unique-local (fc00::/7)
        assert!(is_private_host("fd00::1"));
        assert!(is_private_host("fc00::1"));
        // Link-local (fe80::/10)
        assert!(is_private_host("fe80::1"));
        // Public IPv6 should pass
        assert!(!is_private_host("2001:db8::1"));
    }

    #[test]
    fn validate_url_blocks_ipv6_ssrf() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["*".into()], None);
        assert!(tool.validate_url("https://[::1]/").is_err());
        assert!(tool.validate_url("https://[::ffff:127.0.0.1]/").is_err());
        assert!(tool
            .validate_url("https://[::ffff:10.0.0.1]:8080/")
            .is_err());
    }

    #[test]
    fn host_matches_allowlist_exact() {
        let allowed = vec!["example.com".into()];
        assert!(host_matches_allowlist("example.com", &allowed));
        assert!(host_matches_allowlist("sub.example.com", &allowed));
        assert!(!host_matches_allowlist("notexample.com", &allowed));
    }

    #[test]
    fn host_matches_allowlist_wildcard() {
        let allowed = vec!["*.example.com".into()];
        assert!(host_matches_allowlist("sub.example.com", &allowed));
        assert!(host_matches_allowlist("example.com", &allowed));
        assert!(!host_matches_allowlist("other.com", &allowed));
    }

    #[test]
    fn host_matches_allowlist_star() {
        let allowed = vec!["*".into()];
        assert!(host_matches_allowlist("anything.com", &allowed));
        assert!(host_matches_allowlist("example.org", &allowed));
    }

    #[test]
    fn browser_backend_parser_accepts_supported_values() {
        assert_eq!(
            BrowserBackendKind::parse("agent_browser").unwrap(),
            BrowserBackendKind::AgentBrowser
        );
        assert_eq!(
            BrowserBackendKind::parse("rust-native").unwrap(),
            BrowserBackendKind::RustNative
        );
        assert_eq!(
            BrowserBackendKind::parse("computer_use").unwrap(),
            BrowserBackendKind::ComputerUse
        );
        assert_eq!(
            BrowserBackendKind::parse("auto").unwrap(),
            BrowserBackendKind::Auto
        );
    }

    #[test]
    fn browser_backend_parser_rejects_unknown_values() {
        assert!(BrowserBackendKind::parse("playwright").is_err());
    }

    #[test]
    fn browser_tool_default_backend_is_agent_browser() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None);
        assert_eq!(
            tool.configured_backend().unwrap(),
            BrowserBackendKind::AgentBrowser
        );
    }

    #[test]
    fn browser_tool_accepts_auto_backend_config() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "auto".into(),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig::default(),
        );
        assert_eq!(tool.configured_backend().unwrap(), BrowserBackendKind::Auto);
    }

    #[test]
    fn browser_tool_accepts_computer_use_backend_config() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig::default(),
        );
        assert_eq!(
            tool.configured_backend().unwrap(),
            BrowserBackendKind::ComputerUse
        );
    }

    #[test]
    fn computer_use_endpoint_rejects_public_http_by_default() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                endpoint: "http://computer-use.example.com/v1/actions".into(),
                ..ComputerUseConfig::default()
            },
        );

        assert!(tool.computer_use_endpoint_url().is_err());
    }

    #[test]
    fn computer_use_endpoint_requires_https_for_public_remote() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                endpoint: "https://computer-use.example.com/v1/actions".into(),
                allow_remote_endpoint: true,
                ..ComputerUseConfig::default()
            },
        );

        assert!(tool.computer_use_endpoint_url().is_ok());
    }

    #[test]
    fn computer_use_coordinate_validation_applies_limits() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                max_coordinate_x: Some(100),
                max_coordinate_y: Some(100),
                ..ComputerUseConfig::default()
            },
        );

        assert!(tool
            .validate_coordinate("x", 50, tool.computer_use.max_coordinate_x)
            .is_ok());
        assert!(tool
            .validate_coordinate("x", 101, tool.computer_use.max_coordinate_x)
            .is_err());
        assert!(tool
            .validate_coordinate("y", -1, tool.computer_use.max_coordinate_y)
            .is_err());
    }

    #[test]
    fn browser_tool_name() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None);
        assert_eq!(tool.name(), "browser");
    }

    #[test]
    fn browser_tool_validates_url() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None);

        // Valid
        assert!(tool.validate_url("https://example.com").is_ok());
        assert!(tool.validate_url("https://sub.example.com/path").is_ok());

        // Invalid - not in allowlist
        assert!(tool.validate_url("https://other.com").is_err());

        // Invalid - private host
        assert!(tool.validate_url("https://localhost").is_err());
        assert!(tool.validate_url("https://127.0.0.1").is_err());

        // Invalid - not https
        assert!(tool.validate_url("ftp://example.com").is_err());

        // file:// URLs blocked (local file exfiltration risk)
        assert!(tool.validate_url("file:///tmp/test.html").is_err());
    }

    #[test]
    fn browser_tool_empty_allowlist_blocks() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec![], None);
        assert!(tool.validate_url("https://example.com").is_err());
    }

    #[test]
    fn computer_use_only_action_detection_is_correct() {
        assert!(is_computer_use_only_action("mouse_move"));
        assert!(is_computer_use_only_action("mouse_click"));
        assert!(is_computer_use_only_action("mouse_drag"));
        assert!(is_computer_use_only_action("key_type"));
        assert!(is_computer_use_only_action("key_press"));
        assert!(is_computer_use_only_action("screen_capture"));
        assert!(!is_computer_use_only_action("open"));
        assert!(!is_computer_use_only_action("snapshot"));
    }

    #[test]
    fn unavailable_action_error_preserves_backend_context() {
        assert_eq!(
            unavailable_action_for_backend_error("mouse_move", ResolvedBackend::AgentBrowser),
            "Action 'mouse_move' is unavailable for backend 'agent_browser'"
        );
        assert_eq!(
            unavailable_action_for_backend_error("mouse_move", ResolvedBackend::RustNative),
            "Action 'mouse_move' is unavailable for backend 'rust_native'"
        );
    }
}