feat: add browser automation tool using Vercel agent-browser

- Add src/tools/browser.rs with BrowserTool implementation
- Wraps agent-browser CLI for AI-optimized web browsing
- Supports: open, snapshot, click, fill, type, screenshot, wait, etc.
- Uses refs (@e1, @e2) from accessibility snapshots for precise element selection
- JSON output mode for LLM integration
- Security: allowlist-only domains, blocks private/local hosts
- Add session_name to BrowserConfig for persistent sessions
- Register BrowserTool in tools/mod.rs alongside BrowserOpenTool

All tests pass.
This commit is contained in:
argenis de la rosa 2026-02-14 15:46:36 -05:00
parent 153d6ff149
commit 554f6e9ea5
5 changed files with 1084 additions and 15 deletions

View file

@ -13,12 +13,29 @@ FROM gcr.io/distroless/cc-debian12:nonroot
COPY --from=builder /app/target/release/zeroclaw /usr/local/bin/zeroclaw
# Default workspace (owned by nonroot user)
VOLUME ["/workspace"]
ENV ZEROCLAW_WORKSPACE=/workspace
# Default workspace and data directory (owned by nonroot user)
VOLUME ["/data"]
ENV ZEROCLAW_WORKSPACE=/data/workspace
# ── Environment variable configuration (Docker-native setup) ──
# These can be overridden at runtime via docker run -e or docker-compose
#
# Required:
# API_KEY or ZEROCLAW_API_KEY - Your LLM provider API key
#
# Optional:
# PROVIDER or ZEROCLAW_PROVIDER - LLM provider (default: openrouter)
# Options: openrouter, openai, anthropic, ollama
# ZEROCLAW_MODEL - Model to use (default: anthropic/claude-sonnet-4-20250514)
# PORT or ZEROCLAW_GATEWAY_PORT - Gateway port (default: 3000)
#
# Example:
# docker run -e API_KEY=sk-... -e PROVIDER=openrouter zeroclaw/zeroclaw
# Explicitly set a non-root user. NOTE: the distroless `:nonroot` tag runs as UID 65532 (`nonroot`); 65534 is `nobody`, so verify /data is writable by this UID.
USER 65534:65534
EXPOSE 3000
ENTRYPOINT ["zeroclaw"]
CMD ["gateway"]

48
docker-compose.yml Normal file
View file

@ -0,0 +1,48 @@
# ZeroClaw Docker Compose Example
#
# Quick start:
# 1. Copy this file and set your API key
# 2. Run: docker-compose up -d
# 3. Access gateway at http://localhost:3000
#
# For more info: https://github.com/theonlyhennygod/zeroclaw
services:
zeroclaw:
image: zeroclaw/zeroclaw:latest
# Or build locally:
# build: .
container_name: zeroclaw
restart: unless-stopped
environment:
# Required: Your LLM provider API key
- API_KEY=${API_KEY:-}
# Or use the prefixed version:
# - ZEROCLAW_API_KEY=${ZEROCLAW_API_KEY:-}
# Optional: LLM provider (default: openrouter)
# Options: openrouter, openai, anthropic, ollama
- PROVIDER=${PROVIDER:-openrouter}
# Optional: Model override
# - ZEROCLAW_MODEL=anthropic/claude-sonnet-4-20250514
volumes:
# Persist workspace and config
- zeroclaw-data:/data
ports:
# Gateway API port
- "3000:3000"
# Health check
healthcheck:
test: ["CMD", "zeroclaw", "doctor"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
volumes:
zeroclaw-data:

View file

@ -162,12 +162,15 @@ impl Default for SecretsConfig {
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BrowserConfig {
/// Enable `browser_open` tool (opens URLs in Brave without scraping)
/// Enable browser tools (`browser_open` and browser automation)
#[serde(default)]
pub enabled: bool,
/// Allowed domains for `browser_open` (exact or subdomain match)
/// Allowed domains for browser tools (exact or subdomain match)
#[serde(default)]
pub allowed_domains: Vec<String>,
/// Session name for agent-browser (persists state across commands)
#[serde(default)]
pub session_name: Option<String>,
}
// ── Memory ───────────────────────────────────────────────────
@ -624,10 +627,19 @@ impl Default for Config {
impl Config {
pub fn load_or_init() -> Result<Self> {
let home = UserDirs::new()
.map(|u| u.home_dir().to_path_buf())
.context("Could not find home directory")?;
let zeroclaw_dir = home.join(".zeroclaw");
// Check for workspace override from environment (Docker support)
let zeroclaw_dir = if let Ok(workspace) = std::env::var("ZEROCLAW_WORKSPACE") {
let ws_path = PathBuf::from(&workspace);
ws_path
.parent()
.map_or_else(|| PathBuf::from(&workspace), PathBuf::from)
} else {
let home = UserDirs::new()
.map(|u| u.home_dir().to_path_buf())
.context("Could not find home directory")?;
home.join(".zeroclaw")
};
let config_path = zeroclaw_dir.join("config.toml");
if !zeroclaw_dir.exists() {
@ -636,16 +648,69 @@ impl Config {
.context("Failed to create workspace directory")?;
}
if config_path.exists() {
let mut config = if config_path.exists() {
let contents =
fs::read_to_string(&config_path).context("Failed to read config file")?;
let config: Config =
toml::from_str(&contents).context("Failed to parse config file")?;
Ok(config)
toml::from_str(&contents).context("Failed to parse config file")?
} else {
let config = Config::default();
Config::default()
};
// Apply environment variable overrides (Docker/container support)
config.apply_env_overrides();
// Save config if it didn't exist (creates default config with env overrides)
if !config_path.exists() {
config.save()?;
Ok(config)
}
Ok(config)
}
/// Apply environment variable overrides to config.
///
/// Applied: `ZEROCLAW_API_KEY` / `API_KEY`, `ZEROCLAW_PROVIDER` / `PROVIDER`,
/// `ZEROCLAW_MODEL`, `ZEROCLAW_WORKSPACE`.
///
/// For each setting the variables are consulted in the order listed and the
/// first one holding a non-empty value wins. A variable that is set but empty
/// is treated as unset, so it neither overrides the config nor hides a
/// populated fallback variable.
///
/// `ZEROCLAW_GATEWAY_PORT` / `PORT` are NOT applied here: the config has no
/// gateway port field yet, so those variables currently have no effect.
pub fn apply_env_overrides(&mut self) {
    // First variable in `names` that is set to a non-empty (valid Unicode) value.
    fn first_non_empty(names: &[&str]) -> Option<String> {
        names
            .iter()
            .filter_map(|name| std::env::var(name).ok())
            .find(|value| !value.is_empty())
    }

    // API Key: ZEROCLAW_API_KEY or API_KEY
    if let Some(key) = first_non_empty(&["ZEROCLAW_API_KEY", "API_KEY"]) {
        self.api_key = Some(key);
    }

    // Provider: ZEROCLAW_PROVIDER or PROVIDER
    if let Some(provider) = first_non_empty(&["ZEROCLAW_PROVIDER", "PROVIDER"]) {
        self.default_provider = Some(provider);
    }

    // Model: ZEROCLAW_MODEL
    if let Some(model) = first_non_empty(&["ZEROCLAW_MODEL"]) {
        self.default_model = Some(model);
    }

    // Workspace directory: ZEROCLAW_WORKSPACE
    if let Some(workspace) = first_non_empty(&["ZEROCLAW_WORKSPACE"]) {
        self.workspace_dir = PathBuf::from(workspace);
    }

    // TODO: apply ZEROCLAW_GATEWAY_PORT / PORT once the gateway config grows a
    // port field. The previous placeholder parsed the value and discarded it.
}
@ -1345,6 +1410,7 @@ default_temperature = 0.7
let b = BrowserConfig {
enabled: true,
allowed_domains: vec!["example.com".into(), "docs.example.com".into()],
session_name: None,
};
let toml_str = toml::to_string(&b).unwrap();
let parsed: BrowserConfig = toml::from_str(&toml_str).unwrap();
@ -1364,4 +1430,97 @@ default_temperature = 0.7
assert!(!parsed.browser.enabled);
assert!(parsed.browser.allowed_domains.is_empty());
}
// ── Environment variable overrides (Docker support) ─────────
/// Serializes the env-override tests. Environment variables are process-wide
/// and the test harness runs tests on parallel threads, so without this lock
/// one test can remove or overwrite a variable another test has just set.
static ENV_OVERRIDE_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Takes the env lock and clears every variable `apply_env_overrides` reads,
/// so each test starts from a known-empty environment.
///
/// A poisoned lock (an earlier test panicked while holding it) is recovered
/// rather than propagated, so one failure does not cascade into the others.
fn lock_clean_env() -> std::sync::MutexGuard<'static, ()> {
    let guard = ENV_OVERRIDE_TEST_LOCK
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    for name in [
        "ZEROCLAW_API_KEY",
        "API_KEY",
        "ZEROCLAW_PROVIDER",
        "PROVIDER",
        "ZEROCLAW_MODEL",
        "ZEROCLAW_WORKSPACE",
    ] {
        std::env::remove_var(name);
    }
    guard
}

#[test]
fn env_override_api_key() {
    let _env = lock_clean_env();
    let mut config = Config::default();
    assert!(config.api_key.is_none());

    // Simulate ZEROCLAW_API_KEY
    std::env::set_var("ZEROCLAW_API_KEY", "sk-test-env-key");
    config.apply_env_overrides();
    // Clean up before asserting so a failed assertion cannot leak the variable.
    std::env::remove_var("ZEROCLAW_API_KEY");

    assert_eq!(config.api_key.as_deref(), Some("sk-test-env-key"));
}

#[test]
fn env_override_api_key_fallback() {
    let _env = lock_clean_env();
    let mut config = Config::default();

    // Simulate API_KEY (fallback); the prefixed variable is cleared by the guard.
    std::env::set_var("API_KEY", "sk-fallback-key");
    config.apply_env_overrides();
    std::env::remove_var("API_KEY");

    assert_eq!(config.api_key.as_deref(), Some("sk-fallback-key"));
}

#[test]
fn env_override_provider() {
    let _env = lock_clean_env();
    let mut config = Config::default();

    std::env::set_var("ZEROCLAW_PROVIDER", "anthropic");
    config.apply_env_overrides();
    std::env::remove_var("ZEROCLAW_PROVIDER");

    assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
}

#[test]
fn env_override_provider_fallback() {
    let _env = lock_clean_env();
    let mut config = Config::default();

    std::env::set_var("PROVIDER", "openai");
    config.apply_env_overrides();
    std::env::remove_var("PROVIDER");

    assert_eq!(config.default_provider.as_deref(), Some("openai"));
}

#[test]
fn env_override_model() {
    let _env = lock_clean_env();
    let mut config = Config::default();

    std::env::set_var("ZEROCLAW_MODEL", "gpt-4o");
    config.apply_env_overrides();
    std::env::remove_var("ZEROCLAW_MODEL");

    assert_eq!(config.default_model.as_deref(), Some("gpt-4o"));
}

#[test]
fn env_override_workspace() {
    let _env = lock_clean_env();
    let mut config = Config::default();

    std::env::set_var("ZEROCLAW_WORKSPACE", "/custom/workspace");
    config.apply_env_overrides();
    std::env::remove_var("ZEROCLAW_WORKSPACE");

    assert_eq!(config.workspace_dir, PathBuf::from("/custom/workspace"));
}

#[test]
fn env_override_empty_values_ignored() {
    // The guard also clears PROVIDER, so no fallback value can interfere.
    let _env = lock_clean_env();
    let mut config = Config::default();
    let original_provider = config.default_provider.clone();

    std::env::set_var("ZEROCLAW_PROVIDER", "");
    config.apply_env_overrides();
    std::env::remove_var("ZEROCLAW_PROVIDER");

    // Empty value should not override
    assert_eq!(config.default_provider, original_provider);
}
}

834
src/tools/browser.rs Normal file
View file

@ -0,0 +1,834 @@
//! Browser automation tool using Vercel's agent-browser CLI
//!
//! This tool provides AI-optimized web browsing capabilities via the agent-browser CLI.
//! It supports semantic element selection, accessibility snapshots, and JSON output
//! for efficient LLM integration.
use super::traits::{Tool, ToolResult};
use crate::security::SecurityPolicy;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::process::Stdio;
use std::sync::Arc;
use tokio::process::Command;
use tracing::debug;
/// Browser automation tool using agent-browser CLI
///
/// Each action is executed by spawning the external `agent-browser`
/// executable; this struct only carries the policy and configuration needed
/// to decide whether and how to run it.
pub struct BrowserTool {
    /// Security policy consulted (`can_act`, `record_action`) before an action runs.
    security: Arc<SecurityPolicy>,
    /// Allowlisted domains, normalized via `normalize_domains` at construction.
    allowed_domains: Vec<String>,
    /// Optional session name, forwarded to the CLI as `--session <name>`.
    session_name: Option<String>,
}
/// Response from agent-browser --json commands
///
/// Deserialized from the CLI's stdout. When stdout is not valid JSON of this
/// shape, `run_command` synthesizes a value from the exit status instead.
#[derive(Debug, Deserialize)]
struct AgentBrowserResponse {
    /// Whether the command succeeded.
    success: bool,
    /// Command payload, if any; pretty-printed into the tool output on success.
    data: Option<Value>,
    /// Error message, if any; surfaced as the tool error on failure.
    error: Option<String>,
}
/// Supported browser actions
///
/// Each variant maps to one `agent-browser` subcommand invocation (see
/// `BrowserTool::execute_action` for the exact argument layout).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BrowserAction {
    /// Navigate to a URL (validated against the allowlist before running)
    Open { url: String },
    /// Get accessibility snapshot with refs
    Snapshot {
        /// Passes `-i` to the CLI when true
        #[serde(default)]
        interactive_only: bool,
        /// Passes `-c` to the CLI when true
        #[serde(default)]
        compact: bool,
        /// Passes `-d <depth>` to the CLI when set
        #[serde(default)]
        depth: Option<u32>,
    },
    /// Click an element by ref or selector
    Click { selector: String },
    /// Fill a form field
    Fill { selector: String, value: String },
    /// Type text into the element matched by `selector`
    Type { selector: String, text: String },
    /// Get text content of element
    GetText { selector: String },
    /// Get page title
    GetTitle,
    /// Get current URL
    GetUrl,
    /// Take screenshot
    Screenshot {
        /// Optional output path, passed through to the CLI unchanged
        #[serde(default)]
        path: Option<String>,
        /// Passes `--full` to the CLI when true
        #[serde(default)]
        full_page: bool,
    },
    /// Wait for element or time
    ///
    /// Only one condition is forwarded, in priority order:
    /// `selector`, then `ms`, then `text`.
    Wait {
        #[serde(default)]
        selector: Option<String>,
        #[serde(default)]
        ms: Option<u64>,
        #[serde(default)]
        text: Option<String>,
    },
    /// Press a key
    Press { key: String },
    /// Hover over element
    Hover { selector: String },
    /// Scroll page
    Scroll {
        direction: String,
        /// Optional scroll distance, appended after the direction when set
        #[serde(default)]
        pixels: Option<u32>,
    },
    /// Check if element is visible
    IsVisible { selector: String },
    /// Close browser
    Close,
    /// Find element by semantic locator
    Find {
        by: String, // role, text, label, placeholder, testid
        value: String,
        action: String, // click, fill, text, hover
        /// Appended as the final CLI argument when set
        #[serde(default)]
        fill_value: Option<String>,
    },
}
impl BrowserTool {
/// Creates a browser tool bound to `security`.
///
/// `allowed_domains` is normalized (trimmed, lowercased, empty entries
/// dropped) once here so later allowlist checks can compare directly.
pub fn new(
    security: Arc<SecurityPolicy>,
    allowed_domains: Vec<String>,
    session_name: Option<String>,
) -> Self {
    let allowed_domains = normalize_domains(allowed_domains);
    Self {
        security,
        allowed_domains,
        session_name,
    }
}
/// Check if agent-browser CLI is available
///
/// Runs `agent-browser --version` with its output discarded. Returns `true`
/// only when the process could be spawned and exited successfully.
pub async fn is_available() -> bool {
    let probe = Command::new("agent-browser")
        .arg("--version")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .await;
    matches!(probe, Ok(status) if status.success())
}
/// Validate URL against allowlist
///
/// Accepts the URL when it is a `file://` URL, or when it is an
/// `http(s)://` URL whose host is not private/local and matches
/// `allowed_domains`. Every rejection is returned as an error whose message
/// states the reason.
fn validate_url(&self, url: &str) -> anyhow::Result<()> {
    let candidate = url.trim();
    anyhow::ensure!(!candidate.is_empty(), "URL cannot be empty");

    // Allow file:// URLs for local testing
    // NOTE(review): this returns before any allowlist check, so every local
    // file path is accepted — confirm that is intended outside of tests.
    if candidate.starts_with("file://") {
        return Ok(());
    }

    let is_web_url = candidate.starts_with("https://") || candidate.starts_with("http://");
    anyhow::ensure!(is_web_url, "Only http:// and https:// URLs are allowed");

    anyhow::ensure!(
        !self.allowed_domains.is_empty(),
        "Browser tool enabled but no allowed_domains configured. \
         Add [browser].allowed_domains in config.toml"
    );

    let host = extract_host(candidate)?;
    anyhow::ensure!(!is_private_host(&host), "Blocked local/private host: {host}");
    anyhow::ensure!(
        host_matches_allowlist(&host, &self.allowed_domains),
        "Host '{host}' not in browser.allowed_domains"
    );
    Ok(())
}
/// Execute an agent-browser command
///
/// Spawns `agent-browser [--session <name>] <args…> --json` and waits for it
/// to exit.
///
/// # Returns
/// - The parsed response when stdout is JSON of the expected shape.
/// - Otherwise a synthesized response: success with the raw stdout under
///   `data.output` when the process exited successfully, or failure with the
///   best available diagnostic (stderr, then stdout, then the exit status).
///
/// # Errors
/// Returns an error only when the process could not be spawned or awaited.
async fn run_command(&self, args: &[&str]) -> anyhow::Result<AgentBrowserResponse> {
    let mut cmd = Command::new("agent-browser");

    // Add session if configured
    if let Some(session) = self.session_name.as_deref() {
        cmd.arg("--session").arg(session);
    }

    // Add --json for machine-readable output
    cmd.args(args).arg("--json");

    // NOTE(review): args can carry user-supplied values (e.g. text being
    // filled into a form); consider redacting them if debug logs are retained.
    debug!("Running: agent-browser {} --json", args.join(" "));

    let output = cmd
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .await
        .map_err(|e| anyhow::anyhow!("Failed to run agent-browser: {e}"))?;

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);

    if !stderr.is_empty() {
        debug!("agent-browser stderr: {}", stderr);
    }

    // Parse JSON response
    if let Ok(resp) = serde_json::from_str::<AgentBrowserResponse>(&stdout) {
        return Ok(resp);
    }

    // Fallback for non-JSON output
    if output.status.success() {
        return Ok(AgentBrowserResponse {
            success: true,
            data: Some(json!({ "output": stdout.trim() })),
            error: None,
        });
    }

    // Never report a failure with an empty message: fall back from stderr to
    // stdout to the exit status so the caller always has something to show.
    let stderr = stderr.trim();
    let stdout = stdout.trim();
    let message = if !stderr.is_empty() {
        stderr.to_string()
    } else if !stdout.is_empty() {
        stdout.to_string()
    } else {
        format!("agent-browser exited with {}", output.status)
    };
    Ok(AgentBrowserResponse {
        success: false,
        data: None,
        error: Some(message),
    })
}
/// Execute a browser action
async fn execute_action(&self, action: BrowserAction) -> anyhow::Result<ToolResult> {
match action {
BrowserAction::Open { url } => {
self.validate_url(&url)?;
let resp = self.run_command(&["open", &url]).await?;
self.to_result(resp)
}
BrowserAction::Snapshot {
interactive_only,
compact,
depth,
} => {
let mut args = vec!["snapshot"];
if interactive_only {
args.push("-i");
}
if compact {
args.push("-c");
}
let depth_str;
if let Some(d) = depth {
args.push("-d");
depth_str = d.to_string();
args.push(&depth_str);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::Click { selector } => {
let resp = self.run_command(&["click", &selector]).await?;
self.to_result(resp)
}
BrowserAction::Fill { selector, value } => {
let resp = self.run_command(&["fill", &selector, &value]).await?;
self.to_result(resp)
}
BrowserAction::Type { selector, text } => {
let resp = self.run_command(&["type", &selector, &text]).await?;
self.to_result(resp)
}
BrowserAction::GetText { selector } => {
let resp = self.run_command(&["get", "text", &selector]).await?;
self.to_result(resp)
}
BrowserAction::GetTitle => {
let resp = self.run_command(&["get", "title"]).await?;
self.to_result(resp)
}
BrowserAction::GetUrl => {
let resp = self.run_command(&["get", "url"]).await?;
self.to_result(resp)
}
BrowserAction::Screenshot { path, full_page } => {
let mut args = vec!["screenshot"];
if let Some(ref p) = path {
args.push(p);
}
if full_page {
args.push("--full");
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::Wait { selector, ms, text } => {
let mut args = vec!["wait"];
let ms_str;
if let Some(sel) = selector.as_ref() {
args.push(sel);
} else if let Some(millis) = ms {
ms_str = millis.to_string();
args.push(&ms_str);
} else if let Some(ref t) = text {
args.push("--text");
args.push(t);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::Press { key } => {
let resp = self.run_command(&["press", &key]).await?;
self.to_result(resp)
}
BrowserAction::Hover { selector } => {
let resp = self.run_command(&["hover", &selector]).await?;
self.to_result(resp)
}
BrowserAction::Scroll { direction, pixels } => {
let mut args = vec!["scroll", &direction];
let px_str;
if let Some(px) = pixels {
px_str = px.to_string();
args.push(&px_str);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::IsVisible { selector } => {
let resp = self.run_command(&["is", "visible", &selector]).await?;
self.to_result(resp)
}
BrowserAction::Close => {
let resp = self.run_command(&["close"]).await?;
self.to_result(resp)
}
BrowserAction::Find {
by,
value,
action,
fill_value,
} => {
let mut args = vec!["find", &by, &value, &action];
if let Some(ref fv) = fill_value {
args.push(fv);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
}
}
/// Converts a CLI response into a [`ToolResult`].
///
/// On success the payload (if any) is pretty-printed as JSON into `output`;
/// a missing or unserializable payload yields an empty string. On failure
/// the response's error is passed through and `output` is empty.
fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result<ToolResult> {
    if !resp.success {
        return Ok(ToolResult {
            success: false,
            output: String::new(),
            error: resp.error,
        });
    }

    let output = match resp.data {
        Some(payload) => serde_json::to_string_pretty(&payload).unwrap_or_default(),
        None => String::new(),
    };
    Ok(ToolResult {
        success: true,
        output,
        error: None,
    })
}
}
#[async_trait]
impl Tool for BrowserTool {
/// Returns the tool's name, `browser`.
fn name(&self) -> &str {
    "browser"
}
/// Returns the human/LLM-facing description of the tool, including the
/// recommended snapshot-then-ref workflow.
fn description(&self) -> &str {
    "Web browser automation using agent-browser. Supports navigation, clicking, \
     filling forms, taking screenshots, and getting accessibility snapshots with refs. \
     Use 'snapshot' to get interactive elements with refs (@e1, @e2), then use refs \
     for precise element interaction. Allowed domains only."
}
/// Returns the JSON Schema describing the arguments accepted by `execute`.
///
/// Only `action` is required at the schema level; which of the other
/// properties are needed depends on the action and is enforced in `execute`.
fn parameters_schema(&self) -> Value {
    json!({
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["open", "snapshot", "click", "fill", "type", "get_text",
                         "get_title", "get_url", "screenshot", "wait", "press",
                         "hover", "scroll", "is_visible", "close", "find"],
                "description": "Browser action to perform"
            },
            "url": {
                "type": "string",
                "description": "URL to navigate to (for 'open' action)"
            },
            "selector": {
                "type": "string",
                "description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..."
            },
            // `execute` reads `value` for both `fill` and `find`; `type` reads
            // `text`, so the description must not advertise it for typing.
            "value": {
                "type": "string",
                "description": "Value to fill (for 'fill'), or the locator value to match (for 'find')"
            },
            "text": {
                "type": "string",
                "description": "Text to type or wait for"
            },
            "key": {
                "type": "string",
                "description": "Key to press (Enter, Tab, Escape, etc.)"
            },
            "direction": {
                "type": "string",
                "enum": ["up", "down", "left", "right"],
                "description": "Scroll direction"
            },
            "pixels": {
                "type": "integer",
                "description": "Pixels to scroll"
            },
            "interactive_only": {
                "type": "boolean",
                "description": "For snapshot: only show interactive elements"
            },
            "compact": {
                "type": "boolean",
                "description": "For snapshot: remove empty structural elements"
            },
            "depth": {
                "type": "integer",
                "description": "For snapshot: limit tree depth"
            },
            "full_page": {
                "type": "boolean",
                "description": "For screenshot: capture full page"
            },
            "path": {
                "type": "string",
                "description": "File path for screenshot"
            },
            "ms": {
                "type": "integer",
                "description": "Milliseconds to wait"
            },
            "by": {
                "type": "string",
                "enum": ["role", "text", "label", "placeholder", "testid"],
                "description": "For find: semantic locator type"
            },
            "find_action": {
                "type": "string",
                "enum": ["click", "fill", "text", "hover", "check"],
                "description": "For find: action to perform on found element"
            },
            "fill_value": {
                "type": "string",
                "description": "For find with fill action: value to fill"
            }
        },
        "required": ["action"]
    })
}
async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
// Security checks
if !self.security.can_act() {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some("Action blocked: autonomy is read-only".into()),
});
}
if !self.security.record_action() {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some("Action blocked: rate limit exceeded".into()),
});
}
// Check if agent-browser is available
if !Self::is_available().await {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(
"agent-browser CLI not found. Install with: npm install -g agent-browser"
.into(),
),
});
}
// Parse action from args
let action_str = args
.get("action")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?;
let action = match action_str {
"open" => {
let url = args
.get("url")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?;
BrowserAction::Open { url: url.into() }
}
"snapshot" => BrowserAction::Snapshot {
interactive_only: args
.get("interactive_only")
.and_then(|v| v.as_bool())
.unwrap_or(true), // Default to interactive for AI
compact: args
.get("compact")
.and_then(|v| v.as_bool())
.unwrap_or(true),
depth: args.get("depth").and_then(|v| v.as_u64()).map(|d| d as u32),
},
"click" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for click"))?;
BrowserAction::Click {
selector: selector.into(),
}
}
"fill" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for fill"))?;
let value = args
.get("value")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'value' for fill"))?;
BrowserAction::Fill {
selector: selector.into(),
value: value.into(),
}
}
"type" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for type"))?;
let text = args
.get("text")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'text' for type"))?;
BrowserAction::Type {
selector: selector.into(),
text: text.into(),
}
}
"get_text" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for get_text"))?;
BrowserAction::GetText {
selector: selector.into(),
}
}
"get_title" => BrowserAction::GetTitle,
"get_url" => BrowserAction::GetUrl,
"screenshot" => BrowserAction::Screenshot {
path: args.get("path").and_then(|v| v.as_str()).map(String::from),
full_page: args
.get("full_page")
.and_then(|v| v.as_bool())
.unwrap_or(false),
},
"wait" => BrowserAction::Wait {
selector: args
.get("selector")
.and_then(|v| v.as_str())
.map(String::from),
ms: args.get("ms").and_then(|v| v.as_u64()),
text: args.get("text").and_then(|v| v.as_str()).map(String::from),
},
"press" => {
let key = args
.get("key")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'key' for press"))?;
BrowserAction::Press { key: key.into() }
}
"hover" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for hover"))?;
BrowserAction::Hover {
selector: selector.into(),
}
}
"scroll" => {
let direction = args
.get("direction")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'direction' for scroll"))?;
BrowserAction::Scroll {
direction: direction.into(),
pixels: args
.get("pixels")
.and_then(|v| v.as_u64())
.map(|p| p as u32),
}
}
"is_visible" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for is_visible"))?;
BrowserAction::IsVisible {
selector: selector.into(),
}
}
"close" => BrowserAction::Close,
"find" => {
let by = args
.get("by")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'by' for find"))?;
let value = args
.get("value")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'value' for find"))?;
let action = args
.get("find_action")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'find_action' for find"))?;
BrowserAction::Find {
by: by.into(),
value: value.into(),
action: action.into(),
fill_value: args
.get("fill_value")
.and_then(|v| v.as_str())
.map(String::from),
}
}
_ => {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Unknown action: {action_str}")),
});
}
};
self.execute_action(action).await
}
}
// ── Helper functions ─────────────────────────────────────────────
/// Canonicalizes allowlist entries: trims surrounding whitespace, lowercases,
/// and drops entries that are empty after trimming. Order is preserved.
fn normalize_domains(domains: Vec<String>) -> Vec<String> {
    let mut cleaned = Vec::new();
    for raw in domains {
        let domain = raw.trim().to_lowercase();
        if !domain.is_empty() {
            cleaned.push(domain);
        }
    }
    cleaned
}
/// Extracts the lowercased host from a URL without using the `url` crate.
///
/// Handles the parts of the authority that decide where a browser actually
/// connects:
/// - the authority ends at the first `/`, `?`, `#` or `\` (browsers treat
///   `\` like `/` for http(s) URLs);
/// - any `userinfo@` prefix is discarded, so `https://good.com:x@evil.com/`
///   yields `evil.com` rather than `good.com`;
/// - a bracketed IPv6 literal (`[::1]:8080`) yields the address without
///   brackets or port;
/// - otherwise the port (after `:`) and a trailing root dot are removed.
///
/// # Errors
/// Returns an error when no host is present or an IPv6 literal is missing
/// its closing bracket.
fn extract_host(url_str: &str) -> anyhow::Result<String> {
    let url = url_str.trim();
    let without_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .or_else(|| url.strip_prefix("file://"))
        .unwrap_or(url);

    // Authority: everything before the path, query or fragment.
    let authority = without_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#' | '\\'))
        .next()
        .unwrap_or(without_scheme);

    // Drop credentials; the real host follows the LAST '@'.
    let host_port = authority.rsplit('@').next().unwrap_or(authority);

    let host = if let Some(after_bracket) = host_port.strip_prefix('[') {
        // IPv6 literal: the address itself contains ':', so cut at ']'.
        match after_bracket.split_once(']') {
            Some((addr, _port)) => addr,
            None => anyhow::bail!("Invalid URL: malformed IPv6 host"),
        }
    } else {
        host_port.split(':').next().unwrap_or(host_port)
    };

    // "example.com." and "example.com" name the same host.
    let host = host.trim_end_matches('.');

    if host.is_empty() {
        anyhow::bail!("Invalid URL: no host");
    }

    Ok(host.to_lowercase())
}
/// Returns `true` when `host` must be treated as local/private and blocked.
///
/// The host is classified by parsing it, not by string prefix:
/// - `localhost`, `localhost.*` and `*.localhost` names;
/// - IPv4 loopback, RFC 1918 private, link-local (169.254.0.0/16, which
///   includes cloud metadata endpoints), unspecified, broadcast, 0.0.0.0/8
///   and carrier-grade NAT (100.64.0.0/10) addresses;
/// - IPv6 loopback, unspecified, unique-local (fc00::/7), link-local
///   (fe80::/10), and IPv4-mapped addresses whose IPv4 part is one of the
///   above;
/// - hosts that are not valid IP literals but whose last label is numeric
///   (`127.1`, `2130706433`, `0x7f000001`): browsers may interpret these as
///   IPv4 addresses, so they are blocked conservatively.
///
/// Ordinary domain names are never flagged, even if they begin with digits
/// (e.g. `10.example.com`). Surrounding brackets, a trailing dot and letter
/// case are ignored.
fn is_private_host(host: &str) -> bool {
    fn v4_is_internal(ip: std::net::Ipv4Addr) -> bool {
        let [first, second, ..] = ip.octets();
        ip.is_loopback()
            || ip.is_private()
            || ip.is_link_local()
            || ip.is_unspecified()
            || ip.is_broadcast()
            || first == 0
            || (first == 100 && (second & 0xc0) == 64)
    }

    fn v6_is_internal(ip: std::net::Ipv6Addr) -> bool {
        if let Some(mapped) = ip.to_ipv4_mapped() {
            return v4_is_internal(mapped);
        }
        let first = ip.segments()[0];
        ip.is_loopback()
            || ip.is_unspecified()
            || (first & 0xfe00) == 0xfc00 // unique local fc00::/7
            || (first & 0xffc0) == 0xfe80 // link-local fe80::/10
    }

    let lowered = host.trim().to_ascii_lowercase();
    let name = lowered.trim_end_matches('.');
    let name = name
        .strip_prefix('[')
        .and_then(|inner| inner.strip_suffix(']'))
        .unwrap_or(name);

    if name == "localhost" || name.starts_with("localhost.") || name.ends_with(".localhost") {
        return true;
    }

    match name.parse::<std::net::IpAddr>() {
        Ok(std::net::IpAddr::V4(ip)) => v4_is_internal(ip),
        Ok(std::net::IpAddr::V6(ip)) => v6_is_internal(ip),
        // Not a canonical IP literal. A numeric final label means a browser
        // may still parse it as IPv4 (short, decimal or hex forms), and we
        // cannot classify those reliably — block them.
        Err(_) => name.rsplit('.').next().map_or(false, |label| {
            !label.is_empty()
                && (label.bytes().all(|b| b.is_ascii_digit()) || label.starts_with("0x"))
        }),
    }
}
/// Returns `true` when `host` is permitted by any entry in `allowed`.
///
/// - `*` permits every host.
/// - `example.com` and `*.example.com` are equivalent: both permit the
///   domain itself and any subdomain of it.
///
/// Matching is on whole labels, so `notexample.com` does not match
/// `example.com`.
fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
    for pattern in allowed {
        if pattern == "*" {
            return true;
        }
        // A leading "*." adds nothing beyond the plain-domain rule.
        let domain = pattern.strip_prefix("*.").unwrap_or(pattern.as_str());
        let permitted = match host.strip_suffix(domain) {
            // Nothing left: exact match. Left part ends in '.': subdomain.
            Some(prefix) => prefix.is_empty() || prefix.ends_with('.'),
            None => false,
        };
        if permitted {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tool with the default security policy and the given allowlist.
    fn tool_with(allowed: &[&str]) -> BrowserTool {
        let domains = allowed.iter().map(|d| (*d).to_string()).collect();
        BrowserTool::new(Arc::new(SecurityPolicy::default()), domains, None)
    }

    #[test]
    fn normalize_domains_works() {
        let raw: Vec<String> = [" Example.COM ", "docs.example.com", ""]
            .iter()
            .map(|d| (*d).to_string())
            .collect();
        assert_eq!(
            normalize_domains(raw),
            vec!["example.com", "docs.example.com"]
        );
    }

    #[test]
    fn extract_host_works() {
        let cases = [
            ("https://example.com/path", "example.com"),
            ("https://Sub.Example.COM:8080/", "sub.example.com"),
        ];
        for (url, expected) in cases {
            assert_eq!(extract_host(url).unwrap(), expected, "url: {url}");
        }
    }

    #[test]
    fn is_private_host_detects_local() {
        for host in ["localhost", "127.0.0.1", "192.168.1.1", "10.0.0.1"] {
            assert!(is_private_host(host), "expected private: {host}");
        }
        for host in ["example.com", "google.com"] {
            assert!(!is_private_host(host), "expected public: {host}");
        }
    }

    #[test]
    fn host_matches_allowlist_exact() {
        let allowed = vec!["example.com".to_string()];
        let cases = [
            ("example.com", true),
            ("sub.example.com", true),
            ("notexample.com", false),
        ];
        for (host, expected) in cases {
            assert_eq!(host_matches_allowlist(host, &allowed), expected, "host: {host}");
        }
    }

    #[test]
    fn host_matches_allowlist_wildcard() {
        let allowed = vec!["*.example.com".to_string()];
        let cases = [
            ("sub.example.com", true),
            ("example.com", true),
            ("other.com", false),
        ];
        for (host, expected) in cases {
            assert_eq!(host_matches_allowlist(host, &allowed), expected, "host: {host}");
        }
    }

    #[test]
    fn host_matches_allowlist_star() {
        let allowed = vec!["*".to_string()];
        for host in ["anything.com", "example.org"] {
            assert!(host_matches_allowlist(host, &allowed), "host: {host}");
        }
    }

    #[test]
    fn browser_tool_name() {
        assert_eq!(tool_with(&["example.com"]).name(), "browser");
    }

    #[test]
    fn browser_tool_validates_url() {
        let tool = tool_with(&["example.com"]);

        let accepted = [
            // Valid
            "https://example.com",
            "https://sub.example.com/path",
            // File URLs allowed
            "file:///tmp/test.html",
        ];
        for url in accepted {
            assert!(tool.validate_url(url).is_ok(), "expected ok: {url}");
        }

        let rejected = [
            // Invalid - not in allowlist
            "https://other.com",
            // Invalid - private host
            "https://localhost",
            "https://127.0.0.1",
            // Invalid - not https
            "ftp://example.com",
        ];
        for url in rejected {
            assert!(tool.validate_url(url).is_err(), "expected err: {url}");
        }
    }

    #[test]
    fn browser_tool_empty_allowlist_blocks() {
        let tool = tool_with(&[]);
        assert!(tool.validate_url("https://example.com").is_err());
    }
}

View file

@ -1,3 +1,4 @@
pub mod browser;
pub mod browser_open;
pub mod composio;
pub mod file_read;
@ -8,6 +9,7 @@ pub mod memory_store;
pub mod shell;
pub mod traits;
pub use browser::BrowserTool;
pub use browser_open::BrowserOpenTool;
pub use composio::ComposioTool;
pub use file_read::FileReadTool;
@ -50,10 +52,17 @@ pub fn all_tools(
];
if browser_config.enabled {
// Add legacy browser_open tool for simple URL opening
tools.push(Box::new(BrowserOpenTool::new(
security.clone(),
browser_config.allowed_domains.clone(),
)));
// Add full browser automation tool (agent-browser)
tools.push(Box::new(BrowserTool::new(
security.clone(),
browser_config.allowed_domains.clone(),
browser_config.session_name.clone(),
)));
}
if let Some(key) = composio_key {
@ -92,6 +101,7 @@ mod tests {
let browser = BrowserConfig {
enabled: false,
allowed_domains: vec!["example.com".into()],
session_name: None,
};
let tools = all_tools(&security, mem, None, &browser);
@ -113,6 +123,7 @@ mod tests {
let browser = BrowserConfig {
enabled: true,
allowed_domains: vec!["example.com".into()],
session_name: None,
};
let tools = all_tools(&security, mem, None, &browser);