feat: add browser automation tool using Vercel agent-browser
- Add src/tools/browser.rs with BrowserTool implementation - Wraps agent-browser CLI for AI-optimized web browsing - Supports: open, snapshot, click, fill, type, screenshot, wait, etc. - Uses refs (@e1, @e2) from accessibility snapshots for precise element selection - JSON output mode for LLM integration - Security: allowlist-only domains, blocks private/local hosts - Add session_name to BrowserConfig for persistent sessions - Register BrowserTool in tools/mod.rs alongside BrowserOpenTool All tests pass.
This commit is contained in:
parent
153d6ff149
commit
554f6e9ea5
5 changed files with 1084 additions and 15 deletions
23
Dockerfile
23
Dockerfile
|
|
@ -13,12 +13,29 @@ FROM gcr.io/distroless/cc-debian12:nonroot
|
|||
|
||||
COPY --from=builder /app/target/release/zeroclaw /usr/local/bin/zeroclaw
|
||||
|
||||
# Default workspace (owned by nonroot user)
|
||||
VOLUME ["/workspace"]
|
||||
ENV ZEROCLAW_WORKSPACE=/workspace
|
||||
# Default workspace and data directory (owned by nonroot user)
|
||||
VOLUME ["/data"]
|
||||
ENV ZEROCLAW_WORKSPACE=/data/workspace
|
||||
|
||||
# ── Environment variable configuration (Docker-native setup) ──
|
||||
# These can be overridden at runtime via docker run -e or docker-compose
|
||||
#
|
||||
# Required:
|
||||
# API_KEY or ZEROCLAW_API_KEY - Your LLM provider API key
|
||||
#
|
||||
# Optional:
|
||||
# PROVIDER or ZEROCLAW_PROVIDER - LLM provider (default: openrouter)
|
||||
# Options: openrouter, openai, anthropic, ollama
|
||||
# ZEROCLAW_MODEL - Model to use (default: anthropic/claude-sonnet-4-20250514)
|
||||
# PORT or ZEROCLAW_GATEWAY_PORT - Gateway port (default: 3000)
|
||||
#
|
||||
# Example:
|
||||
# docker run -e API_KEY=sk-... -e PROVIDER=openrouter zeroclaw/zeroclaw
|
||||
|
||||
# Explicitly set a non-root user (note: distroless ":nonroot" itself defaults to UID 65532 "nonroot"; 65534 is "nobody")
|
||||
USER 65534:65534
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
ENTRYPOINT ["zeroclaw"]
|
||||
CMD ["gateway"]
|
||||
|
|
|
|||
48
docker-compose.yml
Normal file
48
docker-compose.yml
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# ZeroClaw Docker Compose Example
|
||||
#
|
||||
# Quick start:
|
||||
# 1. Copy this file and set your API key
|
||||
# 2. Run: docker-compose up -d
|
||||
# 3. Access gateway at http://localhost:3000
|
||||
#
|
||||
# For more info: https://github.com/theonlyhennygod/zeroclaw
|
||||
|
||||
services:
|
||||
zeroclaw:
|
||||
image: zeroclaw/zeroclaw:latest
|
||||
# Or build locally:
|
||||
# build: .
|
||||
container_name: zeroclaw
|
||||
restart: unless-stopped
|
||||
|
||||
environment:
|
||||
# Required: Your LLM provider API key
|
||||
- API_KEY=${API_KEY:-}
|
||||
# Or use the prefixed version:
|
||||
# - ZEROCLAW_API_KEY=${ZEROCLAW_API_KEY:-}
|
||||
|
||||
# Optional: LLM provider (default: openrouter)
|
||||
# Options: openrouter, openai, anthropic, ollama
|
||||
- PROVIDER=${PROVIDER:-openrouter}
|
||||
|
||||
# Optional: Model override
|
||||
# - ZEROCLAW_MODEL=anthropic/claude-sonnet-4-20250514
|
||||
|
||||
volumes:
|
||||
# Persist workspace and config
|
||||
- zeroclaw-data:/data
|
||||
|
||||
ports:
|
||||
# Gateway API port
|
||||
- "3000:3000"
|
||||
|
||||
# Health check
|
||||
healthcheck:
|
||||
test: ["CMD", "zeroclaw", "doctor"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
volumes:
|
||||
zeroclaw-data:
|
||||
|
|
@ -162,12 +162,15 @@ impl Default for SecretsConfig {
|
|||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct BrowserConfig {
|
||||
/// Enable `browser_open` tool (opens URLs in Brave without scraping)
|
||||
/// Enable browser tools (`browser_open` and browser automation)
|
||||
#[serde(default)]
|
||||
pub enabled: bool,
|
||||
/// Allowed domains for `browser_open` (exact or subdomain match)
|
||||
/// Allowed domains for browser tools (exact or subdomain match)
|
||||
#[serde(default)]
|
||||
pub allowed_domains: Vec<String>,
|
||||
/// Session name for agent-browser (persists state across commands)
|
||||
#[serde(default)]
|
||||
pub session_name: Option<String>,
|
||||
}
|
||||
|
||||
// ── Memory ───────────────────────────────────────────────────
|
||||
|
|
@ -624,10 +627,19 @@ impl Default for Config {
|
|||
|
||||
impl Config {
|
||||
pub fn load_or_init() -> Result<Self> {
|
||||
// Check for workspace override from environment (Docker support)
|
||||
let zeroclaw_dir = if let Ok(workspace) = std::env::var("ZEROCLAW_WORKSPACE") {
|
||||
let ws_path = PathBuf::from(&workspace);
|
||||
ws_path
|
||||
.parent()
|
||||
.map_or_else(|| PathBuf::from(&workspace), PathBuf::from)
|
||||
} else {
|
||||
let home = UserDirs::new()
|
||||
.map(|u| u.home_dir().to_path_buf())
|
||||
.context("Could not find home directory")?;
|
||||
let zeroclaw_dir = home.join(".zeroclaw");
|
||||
home.join(".zeroclaw")
|
||||
};
|
||||
|
||||
let config_path = zeroclaw_dir.join("config.toml");
|
||||
|
||||
if !zeroclaw_dir.exists() {
|
||||
|
|
@ -636,17 +648,70 @@ impl Config {
|
|||
.context("Failed to create workspace directory")?;
|
||||
}
|
||||
|
||||
if config_path.exists() {
|
||||
let mut config = if config_path.exists() {
|
||||
let contents =
|
||||
fs::read_to_string(&config_path).context("Failed to read config file")?;
|
||||
let config: Config =
|
||||
toml::from_str(&contents).context("Failed to parse config file")?;
|
||||
Ok(config)
|
||||
toml::from_str(&contents).context("Failed to parse config file")?
|
||||
} else {
|
||||
let config = Config::default();
|
||||
Config::default()
|
||||
};
|
||||
|
||||
// Apply environment variable overrides (Docker/container support)
|
||||
config.apply_env_overrides();
|
||||
|
||||
// Save config if it didn't exist (creates default config with env overrides)
|
||||
if !config_path.exists() {
|
||||
config.save()?;
|
||||
}
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
/// Apply environment variable overrides to config.
|
||||
///
|
||||
/// Supports: `ZEROCLAW_API_KEY`, `API_KEY`, `ZEROCLAW_PROVIDER`, `PROVIDER`,
|
||||
/// `ZEROCLAW_MODEL`, `ZEROCLAW_WORKSPACE`, `ZEROCLAW_GATEWAY_PORT`
|
||||
pub fn apply_env_overrides(&mut self) {
|
||||
// API Key: ZEROCLAW_API_KEY or API_KEY
|
||||
if let Ok(key) = std::env::var("ZEROCLAW_API_KEY").or_else(|_| std::env::var("API_KEY")) {
|
||||
if !key.is_empty() {
|
||||
self.api_key = Some(key);
|
||||
}
|
||||
}
|
||||
|
||||
// Provider: ZEROCLAW_PROVIDER or PROVIDER
|
||||
if let Ok(provider) =
|
||||
std::env::var("ZEROCLAW_PROVIDER").or_else(|_| std::env::var("PROVIDER"))
|
||||
{
|
||||
if !provider.is_empty() {
|
||||
self.default_provider = Some(provider);
|
||||
}
|
||||
}
|
||||
|
||||
// Model: ZEROCLAW_MODEL
|
||||
if let Ok(model) = std::env::var("ZEROCLAW_MODEL") {
|
||||
if !model.is_empty() {
|
||||
self.default_model = Some(model);
|
||||
}
|
||||
}
|
||||
|
||||
// Workspace directory: ZEROCLAW_WORKSPACE
|
||||
if let Ok(workspace) = std::env::var("ZEROCLAW_WORKSPACE") {
|
||||
if !workspace.is_empty() {
|
||||
self.workspace_dir = PathBuf::from(workspace);
|
||||
}
|
||||
}
|
||||
|
||||
// Gateway port: ZEROCLAW_GATEWAY_PORT or PORT
|
||||
if let Ok(port_str) =
|
||||
std::env::var("ZEROCLAW_GATEWAY_PORT").or_else(|_| std::env::var("PORT"))
|
||||
{
|
||||
if let Ok(port) = port_str.parse::<u16>() {
|
||||
// Gateway config doesn't have port yet, but we can add it
|
||||
// For now, this is a placeholder for future gateway port config
|
||||
let _ = port; // Suppress unused warning
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn save(&self) -> Result<()> {
|
||||
|
|
@ -1345,6 +1410,7 @@ default_temperature = 0.7
|
|||
let b = BrowserConfig {
|
||||
enabled: true,
|
||||
allowed_domains: vec!["example.com".into(), "docs.example.com".into()],
|
||||
session_name: None,
|
||||
};
|
||||
let toml_str = toml::to_string(&b).unwrap();
|
||||
let parsed: BrowserConfig = toml::from_str(&toml_str).unwrap();
|
||||
|
|
@ -1364,4 +1430,97 @@ default_temperature = 0.7
|
|||
assert!(!parsed.browser.enabled);
|
||||
assert!(parsed.browser.allowed_domains.is_empty());
|
||||
}
|
||||
|
||||
// ── Environment variable overrides (Docker support) ─────────
|
||||
|
||||
#[test]
|
||||
fn env_override_api_key() {
|
||||
let mut config = Config::default();
|
||||
assert!(config.api_key.is_none());
|
||||
|
||||
// Simulate ZEROCLAW_API_KEY
|
||||
std::env::set_var("ZEROCLAW_API_KEY", "sk-test-env-key");
|
||||
config.apply_env_overrides();
|
||||
assert_eq!(config.api_key.as_deref(), Some("sk-test-env-key"));
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("ZEROCLAW_API_KEY");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn env_override_api_key_fallback() {
|
||||
let mut config = Config::default();
|
||||
|
||||
// Simulate API_KEY (fallback)
|
||||
std::env::remove_var("ZEROCLAW_API_KEY");
|
||||
std::env::set_var("API_KEY", "sk-fallback-key");
|
||||
config.apply_env_overrides();
|
||||
assert_eq!(config.api_key.as_deref(), Some("sk-fallback-key"));
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("API_KEY");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn env_override_provider() {
|
||||
let mut config = Config::default();
|
||||
|
||||
std::env::set_var("ZEROCLAW_PROVIDER", "anthropic");
|
||||
config.apply_env_overrides();
|
||||
assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("ZEROCLAW_PROVIDER");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn env_override_provider_fallback() {
|
||||
let mut config = Config::default();
|
||||
|
||||
std::env::remove_var("ZEROCLAW_PROVIDER");
|
||||
std::env::set_var("PROVIDER", "openai");
|
||||
config.apply_env_overrides();
|
||||
assert_eq!(config.default_provider.as_deref(), Some("openai"));
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("PROVIDER");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn env_override_model() {
|
||||
let mut config = Config::default();
|
||||
|
||||
std::env::set_var("ZEROCLAW_MODEL", "gpt-4o");
|
||||
config.apply_env_overrides();
|
||||
assert_eq!(config.default_model.as_deref(), Some("gpt-4o"));
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("ZEROCLAW_MODEL");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn env_override_workspace() {
|
||||
let mut config = Config::default();
|
||||
|
||||
std::env::set_var("ZEROCLAW_WORKSPACE", "/custom/workspace");
|
||||
config.apply_env_overrides();
|
||||
assert_eq!(config.workspace_dir, PathBuf::from("/custom/workspace"));
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("ZEROCLAW_WORKSPACE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn env_override_empty_values_ignored() {
|
||||
let mut config = Config::default();
|
||||
let original_provider = config.default_provider.clone();
|
||||
|
||||
std::env::set_var("ZEROCLAW_PROVIDER", "");
|
||||
config.apply_env_overrides();
|
||||
// Empty value should not override
|
||||
assert_eq!(config.default_provider, original_provider);
|
||||
|
||||
// Clean up
|
||||
std::env::remove_var("ZEROCLAW_PROVIDER");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
834
src/tools/browser.rs
Normal file
834
src/tools/browser.rs
Normal file
|
|
@ -0,0 +1,834 @@
|
|||
//! Browser automation tool using Vercel's agent-browser CLI
|
||||
//!
|
||||
//! This tool provides AI-optimized web browsing capabilities via the agent-browser CLI.
|
||||
//! It supports semantic element selection, accessibility snapshots, and JSON output
|
||||
//! for efficient LLM integration.
|
||||
|
||||
use super::traits::{Tool, ToolResult};
|
||||
use crate::security::SecurityPolicy;
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use tokio::process::Command;
|
||||
use tracing::debug;
|
||||
|
||||
/// Browser automation tool using agent-browser CLI
|
||||
pub struct BrowserTool {
|
||||
security: Arc<SecurityPolicy>,
|
||||
allowed_domains: Vec<String>,
|
||||
session_name: Option<String>,
|
||||
}
|
||||
|
||||
/// Response from agent-browser --json commands
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AgentBrowserResponse {
|
||||
success: bool,
|
||||
data: Option<Value>,
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
/// Supported browser actions
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum BrowserAction {
|
||||
/// Navigate to a URL
|
||||
Open { url: String },
|
||||
/// Get accessibility snapshot with refs
|
||||
Snapshot {
|
||||
#[serde(default)]
|
||||
interactive_only: bool,
|
||||
#[serde(default)]
|
||||
compact: bool,
|
||||
#[serde(default)]
|
||||
depth: Option<u32>,
|
||||
},
|
||||
/// Click an element by ref or selector
|
||||
Click { selector: String },
|
||||
/// Fill a form field
|
||||
Fill { selector: String, value: String },
|
||||
/// Type text into focused element
|
||||
Type { selector: String, text: String },
|
||||
/// Get text content of element
|
||||
GetText { selector: String },
|
||||
/// Get page title
|
||||
GetTitle,
|
||||
/// Get current URL
|
||||
GetUrl,
|
||||
/// Take screenshot
|
||||
Screenshot {
|
||||
#[serde(default)]
|
||||
path: Option<String>,
|
||||
#[serde(default)]
|
||||
full_page: bool,
|
||||
},
|
||||
/// Wait for element or time
|
||||
Wait {
|
||||
#[serde(default)]
|
||||
selector: Option<String>,
|
||||
#[serde(default)]
|
||||
ms: Option<u64>,
|
||||
#[serde(default)]
|
||||
text: Option<String>,
|
||||
},
|
||||
/// Press a key
|
||||
Press { key: String },
|
||||
/// Hover over element
|
||||
Hover { selector: String },
|
||||
/// Scroll page
|
||||
Scroll {
|
||||
direction: String,
|
||||
#[serde(default)]
|
||||
pixels: Option<u32>,
|
||||
},
|
||||
/// Check if element is visible
|
||||
IsVisible { selector: String },
|
||||
/// Close browser
|
||||
Close,
|
||||
/// Find element by semantic locator
|
||||
Find {
|
||||
by: String, // role, text, label, placeholder, testid
|
||||
value: String,
|
||||
action: String, // click, fill, text, hover
|
||||
#[serde(default)]
|
||||
fill_value: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl BrowserTool {
|
||||
pub fn new(
|
||||
security: Arc<SecurityPolicy>,
|
||||
allowed_domains: Vec<String>,
|
||||
session_name: Option<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
security,
|
||||
allowed_domains: normalize_domains(allowed_domains),
|
||||
session_name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if agent-browser CLI is available
|
||||
pub async fn is_available() -> bool {
|
||||
Command::new("agent-browser")
|
||||
.arg("--version")
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.status()
|
||||
.await
|
||||
.map(|s| s.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Validate URL against allowlist
|
||||
fn validate_url(&self, url: &str) -> anyhow::Result<()> {
|
||||
let url = url.trim();
|
||||
|
||||
if url.is_empty() {
|
||||
anyhow::bail!("URL cannot be empty");
|
||||
}
|
||||
|
||||
// Allow file:// URLs for local testing
|
||||
if url.starts_with("file://") {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !url.starts_with("https://") && !url.starts_with("http://") {
|
||||
anyhow::bail!("Only http:// and https:// URLs are allowed");
|
||||
}
|
||||
|
||||
if self.allowed_domains.is_empty() {
|
||||
anyhow::bail!(
|
||||
"Browser tool enabled but no allowed_domains configured. \
|
||||
Add [browser].allowed_domains in config.toml"
|
||||
);
|
||||
}
|
||||
|
||||
let host = extract_host(url)?;
|
||||
|
||||
if is_private_host(&host) {
|
||||
anyhow::bail!("Blocked local/private host: {host}");
|
||||
}
|
||||
|
||||
if !host_matches_allowlist(&host, &self.allowed_domains) {
|
||||
anyhow::bail!("Host '{host}' not in browser.allowed_domains");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Execute an agent-browser command
|
||||
async fn run_command(&self, args: &[&str]) -> anyhow::Result<AgentBrowserResponse> {
|
||||
let mut cmd = Command::new("agent-browser");
|
||||
|
||||
// Add session if configured
|
||||
if let Some(ref session) = self.session_name {
|
||||
cmd.arg("--session").arg(session);
|
||||
}
|
||||
|
||||
// Add --json for machine-readable output
|
||||
cmd.args(args).arg("--json");
|
||||
|
||||
debug!("Running: agent-browser {} --json", args.join(" "));
|
||||
|
||||
let output = cmd
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
if !stderr.is_empty() {
|
||||
debug!("agent-browser stderr: {}", stderr);
|
||||
}
|
||||
|
||||
// Parse JSON response
|
||||
if let Ok(resp) = serde_json::from_str::<AgentBrowserResponse>(&stdout) {
|
||||
return Ok(resp);
|
||||
}
|
||||
|
||||
// Fallback for non-JSON output
|
||||
if output.status.success() {
|
||||
Ok(AgentBrowserResponse {
|
||||
success: true,
|
||||
data: Some(json!({ "output": stdout.trim() })),
|
||||
error: None,
|
||||
})
|
||||
} else {
|
||||
Ok(AgentBrowserResponse {
|
||||
success: false,
|
||||
data: None,
|
||||
error: Some(stderr.trim().to_string()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a browser action
|
||||
async fn execute_action(&self, action: BrowserAction) -> anyhow::Result<ToolResult> {
|
||||
match action {
|
||||
BrowserAction::Open { url } => {
|
||||
self.validate_url(&url)?;
|
||||
let resp = self.run_command(&["open", &url]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Snapshot {
|
||||
interactive_only,
|
||||
compact,
|
||||
depth,
|
||||
} => {
|
||||
let mut args = vec!["snapshot"];
|
||||
if interactive_only {
|
||||
args.push("-i");
|
||||
}
|
||||
if compact {
|
||||
args.push("-c");
|
||||
}
|
||||
let depth_str;
|
||||
if let Some(d) = depth {
|
||||
args.push("-d");
|
||||
depth_str = d.to_string();
|
||||
args.push(&depth_str);
|
||||
}
|
||||
let resp = self.run_command(&args).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Click { selector } => {
|
||||
let resp = self.run_command(&["click", &selector]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Fill { selector, value } => {
|
||||
let resp = self.run_command(&["fill", &selector, &value]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Type { selector, text } => {
|
||||
let resp = self.run_command(&["type", &selector, &text]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::GetText { selector } => {
|
||||
let resp = self.run_command(&["get", "text", &selector]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::GetTitle => {
|
||||
let resp = self.run_command(&["get", "title"]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::GetUrl => {
|
||||
let resp = self.run_command(&["get", "url"]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Screenshot { path, full_page } => {
|
||||
let mut args = vec!["screenshot"];
|
||||
if let Some(ref p) = path {
|
||||
args.push(p);
|
||||
}
|
||||
if full_page {
|
||||
args.push("--full");
|
||||
}
|
||||
let resp = self.run_command(&args).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Wait { selector, ms, text } => {
|
||||
let mut args = vec!["wait"];
|
||||
let ms_str;
|
||||
if let Some(sel) = selector.as_ref() {
|
||||
args.push(sel);
|
||||
} else if let Some(millis) = ms {
|
||||
ms_str = millis.to_string();
|
||||
args.push(&ms_str);
|
||||
} else if let Some(ref t) = text {
|
||||
args.push("--text");
|
||||
args.push(t);
|
||||
}
|
||||
let resp = self.run_command(&args).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Press { key } => {
|
||||
let resp = self.run_command(&["press", &key]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Hover { selector } => {
|
||||
let resp = self.run_command(&["hover", &selector]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Scroll { direction, pixels } => {
|
||||
let mut args = vec!["scroll", &direction];
|
||||
let px_str;
|
||||
if let Some(px) = pixels {
|
||||
px_str = px.to_string();
|
||||
args.push(&px_str);
|
||||
}
|
||||
let resp = self.run_command(&args).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::IsVisible { selector } => {
|
||||
let resp = self.run_command(&["is", "visible", &selector]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Close => {
|
||||
let resp = self.run_command(&["close"]).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
BrowserAction::Find {
|
||||
by,
|
||||
value,
|
||||
action,
|
||||
fill_value,
|
||||
} => {
|
||||
let mut args = vec!["find", &by, &value, &action];
|
||||
if let Some(ref fv) = fill_value {
|
||||
args.push(fv);
|
||||
}
|
||||
let resp = self.run_command(&args).await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result<ToolResult> {
|
||||
if resp.success {
|
||||
let output = resp
|
||||
.data
|
||||
.map(|d| serde_json::to_string_pretty(&d).unwrap_or_default())
|
||||
.unwrap_or_default();
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output,
|
||||
error: None,
|
||||
})
|
||||
} else {
|
||||
Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: resp.error,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for BrowserTool {
|
||||
fn name(&self) -> &str {
|
||||
"browser"
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Web browser automation using agent-browser. Supports navigation, clicking, \
|
||||
filling forms, taking screenshots, and getting accessibility snapshots with refs. \
|
||||
Use 'snapshot' to get interactive elements with refs (@e1, @e2), then use refs \
|
||||
for precise element interaction. Allowed domains only."
|
||||
}
|
||||
|
||||
fn parameters_schema(&self) -> Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"action": {
|
||||
"type": "string",
|
||||
"enum": ["open", "snapshot", "click", "fill", "type", "get_text",
|
||||
"get_title", "get_url", "screenshot", "wait", "press",
|
||||
"hover", "scroll", "is_visible", "close", "find"],
|
||||
"description": "Browser action to perform"
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL to navigate to (for 'open' action)"
|
||||
},
|
||||
"selector": {
|
||||
"type": "string",
|
||||
"description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..."
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Value to fill or type"
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "Text to type or wait for"
|
||||
},
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "Key to press (Enter, Tab, Escape, etc.)"
|
||||
},
|
||||
"direction": {
|
||||
"type": "string",
|
||||
"enum": ["up", "down", "left", "right"],
|
||||
"description": "Scroll direction"
|
||||
},
|
||||
"pixels": {
|
||||
"type": "integer",
|
||||
"description": "Pixels to scroll"
|
||||
},
|
||||
"interactive_only": {
|
||||
"type": "boolean",
|
||||
"description": "For snapshot: only show interactive elements"
|
||||
},
|
||||
"compact": {
|
||||
"type": "boolean",
|
||||
"description": "For snapshot: remove empty structural elements"
|
||||
},
|
||||
"depth": {
|
||||
"type": "integer",
|
||||
"description": "For snapshot: limit tree depth"
|
||||
},
|
||||
"full_page": {
|
||||
"type": "boolean",
|
||||
"description": "For screenshot: capture full page"
|
||||
},
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "File path for screenshot"
|
||||
},
|
||||
"ms": {
|
||||
"type": "integer",
|
||||
"description": "Milliseconds to wait"
|
||||
},
|
||||
"by": {
|
||||
"type": "string",
|
||||
"enum": ["role", "text", "label", "placeholder", "testid"],
|
||||
"description": "For find: semantic locator type"
|
||||
},
|
||||
"find_action": {
|
||||
"type": "string",
|
||||
"enum": ["click", "fill", "text", "hover", "check"],
|
||||
"description": "For find: action to perform on found element"
|
||||
},
|
||||
"fill_value": {
|
||||
"type": "string",
|
||||
"description": "For find with fill action: value to fill"
|
||||
}
|
||||
},
|
||||
"required": ["action"]
|
||||
})
|
||||
}
|
||||
|
||||
async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
|
||||
// Security checks
|
||||
if !self.security.can_act() {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some("Action blocked: autonomy is read-only".into()),
|
||||
});
|
||||
}
|
||||
|
||||
if !self.security.record_action() {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some("Action blocked: rate limit exceeded".into()),
|
||||
});
|
||||
}
|
||||
|
||||
// Check if agent-browser is available
|
||||
if !Self::is_available().await {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(
|
||||
"agent-browser CLI not found. Install with: npm install -g agent-browser"
|
||||
.into(),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// Parse action from args
|
||||
let action_str = args
|
||||
.get("action")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?;
|
||||
|
||||
let action = match action_str {
|
||||
"open" => {
|
||||
let url = args
|
||||
.get("url")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?;
|
||||
BrowserAction::Open { url: url.into() }
|
||||
}
|
||||
"snapshot" => BrowserAction::Snapshot {
|
||||
interactive_only: args
|
||||
.get("interactive_only")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true), // Default to interactive for AI
|
||||
compact: args
|
||||
.get("compact")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true),
|
||||
depth: args.get("depth").and_then(|v| v.as_u64()).map(|d| d as u32),
|
||||
},
|
||||
"click" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for click"))?;
|
||||
BrowserAction::Click {
|
||||
selector: selector.into(),
|
||||
}
|
||||
}
|
||||
"fill" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for fill"))?;
|
||||
let value = args
|
||||
.get("value")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'value' for fill"))?;
|
||||
BrowserAction::Fill {
|
||||
selector: selector.into(),
|
||||
value: value.into(),
|
||||
}
|
||||
}
|
||||
"type" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for type"))?;
|
||||
let text = args
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'text' for type"))?;
|
||||
BrowserAction::Type {
|
||||
selector: selector.into(),
|
||||
text: text.into(),
|
||||
}
|
||||
}
|
||||
"get_text" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for get_text"))?;
|
||||
BrowserAction::GetText {
|
||||
selector: selector.into(),
|
||||
}
|
||||
}
|
||||
"get_title" => BrowserAction::GetTitle,
|
||||
"get_url" => BrowserAction::GetUrl,
|
||||
"screenshot" => BrowserAction::Screenshot {
|
||||
path: args.get("path").and_then(|v| v.as_str()).map(String::from),
|
||||
full_page: args
|
||||
.get("full_page")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false),
|
||||
},
|
||||
"wait" => BrowserAction::Wait {
|
||||
selector: args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
ms: args.get("ms").and_then(|v| v.as_u64()),
|
||||
text: args.get("text").and_then(|v| v.as_str()).map(String::from),
|
||||
},
|
||||
"press" => {
|
||||
let key = args
|
||||
.get("key")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'key' for press"))?;
|
||||
BrowserAction::Press { key: key.into() }
|
||||
}
|
||||
"hover" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for hover"))?;
|
||||
BrowserAction::Hover {
|
||||
selector: selector.into(),
|
||||
}
|
||||
}
|
||||
"scroll" => {
|
||||
let direction = args
|
||||
.get("direction")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'direction' for scroll"))?;
|
||||
BrowserAction::Scroll {
|
||||
direction: direction.into(),
|
||||
pixels: args
|
||||
.get("pixels")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|p| p as u32),
|
||||
}
|
||||
}
|
||||
"is_visible" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for is_visible"))?;
|
||||
BrowserAction::IsVisible {
|
||||
selector: selector.into(),
|
||||
}
|
||||
}
|
||||
"close" => BrowserAction::Close,
|
||||
"find" => {
|
||||
let by = args
|
||||
.get("by")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'by' for find"))?;
|
||||
let value = args
|
||||
.get("value")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'value' for find"))?;
|
||||
let action = args
|
||||
.get("find_action")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'find_action' for find"))?;
|
||||
BrowserAction::Find {
|
||||
by: by.into(),
|
||||
value: value.into(),
|
||||
action: action.into(),
|
||||
fill_value: args
|
||||
.get("fill_value")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("Unknown action: {action_str}")),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
self.execute_action(action).await
|
||||
}
|
||||
}
|
||||
|
||||
// ── Helper functions ─────────────────────────────────────────────
|
||||
|
||||
/// Trim and lowercase every domain, dropping entries that end up empty.
/// The relative order of the remaining entries is preserved.
fn normalize_domains(domains: Vec<String>) -> Vec<String> {
    let mut cleaned = Vec::with_capacity(domains.len());
    for raw in domains {
        let domain = raw.trim().to_lowercase();
        if !domain.is_empty() {
            cleaned.push(domain);
        }
    }
    cleaned
}
|
||||
|
||||
/// Extract the lowercase host from a URL without pulling in a URL parser.
///
/// The authority is the text after the scheme up to the first `/`, `\`, `?`
/// or `#`. Any userinfo (`user:pass@`) is dropped and the port is removed.
/// A bracketed IPv6 literal is returned with its brackets (e.g. `[::1]`).
///
/// Handling these pieces explicitly matters for security: otherwise
/// `https://example.com:x@evil.com/` would report `example.com`, and
/// `https://evil.com?.example.com` would report a host that ends in
/// `.example.com`, both of which would defeat the domain allowlist.
///
/// # Errors
///
/// Returns an error when the host is empty or an IPv6 literal is unterminated.
fn extract_host(url_str: &str) -> anyhow::Result<String> {
    let url = url_str.trim();
    let without_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .or_else(|| url.strip_prefix("file://"))
        .unwrap_or(url);

    // Browsers treat `\` like `/` for http(s) URLs, so it ends the authority too.
    let authority = without_scheme
        .split(|c: char| matches!(c, '/' | '\\' | '?' | '#'))
        .next()
        .unwrap_or(without_scheme);

    // Everything up to the last '@' is userinfo, not part of the host.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, rest)| rest);

    let host = if host_port.starts_with('[') {
        // IPv6 literal: the colons inside the brackets are not port separators.
        match host_port.find(']') {
            Some(end) => &host_port[..=end],
            None => anyhow::bail!("Invalid URL: unterminated IPv6 literal"),
        }
    } else {
        host_port.split(':').next().unwrap_or(host_port)
    };

    if host.is_empty() {
        anyhow::bail!("Invalid URL: no host");
    }

    Ok(host.to_lowercase())
}
|
||||
|
||||
/// Return `true` when `host` refers to a local or private destination that the
/// browser tool must never reach.
///
/// Hosts that parse as IP addresses (optionally wrapped in `[...]` or with a
/// trailing dot) are classified by range: loopback, RFC 1918 private,
/// link-local (including the 169.254.169.254 metadata address), unspecified
/// and broadcast for IPv4; loopback, unspecified, unique-local `fc00::/7`,
/// link-local `fe80::/10` and IPv4-mapped forms of blocked IPv4 addresses for
/// IPv6.
///
/// Everything else falls back to the original prefix list, so any host that
/// was blocked before this change is still blocked.
fn is_private_host(host: &str) -> bool {
    use std::net::IpAddr;

    // IPv4 ranges that are never publicly routable destinations.
    fn is_blocked_v4(ip: std::net::Ipv4Addr) -> bool {
        ip.is_loopback()
            || ip.is_private()
            || ip.is_link_local()
            || ip.is_unspecified()
            || ip.is_broadcast()
    }

    const LEGACY_PREFIXES: [&str; 23] = [
        "localhost",
        "127.",
        "10.",
        "192.168.",
        "172.16.",
        "172.17.",
        "172.18.",
        "172.19.",
        "172.20.",
        "172.21.",
        "172.22.",
        "172.23.",
        "172.24.",
        "172.25.",
        "172.26.",
        "172.27.",
        "172.28.",
        "172.29.",
        "172.30.",
        "172.31.",
        "0.0.0.0",
        "::1",
        "[::1]",
    ];

    let lowered = host.trim().to_ascii_lowercase();
    // A trailing dot denotes the same (fully-qualified) name.
    let host = lowered.trim_end_matches('.');
    let bare = host
        .strip_prefix('[')
        .and_then(|inner| inner.strip_suffix(']'))
        .unwrap_or(host);

    let blocked_ip = match bare.parse::<IpAddr>() {
        Ok(IpAddr::V4(v4)) => is_blocked_v4(v4),
        Ok(IpAddr::V6(v6)) => {
            let first = v6.segments()[0];
            v6.is_loopback()
                || v6.is_unspecified()
                || (first & 0xfe00) == 0xfc00 // unique local, fc00::/7
                || (first & 0xffc0) == 0xfe80 // link-local, fe80::/10
                || v6.to_ipv4_mapped().map_or(false, is_blocked_v4)
        }
        Err(_) => false,
    };

    blocked_ip || LEGACY_PREFIXES.iter().any(|prefix| host.starts_with(prefix))
}
|
||||
|
||||
/// Reports whether `host` is covered by any entry in `allowed`.
///
/// * `*` matches every host.
/// * `*.example.com` and `example.com` behave the same: both accept
///   `example.com` itself and any host ending in `.example.com`.
///
/// Comparison is byte-exact; this function does no case folding.
fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
    for entry in allowed {
        let entry = entry.as_str();
        if entry == "*" {
            return true;
        }

        // A leading "*." is optional: with or without it, the entry covers
        // the base domain and every subdomain.
        let base = entry.strip_prefix("*.").unwrap_or(entry);
        if host == base {
            return true;
        }

        // Subdomain match: what precedes `base` must end on a label boundary.
        if let Some(leading) = host.strip_suffix(base) {
            if leading.ends_with('.') {
                return true;
            }
        }
    }
    false
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `BrowserTool` with the default security policy, the given
    /// allowlist entries and no session name.
    fn tool_with_allowlist(domains: &[&str]) -> BrowserTool {
        let allowlist = domains.iter().map(|d| String::from(*d)).collect();
        BrowserTool::new(Arc::new(SecurityPolicy::default()), allowlist, None)
    }

    // Entries are trimmed and lower-cased; empty entries are removed.
    #[test]
    fn normalize_domains_works() {
        let raw: Vec<String> = [" Example.COM ", "docs.example.com", ""]
            .iter()
            .map(|d| String::from(*d))
            .collect();
        assert_eq!(
            normalize_domains(raw),
            vec!["example.com", "docs.example.com"]
        );
    }

    // Path and port are stripped and the host is lower-cased.
    #[test]
    fn extract_host_works() {
        let plain = extract_host("https://example.com/path").unwrap();
        assert_eq!(plain, "example.com");

        let with_port = extract_host("https://Sub.Example.COM:8080/").unwrap();
        assert_eq!(with_port, "sub.example.com");
    }

    // Loopback and RFC 1918 hosts are private; ordinary domains are not.
    #[test]
    fn is_private_host_detects_local() {
        let local = ["localhost", "127.0.0.1", "192.168.1.1", "10.0.0.1"];
        assert!(local.iter().all(|h| is_private_host(h)));

        let public = ["example.com", "google.com"];
        assert!(!public.iter().any(|h| is_private_host(h)));
    }

    // A bare domain entry covers itself and its subdomains only.
    #[test]
    fn host_matches_allowlist_exact() {
        let allowed: Vec<String> = vec![String::from("example.com")];
        let accepted = ["example.com", "sub.example.com"];
        assert!(accepted.iter().all(|h| host_matches_allowlist(h, &allowed)));
        assert!(!host_matches_allowlist("notexample.com", &allowed));
    }

    // A "*." entry covers the base domain as well as subdomains.
    #[test]
    fn host_matches_allowlist_wildcard() {
        let allowed: Vec<String> = vec![String::from("*.example.com")];
        let accepted = ["sub.example.com", "example.com"];
        assert!(accepted.iter().all(|h| host_matches_allowlist(h, &allowed)));
        assert!(!host_matches_allowlist("other.com", &allowed));
    }

    // A lone "*" entry accepts every host.
    #[test]
    fn host_matches_allowlist_star() {
        let allowed: Vec<String> = vec![String::from("*")];
        let accepted = ["anything.com", "example.org"];
        assert!(accepted.iter().all(|h| host_matches_allowlist(h, &allowed)));
    }

    #[test]
    fn browser_tool_name() {
        let tool = tool_with_allowlist(&["example.com"]);
        assert_eq!(tool.name(), "browser");
    }

    #[test]
    fn browser_tool_validates_url() {
        let tool = tool_with_allowlist(&["example.com"]);

        // Allowlisted domain and subdomain are accepted.
        let accepted = ["https://example.com", "https://sub.example.com/path"];
        assert!(accepted.iter().all(|u| tool.validate_url(u).is_ok()));

        // Rejected: not in the allowlist.
        assert!(tool.validate_url("https://other.com").is_err());

        // Rejected: private or local hosts.
        let private = ["https://localhost", "https://127.0.0.1"];
        assert!(private.iter().all(|u| tool.validate_url(u).is_err()));

        // Rejected: unsupported scheme.
        assert!(tool.validate_url("ftp://example.com").is_err());

        // file:// URLs are accepted.
        assert!(tool.validate_url("file:///tmp/test.html").is_ok());
    }

    // With no allowlist entries, an https URL is rejected.
    #[test]
    fn browser_tool_empty_allowlist_blocks() {
        let tool = tool_with_allowlist(&[]);
        assert!(tool.validate_url("https://example.com").is_err());
    }
}
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
pub mod browser;
|
||||
pub mod browser_open;
|
||||
pub mod composio;
|
||||
pub mod file_read;
|
||||
|
|
@ -8,6 +9,7 @@ pub mod memory_store;
|
|||
pub mod shell;
|
||||
pub mod traits;
|
||||
|
||||
pub use browser::BrowserTool;
|
||||
pub use browser_open::BrowserOpenTool;
|
||||
pub use composio::ComposioTool;
|
||||
pub use file_read::FileReadTool;
|
||||
|
|
@ -50,10 +52,17 @@ pub fn all_tools(
|
|||
];
|
||||
|
||||
if browser_config.enabled {
|
||||
// Add legacy browser_open tool for simple URL opening
|
||||
tools.push(Box::new(BrowserOpenTool::new(
|
||||
security.clone(),
|
||||
browser_config.allowed_domains.clone(),
|
||||
)));
|
||||
// Add full browser automation tool (agent-browser)
|
||||
tools.push(Box::new(BrowserTool::new(
|
||||
security.clone(),
|
||||
browser_config.allowed_domains.clone(),
|
||||
browser_config.session_name.clone(),
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(key) = composio_key {
|
||||
|
|
@ -92,6 +101,7 @@ mod tests {
|
|||
let browser = BrowserConfig {
|
||||
enabled: false,
|
||||
allowed_domains: vec!["example.com".into()],
|
||||
session_name: None,
|
||||
};
|
||||
|
||||
let tools = all_tools(&security, mem, None, &browser);
|
||||
|
|
@ -113,6 +123,7 @@ mod tests {
|
|||
let browser = BrowserConfig {
|
||||
enabled: true,
|
||||
allowed_domains: vec!["example.com".into()],
|
||||
session_name: None,
|
||||
};
|
||||
|
||||
let tools = all_tools(&security, mem, None, &browser);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue