feat(browser): add optional computer-use sidecar backend (#335)
This commit is contained in:
parent
db0904c8a4
commit
04bf94443f
8 changed files with 625 additions and 24 deletions
21
README.md
21
README.md
|
|
@ -305,15 +305,34 @@ encrypt = true # API keys encrypted with local key file
|
|||
[browser]
|
||||
enabled = false # opt-in browser_open + browser tools
|
||||
allowed_domains = ["docs.rs"] # required when browser is enabled
|
||||
backend = "agent_browser" # "agent_browser" (default), "rust_native", "auto"
|
||||
backend = "agent_browser" # "agent_browser" (default), "rust_native", "computer_use", "auto"
|
||||
native_headless = true # applies when backend uses rust-native
|
||||
native_webdriver_url = "http://127.0.0.1:9515" # WebDriver endpoint (chromedriver/selenium)
|
||||
# native_chrome_path = "/usr/bin/chromium" # optional explicit browser binary for driver
|
||||
|
||||
[browser.computer_use]
|
||||
endpoint = "http://127.0.0.1:8787/v1/actions" # computer-use sidecar HTTP endpoint
|
||||
timeout_ms = 15000 # per-action timeout
|
||||
allow_remote_endpoint = false # secure default: only private/localhost endpoint
|
||||
window_allowlist = [] # optional window title/process allowlist hints
|
||||
# api_key = "..." # optional bearer token for sidecar
|
||||
# max_coordinate_x = 3840 # optional coordinate guardrail
|
||||
# max_coordinate_y = 2160 # optional coordinate guardrail
|
||||
|
||||
# Rust-native backend build flag:
|
||||
# cargo build --release --features browser-native
|
||||
# Ensure a WebDriver server is running, e.g. chromedriver --port=9515
|
||||
|
||||
# Computer-use sidecar contract (MVP)
|
||||
# POST browser.computer_use.endpoint
|
||||
# Request: {
|
||||
# "action": "mouse_click",
|
||||
# "params": {"x": 640, "y": 360, "button": "left"},
|
||||
# "policy": {"allowed_domains": [...], "window_allowlist": [...], "max_coordinate_x": 3840, "max_coordinate_y": 2160},
|
||||
# "metadata": {"session_name": "...", "source": "zeroclaw.browser", "version": "..."}
|
||||
# }
|
||||
# Response: {"success": true, "data": {...}} or {"success": false, "error": "..."}
|
||||
|
||||
[composio]
|
||||
enabled = false # opt-in: 1000+ OAuth apps via composio.dev
|
||||
# api_key = "cmp_..." # optional: stored encrypted when [secrets].encrypt = true
|
||||
|
|
|
|||
|
|
@ -2,11 +2,11 @@ pub mod schema;
|
|||
|
||||
#[allow(unused_imports)]
|
||||
pub use schema::{
|
||||
AuditConfig, AutonomyConfig, BrowserConfig, ChannelsConfig, ComposioConfig, Config, CostConfig,
|
||||
DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, GatewayConfig, HeartbeatConfig,
|
||||
HttpRequestConfig, IMessageConfig, IdentityConfig, LarkConfig, MatrixConfig, MemoryConfig,
|
||||
ModelRouteConfig, ObservabilityConfig, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig,
|
||||
SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig, SlackConfig,
|
||||
AuditConfig, AutonomyConfig, BrowserComputerUseConfig, BrowserConfig, ChannelsConfig,
|
||||
ComposioConfig, Config, DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, GatewayConfig,
|
||||
HeartbeatConfig, HttpRequestConfig, IMessageConfig, IdentityConfig, LarkConfig, MatrixConfig,
|
||||
MemoryConfig, ModelRouteConfig, ObservabilityConfig, ReliabilityConfig, ResourceLimitsConfig,
|
||||
RuntimeConfig, SandboxBackend, SandboxConfig, SecretsConfig, SecurityConfig, SlackConfig,
|
||||
TelegramConfig, TunnelConfig, WebhookConfig,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -419,6 +419,53 @@ impl Default for SecretsConfig {
|
|||
|
||||
// ── Browser (friendly-service browsing only) ───────────────────
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BrowserComputerUseConfig {
|
||||
/// Sidecar endpoint for computer-use actions (OS-level mouse/keyboard/screenshot)
|
||||
#[serde(default = "default_browser_computer_use_endpoint")]
|
||||
pub endpoint: String,
|
||||
/// Optional bearer token for computer-use sidecar
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
/// Per-action request timeout in milliseconds
|
||||
#[serde(default = "default_browser_computer_use_timeout_ms")]
|
||||
pub timeout_ms: u64,
|
||||
/// Allow remote/public endpoint for computer-use sidecar (default: false)
|
||||
#[serde(default)]
|
||||
pub allow_remote_endpoint: bool,
|
||||
/// Optional window title/process allowlist forwarded to sidecar policy
|
||||
#[serde(default)]
|
||||
pub window_allowlist: Vec<String>,
|
||||
/// Optional X-axis boundary for coordinate-based actions
|
||||
#[serde(default)]
|
||||
pub max_coordinate_x: Option<i64>,
|
||||
/// Optional Y-axis boundary for coordinate-based actions
|
||||
#[serde(default)]
|
||||
pub max_coordinate_y: Option<i64>,
|
||||
}
|
||||
|
||||
/// Serde default for `BrowserComputerUseConfig::endpoint`: a loopback sidecar URL.
fn default_browser_computer_use_endpoint() -> String {
    String::from("http://127.0.0.1:8787/v1/actions")
}
|
||||
|
||||
/// Serde default for `BrowserComputerUseConfig::timeout_ms` (15 seconds, in milliseconds).
fn default_browser_computer_use_timeout_ms() -> u64 {
    15_000
}
|
||||
|
||||
impl Default for BrowserComputerUseConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
endpoint: default_browser_computer_use_endpoint(),
|
||||
api_key: None,
|
||||
timeout_ms: default_browser_computer_use_timeout_ms(),
|
||||
allow_remote_endpoint: false,
|
||||
window_allowlist: Vec::new(),
|
||||
max_coordinate_x: None,
|
||||
max_coordinate_y: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BrowserConfig {
|
||||
/// Enable `browser_open` tool (opens URLs in Brave without scraping)
|
||||
|
|
@ -430,7 +477,7 @@ pub struct BrowserConfig {
|
|||
/// Browser session name (for agent-browser automation)
|
||||
#[serde(default)]
|
||||
pub session_name: Option<String>,
|
||||
/// Browser automation backend: "agent_browser" | "rust_native" | "auto"
|
||||
/// Browser automation backend: "agent_browser" | "rust_native" | "computer_use" | "auto"
|
||||
#[serde(default = "default_browser_backend")]
|
||||
pub backend: String,
|
||||
/// Headless mode for rust-native backend
|
||||
|
|
@ -442,6 +489,9 @@ pub struct BrowserConfig {
|
|||
/// Optional Chrome/Chromium executable path for rust-native backend
|
||||
#[serde(default)]
|
||||
pub native_chrome_path: Option<String>,
|
||||
/// Computer-use sidecar configuration
|
||||
#[serde(default)]
|
||||
pub computer_use: BrowserComputerUseConfig,
|
||||
}
|
||||
|
||||
fn default_browser_backend() -> String {
|
||||
|
|
@ -462,6 +512,7 @@ impl Default for BrowserConfig {
|
|||
native_headless: default_true(),
|
||||
native_webdriver_url: default_browser_webdriver_url(),
|
||||
native_chrome_path: None,
|
||||
computer_use: BrowserComputerUseConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2334,6 +2385,12 @@ default_temperature = 0.7
|
|||
assert!(b.native_headless);
|
||||
assert_eq!(b.native_webdriver_url, "http://127.0.0.1:9515");
|
||||
assert!(b.native_chrome_path.is_none());
|
||||
assert_eq!(b.computer_use.endpoint, "http://127.0.0.1:8787/v1/actions");
|
||||
assert_eq!(b.computer_use.timeout_ms, 15_000);
|
||||
assert!(!b.computer_use.allow_remote_endpoint);
|
||||
assert!(b.computer_use.window_allowlist.is_empty());
|
||||
assert!(b.computer_use.max_coordinate_x.is_none());
|
||||
assert!(b.computer_use.max_coordinate_y.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -2346,6 +2403,15 @@ default_temperature = 0.7
|
|||
native_headless: false,
|
||||
native_webdriver_url: "http://localhost:4444".into(),
|
||||
native_chrome_path: Some("/usr/bin/chromium".into()),
|
||||
computer_use: BrowserComputerUseConfig {
|
||||
endpoint: "https://computer-use.example.com/v1/actions".into(),
|
||||
api_key: Some("test-token".into()),
|
||||
timeout_ms: 8_000,
|
||||
allow_remote_endpoint: true,
|
||||
window_allowlist: vec!["Chrome".into(), "Visual Studio Code".into()],
|
||||
max_coordinate_x: Some(3840),
|
||||
max_coordinate_y: Some(2160),
|
||||
},
|
||||
};
|
||||
let toml_str = toml::to_string(&b).unwrap();
|
||||
let parsed: BrowserConfig = toml::from_str(&toml_str).unwrap();
|
||||
|
|
@ -2359,6 +2425,16 @@ default_temperature = 0.7
|
|||
parsed.native_chrome_path.as_deref(),
|
||||
Some("/usr/bin/chromium")
|
||||
);
|
||||
assert_eq!(
|
||||
parsed.computer_use.endpoint,
|
||||
"https://computer-use.example.com/v1/actions"
|
||||
);
|
||||
assert_eq!(parsed.computer_use.api_key.as_deref(), Some("test-token"));
|
||||
assert_eq!(parsed.computer_use.timeout_ms, 8_000);
|
||||
assert!(parsed.computer_use.allow_remote_endpoint);
|
||||
assert_eq!(parsed.computer_use.window_allowlist.len(), 2);
|
||||
assert_eq!(parsed.computer_use.max_coordinate_x, Some(3840));
|
||||
assert_eq!(parsed.computer_use.max_coordinate_y, Some(2160));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use super::types::{BudgetCheck, CostRecord, CostSummary, ModelStats, TokenUsage, UsagePeriod};
|
||||
use crate::config::CostConfig;
|
||||
use crate::config::schema::CostConfig;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use chrono::{Datelike, NaiveDate, Utc};
|
||||
use std::collections::HashMap;
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ pub fn run_wizard() -> Result<Config> {
|
|||
autonomy: AutonomyConfig::default(),
|
||||
runtime: RuntimeConfig::default(),
|
||||
reliability: crate::config::ReliabilityConfig::default(),
|
||||
scheduler: crate::config::SchedulerConfig::default(),
|
||||
scheduler: crate::config::schema::SchedulerConfig::default(),
|
||||
model_routes: Vec::new(),
|
||||
heartbeat: HeartbeatConfig::default(),
|
||||
channels_config,
|
||||
|
|
@ -122,7 +122,7 @@ pub fn run_wizard() -> Result<Config> {
|
|||
browser: BrowserConfig::default(),
|
||||
http_request: crate::config::HttpRequestConfig::default(),
|
||||
identity: crate::config::IdentityConfig::default(),
|
||||
cost: crate::config::CostConfig::default(),
|
||||
cost: crate::config::schema::CostConfig::default(),
|
||||
hardware: hardware_config,
|
||||
agents: std::collections::HashMap::new(),
|
||||
security: crate::config::SecurityConfig::default(),
|
||||
|
|
@ -307,7 +307,7 @@ pub fn run_quick_setup(
|
|||
autonomy: AutonomyConfig::default(),
|
||||
runtime: RuntimeConfig::default(),
|
||||
reliability: crate::config::ReliabilityConfig::default(),
|
||||
scheduler: crate::config::SchedulerConfig::default(),
|
||||
scheduler: crate::config::schema::SchedulerConfig::default(),
|
||||
model_routes: Vec::new(),
|
||||
heartbeat: HeartbeatConfig::default(),
|
||||
channels_config: ChannelsConfig::default(),
|
||||
|
|
@ -319,7 +319,7 @@ pub fn run_quick_setup(
|
|||
browser: BrowserConfig::default(),
|
||||
http_request: crate::config::HttpRequestConfig::default(),
|
||||
identity: crate::config::IdentityConfig::default(),
|
||||
cost: crate::config::CostConfig::default(),
|
||||
cost: crate::config::schema::CostConfig::default(),
|
||||
hardware: HardwareConfig::default(),
|
||||
agents: std::collections::HashMap::new(),
|
||||
security: crate::config::SecurityConfig::default(),
|
||||
|
|
|
|||
|
|
@ -3,18 +3,48 @@
|
|||
//! By default this uses Vercel's `agent-browser` CLI for automation.
|
||||
//! Optionally, a Rust-native backend can be enabled at build time via
|
||||
//! `--features browser-native` and selected through config.
|
||||
//! Computer-use (OS-level) actions are supported via an optional sidecar endpoint.
|
||||
|
||||
use super::traits::{Tool, ToolResult};
|
||||
use crate::security::SecurityPolicy;
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use std::net::ToSocketAddrs;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::process::Command;
|
||||
use tracing::debug;
|
||||
|
||||
/// Browser automation tool using agent-browser CLI
|
||||
/// Computer-use sidecar settings.
#[derive(Debug, Clone)]
pub struct ComputerUseConfig {
    /// URL the sidecar accepts action requests on.
    pub endpoint: String,
    /// Optional bearer token sent with each request.
    pub api_key: Option<String>,
    /// Per-action request timeout in milliseconds.
    pub timeout_ms: u64,
    /// Whether a non-private (public) endpoint host is permitted.
    pub allow_remote_endpoint: bool,
    /// Window title/process hints forwarded to the sidecar as policy.
    pub window_allowlist: Vec<String>,
    /// Optional upper bound for X coordinates.
    pub max_coordinate_x: Option<i64>,
    /// Optional upper bound for Y coordinates.
    pub max_coordinate_y: Option<i64>,
}
|
||||
|
||||
impl Default for ComputerUseConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
endpoint: "http://127.0.0.1:8787/v1/actions".into(),
|
||||
api_key: None,
|
||||
timeout_ms: 15_000,
|
||||
allow_remote_endpoint: false,
|
||||
window_allowlist: Vec::new(),
|
||||
max_coordinate_x: None,
|
||||
max_coordinate_y: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Browser automation tool using pluggable backends.
|
||||
pub struct BrowserTool {
|
||||
security: Arc<SecurityPolicy>,
|
||||
allowed_domains: Vec<String>,
|
||||
|
|
@ -23,6 +53,7 @@ pub struct BrowserTool {
|
|||
native_headless: bool,
|
||||
native_webdriver_url: String,
|
||||
native_chrome_path: Option<String>,
|
||||
computer_use: ComputerUseConfig,
|
||||
#[cfg(feature = "browser-native")]
|
||||
native_state: tokio::sync::Mutex<native_backend::NativeBrowserState>,
|
||||
}
|
||||
|
|
@ -31,6 +62,7 @@ pub struct BrowserTool {
|
|||
enum BrowserBackendKind {
|
||||
AgentBrowser,
|
||||
RustNative,
|
||||
ComputerUse,
|
||||
Auto,
|
||||
}
|
||||
|
||||
|
|
@ -38,6 +70,7 @@ enum BrowserBackendKind {
|
|||
enum ResolvedBackend {
|
||||
AgentBrowser,
|
||||
RustNative,
|
||||
ComputerUse,
|
||||
}
|
||||
|
||||
impl BrowserBackendKind {
|
||||
|
|
@ -46,9 +79,10 @@ impl BrowserBackendKind {
|
|||
match key.as_str() {
|
||||
"agent_browser" | "agentbrowser" => Ok(Self::AgentBrowser),
|
||||
"rust_native" | "native" => Ok(Self::RustNative),
|
||||
"computer_use" | "computeruse" => Ok(Self::ComputerUse),
|
||||
"auto" => Ok(Self::Auto),
|
||||
_ => anyhow::bail!(
|
||||
"Unsupported browser backend '{raw}'. Use 'agent_browser', 'rust_native', or 'auto'"
|
||||
"Unsupported browser backend '{raw}'. Use 'agent_browser', 'rust_native', 'computer_use', or 'auto'"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
|
@ -57,6 +91,7 @@ impl BrowserBackendKind {
|
|||
match self {
|
||||
Self::AgentBrowser => "agent_browser",
|
||||
Self::RustNative => "rust_native",
|
||||
Self::ComputerUse => "computer_use",
|
||||
Self::Auto => "auto",
|
||||
}
|
||||
}
|
||||
|
|
@ -70,6 +105,17 @@ struct AgentBrowserResponse {
|
|||
error: Option<String>,
|
||||
}
|
||||
|
||||
/// Response format from computer-use sidecar.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ComputerUseResponse {
|
||||
#[serde(default)]
|
||||
success: Option<bool>,
|
||||
#[serde(default)]
|
||||
data: Option<Value>,
|
||||
#[serde(default)]
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
/// Supported browser actions
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
|
|
@ -151,9 +197,11 @@ impl BrowserTool {
|
|||
true,
|
||||
"http://127.0.0.1:9515".into(),
|
||||
None,
|
||||
ComputerUseConfig::default(),
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new_with_backend(
|
||||
security: Arc<SecurityPolicy>,
|
||||
allowed_domains: Vec<String>,
|
||||
|
|
@ -162,6 +210,7 @@ impl BrowserTool {
|
|||
native_headless: bool,
|
||||
native_webdriver_url: String,
|
||||
native_chrome_path: Option<String>,
|
||||
computer_use: ComputerUseConfig,
|
||||
) -> Self {
|
||||
Self {
|
||||
security,
|
||||
|
|
@ -171,6 +220,7 @@ impl BrowserTool {
|
|||
native_headless,
|
||||
native_webdriver_url,
|
||||
native_chrome_path,
|
||||
computer_use,
|
||||
#[cfg(feature = "browser-native")]
|
||||
native_state: tokio::sync::Mutex::new(native_backend::NativeBrowserState::default()),
|
||||
}
|
||||
|
|
@ -216,6 +266,52 @@ impl BrowserTool {
|
|||
}
|
||||
}
|
||||
|
||||
fn computer_use_endpoint_url(&self) -> anyhow::Result<reqwest::Url> {
|
||||
if self.computer_use.timeout_ms == 0 {
|
||||
anyhow::bail!("browser.computer_use.timeout_ms must be > 0");
|
||||
}
|
||||
|
||||
let endpoint = self.computer_use.endpoint.trim();
|
||||
if endpoint.is_empty() {
|
||||
anyhow::bail!("browser.computer_use.endpoint cannot be empty");
|
||||
}
|
||||
|
||||
let parsed = reqwest::Url::parse(endpoint).map_err(|_| {
|
||||
anyhow::anyhow!(
|
||||
"Invalid browser.computer_use.endpoint: '{endpoint}'. Expected http(s) URL"
|
||||
)
|
||||
})?;
|
||||
|
||||
let scheme = parsed.scheme();
|
||||
if scheme != "http" && scheme != "https" {
|
||||
anyhow::bail!("browser.computer_use.endpoint must use http:// or https://");
|
||||
}
|
||||
|
||||
let host = parsed
|
||||
.host_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("browser.computer_use.endpoint must include host"))?;
|
||||
|
||||
let host_is_private = is_private_host(host);
|
||||
if !self.computer_use.allow_remote_endpoint && !host_is_private {
|
||||
anyhow::bail!(
|
||||
"browser.computer_use.endpoint host '{host}' is public. Set browser.computer_use.allow_remote_endpoint=true to allow it"
|
||||
);
|
||||
}
|
||||
|
||||
if self.computer_use.allow_remote_endpoint && !host_is_private && scheme != "https" {
|
||||
anyhow::bail!(
|
||||
"browser.computer_use.endpoint must use https:// when allow_remote_endpoint=true and host is public"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(parsed)
|
||||
}
|
||||
|
||||
fn computer_use_available(&self) -> anyhow::Result<bool> {
|
||||
let endpoint = self.computer_use_endpoint_url()?;
|
||||
Ok(endpoint_reachable(&endpoint, Duration::from_millis(500)))
|
||||
}
|
||||
|
||||
async fn resolve_backend(&self) -> anyhow::Result<ResolvedBackend> {
|
||||
let configured = self.configured_backend()?;
|
||||
|
||||
|
|
@ -243,6 +339,14 @@ impl BrowserTool {
|
|||
}
|
||||
Ok(ResolvedBackend::RustNative)
|
||||
}
|
||||
BrowserBackendKind::ComputerUse => {
|
||||
if !self.computer_use_available()? {
|
||||
anyhow::bail!(
|
||||
"browser.backend='computer_use' but sidecar endpoint is unreachable. Check browser.computer_use.endpoint and sidecar status"
|
||||
);
|
||||
}
|
||||
Ok(ResolvedBackend::ComputerUse)
|
||||
}
|
||||
BrowserBackendKind::Auto => {
|
||||
if Self::rust_native_compiled() && self.rust_native_available() {
|
||||
return Ok(ResolvedBackend::RustNative);
|
||||
|
|
@ -251,14 +355,31 @@ impl BrowserTool {
|
|||
return Ok(ResolvedBackend::AgentBrowser);
|
||||
}
|
||||
|
||||
let computer_use_err = match self.computer_use_available() {
|
||||
Ok(true) => return Ok(ResolvedBackend::ComputerUse),
|
||||
Ok(false) => None,
|
||||
Err(err) => Some(err.to_string()),
|
||||
};
|
||||
|
||||
if Self::rust_native_compiled() {
|
||||
if let Some(err) = computer_use_err {
|
||||
anyhow::bail!(
|
||||
"browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable)"
|
||||
"browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use invalid: {err})"
|
||||
);
|
||||
}
|
||||
anyhow::bail!(
|
||||
"browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use sidecar unreachable)"
|
||||
)
|
||||
}
|
||||
|
||||
if let Some(err) = computer_use_err {
|
||||
anyhow::bail!(
|
||||
"browser.backend='auto' needs agent-browser CLI, or build with --features browser-native"
|
||||
"browser.backend='auto' needs agent-browser CLI, browser-native, or valid computer-use sidecar (error: {err})"
|
||||
);
|
||||
}
|
||||
|
||||
anyhow::bail!(
|
||||
"browser.backend='auto' needs agent-browser CLI, browser-native, or computer-use sidecar"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -523,6 +644,179 @@ impl BrowserTool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks that a coordinate is non-negative and within an optional inclusive limit.
///
/// `key` is only used to label error messages. When `max` is `None`, only the
/// non-negativity check applies.
///
/// # Errors
///
/// Fails when `value` is negative, when the configured limit itself is negative,
/// or when `value` is greater than the limit.
fn validate_coordinate(&self, key: &str, value: i64, max: Option<i64>) -> anyhow::Result<()> {
    if value < 0 {
        anyhow::bail!("'{key}' must be >= 0");
    }

    let limit = match max {
        Some(limit) => limit,
        None => return Ok(()),
    };

    if limit < 0 {
        anyhow::bail!("Configured coordinate limit for '{key}' must be >= 0");
    }
    if value > limit {
        anyhow::bail!("'{key}'={value} exceeds configured limit {limit}");
    }

    Ok(())
}
|
||||
|
||||
fn read_required_i64(
|
||||
&self,
|
||||
params: &serde_json::Map<String, Value>,
|
||||
key: &str,
|
||||
) -> anyhow::Result<i64> {
|
||||
params
|
||||
.get(key)
|
||||
.and_then(Value::as_i64)
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing or invalid '{key}' parameter"))
|
||||
}
|
||||
|
||||
fn validate_computer_use_action(
|
||||
&self,
|
||||
action: &str,
|
||||
params: &serde_json::Map<String, Value>,
|
||||
) -> anyhow::Result<()> {
|
||||
match action {
|
||||
"open" => {
|
||||
let url = params
|
||||
.get("url")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?;
|
||||
self.validate_url(url)?;
|
||||
}
|
||||
"mouse_move" | "mouse_click" => {
|
||||
let x = self.read_required_i64(params, "x")?;
|
||||
let y = self.read_required_i64(params, "y")?;
|
||||
self.validate_coordinate("x", x, self.computer_use.max_coordinate_x)?;
|
||||
self.validate_coordinate("y", y, self.computer_use.max_coordinate_y)?;
|
||||
}
|
||||
"mouse_drag" => {
|
||||
let from_x = self.read_required_i64(params, "from_x")?;
|
||||
let from_y = self.read_required_i64(params, "from_y")?;
|
||||
let to_x = self.read_required_i64(params, "to_x")?;
|
||||
let to_y = self.read_required_i64(params, "to_y")?;
|
||||
self.validate_coordinate("from_x", from_x, self.computer_use.max_coordinate_x)?;
|
||||
self.validate_coordinate("to_x", to_x, self.computer_use.max_coordinate_x)?;
|
||||
self.validate_coordinate("from_y", from_y, self.computer_use.max_coordinate_y)?;
|
||||
self.validate_coordinate("to_y", to_y, self.computer_use.max_coordinate_y)?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn execute_computer_use_action(
|
||||
&self,
|
||||
action: &str,
|
||||
args: &Value,
|
||||
) -> anyhow::Result<ToolResult> {
|
||||
let endpoint = self.computer_use_endpoint_url()?;
|
||||
|
||||
let mut params = args
|
||||
.as_object()
|
||||
.cloned()
|
||||
.ok_or_else(|| anyhow::anyhow!("browser args must be a JSON object"))?;
|
||||
params.remove("action");
|
||||
|
||||
self.validate_computer_use_action(action, ¶ms)?;
|
||||
|
||||
let payload = json!({
|
||||
"action": action,
|
||||
"params": params,
|
||||
"policy": {
|
||||
"allowed_domains": self.allowed_domains,
|
||||
"window_allowlist": self.computer_use.window_allowlist,
|
||||
"max_coordinate_x": self.computer_use.max_coordinate_x,
|
||||
"max_coordinate_y": self.computer_use.max_coordinate_y,
|
||||
},
|
||||
"metadata": {
|
||||
"session_name": self.session_name,
|
||||
"source": "zeroclaw.browser",
|
||||
"version": env!("CARGO_PKG_VERSION"),
|
||||
}
|
||||
});
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let mut request = client
|
||||
.post(endpoint)
|
||||
.timeout(Duration::from_millis(self.computer_use.timeout_ms))
|
||||
.json(&payload);
|
||||
|
||||
if let Some(api_key) = self.computer_use.api_key.as_deref() {
|
||||
let token = api_key.trim();
|
||||
if !token.is_empty() {
|
||||
request = request.bearer_auth(token);
|
||||
}
|
||||
}
|
||||
|
||||
let response = request.send().await.with_context(|| {
|
||||
format!(
|
||||
"Failed to call computer-use sidecar at {}",
|
||||
self.computer_use.endpoint
|
||||
)
|
||||
})?;
|
||||
|
||||
let status = response.status();
|
||||
let body = response
|
||||
.text()
|
||||
.await
|
||||
.context("Failed to read computer-use sidecar response body")?;
|
||||
|
||||
if let Ok(parsed) = serde_json::from_str::<ComputerUseResponse>(&body) {
|
||||
if status.is_success() && parsed.success.unwrap_or(true) {
|
||||
let output = parsed
|
||||
.data
|
||||
.map(|data| serde_json::to_string_pretty(&data).unwrap_or_default())
|
||||
.unwrap_or_else(|| {
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"backend": "computer_use",
|
||||
"action": action,
|
||||
"ok": true,
|
||||
}))
|
||||
.unwrap_or_default()
|
||||
});
|
||||
|
||||
return Ok(ToolResult {
|
||||
success: true,
|
||||
output,
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
|
||||
let error = parsed.error.or_else(|| {
|
||||
if status.is_success() && parsed.success == Some(false) {
|
||||
Some("computer-use sidecar returned success=false".to_string())
|
||||
} else {
|
||||
Some(format!(
|
||||
"computer-use sidecar request failed with status {status}"
|
||||
))
|
||||
}
|
||||
});
|
||||
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error,
|
||||
});
|
||||
}
|
||||
|
||||
if status.is_success() {
|
||||
return Ok(ToolResult {
|
||||
success: true,
|
||||
output: body,
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!(
|
||||
"computer-use sidecar request failed with status {status}: {}",
|
||||
body.trim()
|
||||
)),
|
||||
})
|
||||
}
|
||||
|
||||
async fn execute_action(
|
||||
&self,
|
||||
action: BrowserAction,
|
||||
|
|
@ -531,6 +825,9 @@ impl BrowserTool {
|
|||
match backend {
|
||||
ResolvedBackend::AgentBrowser => self.execute_agent_browser_action(action).await,
|
||||
ResolvedBackend::RustNative => self.execute_rust_native_action(action).await,
|
||||
ResolvedBackend::ComputerUse => anyhow::bail!(
|
||||
"Internal error: computer_use backend must be handled before BrowserAction parsing"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -564,10 +861,12 @@ impl Tool for BrowserTool {
|
|||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Web browser automation with pluggable backends (agent-browser or rust-native). \
|
||||
Supports navigation, clicking, filling forms, screenshots, and page snapshots. \
|
||||
Use 'snapshot' to map interactive elements to refs (@e1, @e2), then use refs for \
|
||||
precise interaction. Enforces browser.allowed_domains for open actions."
|
||||
concat!(
|
||||
"Web/browser automation with pluggable backends (agent-browser, rust-native, computer_use). ",
|
||||
"Supports DOM actions plus optional OS-level actions (mouse_move, mouse_click, mouse_drag, ",
|
||||
"key_type, key_press, screen_capture) through a computer-use sidecar. Use 'snapshot' to map ",
|
||||
"interactive elements to refs (@e1, @e2). Enforces browser.allowed_domains for open actions."
|
||||
)
|
||||
}
|
||||
|
||||
fn parameters_schema(&self) -> Value {
|
||||
|
|
@ -578,8 +877,10 @@ impl Tool for BrowserTool {
|
|||
"type": "string",
|
||||
"enum": ["open", "snapshot", "click", "fill", "type", "get_text",
|
||||
"get_title", "get_url", "screenshot", "wait", "press",
|
||||
"hover", "scroll", "is_visible", "close", "find"],
|
||||
"description": "Browser action to perform"
|
||||
"hover", "scroll", "is_visible", "close", "find",
|
||||
"mouse_move", "mouse_click", "mouse_drag", "key_type",
|
||||
"key_press", "screen_capture"],
|
||||
"description": "Browser action to perform (OS-level actions require backend=computer_use)"
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
|
|
@ -601,6 +902,35 @@ impl Tool for BrowserTool {
|
|||
"type": "string",
|
||||
"description": "Key to press (Enter, Tab, Escape, etc.)"
|
||||
},
|
||||
"x": {
|
||||
"type": "integer",
|
||||
"description": "Screen X coordinate (computer_use: mouse_move/mouse_click)"
|
||||
},
|
||||
"y": {
|
||||
"type": "integer",
|
||||
"description": "Screen Y coordinate (computer_use: mouse_move/mouse_click)"
|
||||
},
|
||||
"from_x": {
|
||||
"type": "integer",
|
||||
"description": "Drag source X coordinate (computer_use: mouse_drag)"
|
||||
},
|
||||
"from_y": {
|
||||
"type": "integer",
|
||||
"description": "Drag source Y coordinate (computer_use: mouse_drag)"
|
||||
},
|
||||
"to_x": {
|
||||
"type": "integer",
|
||||
"description": "Drag target X coordinate (computer_use: mouse_drag)"
|
||||
},
|
||||
"to_y": {
|
||||
"type": "integer",
|
||||
"description": "Drag target Y coordinate (computer_use: mouse_drag)"
|
||||
},
|
||||
"button": {
|
||||
"type": "string",
|
||||
"enum": ["left", "right", "middle"],
|
||||
"description": "Mouse button for computer_use mouse_click"
|
||||
},
|
||||
"direction": {
|
||||
"type": "string",
|
||||
"enum": ["up", "down", "left", "right"],
|
||||
|
|
@ -688,6 +1018,18 @@ impl Tool for BrowserTool {
|
|||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?;
|
||||
|
||||
if !is_supported_browser_action(action_str) {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("Unknown action: {action_str}")),
|
||||
});
|
||||
}
|
||||
|
||||
if backend == ResolvedBackend::ComputerUse {
|
||||
return self.execute_computer_use_action(action_str, &args).await;
|
||||
}
|
||||
|
||||
let action = match action_str {
|
||||
"open" => {
|
||||
let url = args
|
||||
|
|
@ -839,7 +1181,14 @@ impl Tool for BrowserTool {
|
|||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("Unknown action: {action_str}")),
|
||||
error: Some(format!(
|
||||
"Action '{action_str}' is unavailable for backend '{}'",
|
||||
match backend {
|
||||
ResolvedBackend::AgentBrowser => "agent_browser",
|
||||
ResolvedBackend::RustNative => "rust_native",
|
||||
ResolvedBackend::ComputerUse => "computer_use",
|
||||
}
|
||||
)),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
|
@ -1523,6 +1872,34 @@ mod native_backend {
|
|||
|
||||
// ── Helper functions ─────────────────────────────────────────────
|
||||
|
||||
/// Returns `true` when `action` is one of the action names this tool recognizes.
///
/// The match is exact and case-sensitive. The list covers the browser actions
/// (`open` … `find`) as well as the OS-level actions (`mouse_move`, `mouse_click`,
/// `mouse_drag`, `key_type`, `key_press`, `screen_capture`).
fn is_supported_browser_action(action: &str) -> bool {
    matches!(
        action,
        "open"
            | "snapshot"
            | "click"
            | "fill"
            | "type"
            | "get_text"
            | "get_title"
            | "get_url"
            | "screenshot"
            | "wait"
            | "press"
            | "hover"
            | "scroll"
            | "is_visible"
            | "close"
            | "find"
            | "mouse_move"
            | "mouse_click"
            | "mouse_drag"
            | "key_type"
            | "key_press"
            | "screen_capture"
    )
}
|
||||
|
||||
fn normalize_domains(domains: Vec<String>) -> Vec<String> {
|
||||
domains
|
||||
.into_iter()
|
||||
|
|
@ -1531,6 +1908,30 @@ fn normalize_domains(domains: Vec<String>) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn endpoint_reachable(endpoint: &reqwest::Url, timeout: Duration) -> bool {
|
||||
let host = match endpoint.host_str() {
|
||||
Some(host) if !host.is_empty() => host,
|
||||
_ => return false,
|
||||
};
|
||||
|
||||
let port = match endpoint.port_or_known_default() {
|
||||
Some(port) => port,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
let mut addrs = match (host, port).to_socket_addrs() {
|
||||
Ok(addrs) => addrs,
|
||||
Err(_) => return false,
|
||||
};
|
||||
|
||||
let addr = match addrs.next() {
|
||||
Some(addr) => addr,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
std::net::TcpStream::connect_timeout(&addr, timeout).is_ok()
|
||||
}
|
||||
|
||||
fn extract_host(url_str: &str) -> anyhow::Result<String> {
|
||||
// Simple host extraction without url crate
|
||||
let url = url_str.trim();
|
||||
|
|
@ -1746,6 +2147,10 @@ mod tests {
|
|||
BrowserBackendKind::parse("rust-native").unwrap(),
|
||||
BrowserBackendKind::RustNative
|
||||
);
|
||||
assert_eq!(
|
||||
BrowserBackendKind::parse("computer_use").unwrap(),
|
||||
BrowserBackendKind::ComputerUse
|
||||
);
|
||||
assert_eq!(
|
||||
BrowserBackendKind::parse("auto").unwrap(),
|
||||
BrowserBackendKind::Auto
|
||||
|
|
@ -1778,10 +2183,100 @@ mod tests {
|
|||
true,
|
||||
"http://127.0.0.1:9515".into(),
|
||||
None,
|
||||
ComputerUseConfig::default(),
|
||||
);
|
||||
assert_eq!(tool.configured_backend().unwrap(), BrowserBackendKind::Auto);
|
||||
}
|
||||
|
||||
/// A tool configured with backend "computer_use" reports the `ComputerUse` backend kind.
#[test]
fn browser_tool_accepts_computer_use_backend_config() {
    let security = Arc::new(SecurityPolicy::default());
    let tool = BrowserTool::new_with_backend(
        security,
        vec!["example.com".into()],
        None,
        "computer_use".into(),
        true,
        "http://127.0.0.1:9515".into(),
        None,
        ComputerUseConfig::default(),
    );

    assert_eq!(
        tool.configured_backend().unwrap(),
        BrowserBackendKind::ComputerUse
    );
}
|
||||
|
||||
/// With `allow_remote_endpoint` left at its default (`false`), an endpoint on a
/// public hostname is rejected.
#[test]
fn computer_use_endpoint_rejects_public_http_by_default() {
    let security = Arc::new(SecurityPolicy::default());
    let tool = BrowserTool::new_with_backend(
        security,
        vec!["example.com".into()],
        None,
        "computer_use".into(),
        true,
        "http://127.0.0.1:9515".into(),
        None,
        ComputerUseConfig {
            endpoint: "http://computer-use.example.com/v1/actions".into(),
            ..ComputerUseConfig::default()
        },
    );

    assert!(tool.computer_use_endpoint_url().is_err());
}
|
||||
|
||||
/// With `allow_remote_endpoint = true`, a public host is accepted over HTTPS and
/// rejected over plain HTTP.
#[test]
fn computer_use_endpoint_requires_https_for_public_remote() {
    // Builds a tool that differs only in the sidecar endpoint under test.
    let tool_for = |endpoint: &str| {
        BrowserTool::new_with_backend(
            Arc::new(SecurityPolicy::default()),
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                endpoint: endpoint.into(),
                allow_remote_endpoint: true,
                ..ComputerUseConfig::default()
            },
        )
    };

    assert!(tool_for("https://computer-use.example.com/v1/actions")
        .computer_use_endpoint_url()
        .is_ok());
    // The case the test name promises: opting in to remote endpoints does not permit
    // plain HTTP to a public host.
    assert!(tool_for("http://computer-use.example.com/v1/actions")
        .computer_use_endpoint_url()
        .is_err());
}
|
||||
|
||||
/// Coordinates must be non-negative and no greater than the configured limit.
#[test]
fn computer_use_coordinate_validation_applies_limits() {
    let security = Arc::new(SecurityPolicy::default());
    let tool = BrowserTool::new_with_backend(
        security,
        vec!["example.com".into()],
        None,
        "computer_use".into(),
        true,
        "http://127.0.0.1:9515".into(),
        None,
        ComputerUseConfig {
            max_coordinate_x: Some(100),
            max_coordinate_y: Some(100),
            ..ComputerUseConfig::default()
        },
    );
    let max_x = tool.computer_use.max_coordinate_x;
    let max_y = tool.computer_use.max_coordinate_y;

    assert!(tool.validate_coordinate("x", 50, max_x).is_ok());
    // The limit is inclusive: a value equal to it passes, one above it fails.
    assert!(tool.validate_coordinate("x", 100, max_x).is_ok());
    assert!(tool.validate_coordinate("x", 101, max_x).is_err());
    assert!(tool.validate_coordinate("y", 0, max_y).is_ok());
    assert!(tool.validate_coordinate("y", -1, max_y).is_err());
}
|
||||
|
||||
#[test]
|
||||
fn browser_tool_name() {
|
||||
let security = Arc::new(SecurityPolicy::default());
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ use super::traits::{Tool, ToolResult};
|
|||
use crate::security::{AutonomyLevel, SecurityPolicy};
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
#[cfg(test)]
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Git operations tool for structured repository management.
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ pub mod screenshot;
|
|||
pub mod shell;
|
||||
pub mod traits;
|
||||
|
||||
pub use browser::BrowserTool;
|
||||
pub use browser::{BrowserTool, ComputerUseConfig};
|
||||
pub use browser_open::BrowserOpenTool;
|
||||
pub use composio::ComposioTool;
|
||||
pub use delegate::DelegateTool;
|
||||
|
|
@ -131,6 +131,15 @@ pub fn all_tools_with_runtime(
|
|||
browser_config.native_headless,
|
||||
browser_config.native_webdriver_url.clone(),
|
||||
browser_config.native_chrome_path.clone(),
|
||||
ComputerUseConfig {
|
||||
endpoint: browser_config.computer_use.endpoint.clone(),
|
||||
api_key: browser_config.computer_use.api_key.clone(),
|
||||
timeout_ms: browser_config.computer_use.timeout_ms,
|
||||
allow_remote_endpoint: browser_config.computer_use.allow_remote_endpoint,
|
||||
window_allowlist: browser_config.computer_use.window_allowlist.clone(),
|
||||
max_coordinate_x: browser_config.computer_use.max_coordinate_x,
|
||||
max_coordinate_y: browser_config.computer_use.max_coordinate_y,
|
||||
},
|
||||
)));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue