323 lines
11 KiB
Rust
323 lines
11 KiB
Rust
use super::traits::{Tool, ToolResult};
|
|
use crate::security::SecurityPolicy;
|
|
use async_trait::async_trait;
|
|
use serde_json::json;
|
|
use std::fmt::Write;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
|
|
/// Maximum time to wait for a screenshot command to complete.
|
|
const SCREENSHOT_TIMEOUT_SECS: u64 = 15;
|
|
/// Maximum base64 payload size to return (2 MB of base64 ≈ 1.5 MB image).
|
|
const MAX_BASE64_BYTES: usize = 2_097_152;
|
|
|
|
/// Tool for capturing screenshots using platform-native commands.
|
|
///
|
|
/// macOS: `screencapture`
|
|
/// Linux: tries `gnome-screenshot`, `scrot`, `import` (`ImageMagick`) in order.
|
|
pub struct ScreenshotTool {
|
|
security: Arc<SecurityPolicy>,
|
|
}
|
|
|
|
impl ScreenshotTool {
|
|
pub fn new(security: Arc<SecurityPolicy>) -> Self {
|
|
Self { security }
|
|
}
|
|
|
|
/// Determine the screenshot command for the current platform.
|
|
fn screenshot_command(output_path: &str) -> Option<Vec<String>> {
|
|
if cfg!(target_os = "macos") {
|
|
Some(vec![
|
|
"screencapture".into(),
|
|
"-x".into(), // no sound
|
|
output_path.into(),
|
|
])
|
|
} else if cfg!(target_os = "linux") {
|
|
Some(vec![
|
|
"sh".into(),
|
|
"-c".into(),
|
|
format!(
|
|
"if command -v gnome-screenshot >/dev/null 2>&1; then \
|
|
gnome-screenshot -f '{output_path}'; \
|
|
elif command -v scrot >/dev/null 2>&1; then \
|
|
scrot '{output_path}'; \
|
|
elif command -v import >/dev/null 2>&1; then \
|
|
import -window root '{output_path}'; \
|
|
else \
|
|
echo 'NO_SCREENSHOT_TOOL' >&2; exit 1; \
|
|
fi"
|
|
),
|
|
])
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// Execute the screenshot capture and return the result.
|
|
async fn capture(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
|
|
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
|
|
let filename = args
|
|
.get("filename")
|
|
.and_then(|v| v.as_str())
|
|
.map_or_else(|| format!("screenshot_{timestamp}.png"), String::from);
|
|
|
|
// Sanitize filename to prevent path traversal
|
|
let safe_name = PathBuf::from(&filename).file_name().map_or_else(
|
|
|| format!("screenshot_{timestamp}.png"),
|
|
|n| n.to_string_lossy().to_string(),
|
|
);
|
|
|
|
// Reject filenames with shell-breaking characters to prevent injection in sh -c
|
|
const SHELL_UNSAFE: &[char] = &[
|
|
'\'', '"', '`', '$', '\\', ';', '|', '&', '\n', '\0', '(', ')',
|
|
];
|
|
if safe_name.contains(SHELL_UNSAFE) {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some("Filename contains characters unsafe for shell execution".into()),
|
|
});
|
|
}
|
|
|
|
let output_path = self.security.workspace_dir.join(&safe_name);
|
|
let output_str = output_path.to_string_lossy().to_string();
|
|
|
|
let Some(mut cmd_args) = Self::screenshot_command(&output_str) else {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some("Screenshot not supported on this platform".into()),
|
|
});
|
|
};
|
|
|
|
// macOS region flags
|
|
if cfg!(target_os = "macos") {
|
|
if let Some(region) = args.get("region").and_then(|v| v.as_str()) {
|
|
match region {
|
|
"selection" => cmd_args.insert(1, "-s".into()),
|
|
"window" => cmd_args.insert(1, "-w".into()),
|
|
_ => {} // ignore unknown regions
|
|
}
|
|
}
|
|
}
|
|
|
|
let program = cmd_args.remove(0);
|
|
let result = tokio::time::timeout(
|
|
Duration::from_secs(SCREENSHOT_TIMEOUT_SECS),
|
|
tokio::process::Command::new(&program)
|
|
.args(&cmd_args)
|
|
.output(),
|
|
)
|
|
.await;
|
|
|
|
match result {
|
|
Ok(Ok(output)) => {
|
|
if !output.status.success() {
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
if stderr.contains("NO_SCREENSHOT_TOOL") {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(
|
|
"No screenshot tool found. Install gnome-screenshot, scrot, or ImageMagick."
|
|
.into(),
|
|
),
|
|
});
|
|
}
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("Screenshot command failed: {stderr}")),
|
|
});
|
|
}
|
|
|
|
Self::read_and_encode(&output_path).await
|
|
}
|
|
Ok(Err(e)) => Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("Failed to execute screenshot command: {e}")),
|
|
}),
|
|
Err(_) => Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!(
|
|
"Screenshot timed out after {SCREENSHOT_TIMEOUT_SECS}s"
|
|
)),
|
|
}),
|
|
}
|
|
}
|
|
|
|
/// Read the screenshot file and return base64-encoded result.
|
|
async fn read_and_encode(output_path: &std::path::Path) -> anyhow::Result<ToolResult> {
|
|
// Check file size before reading to prevent OOM on large screenshots
|
|
const MAX_RAW_BYTES: u64 = 1_572_864; // ~1.5 MB (base64 expands ~33%)
|
|
if let Ok(meta) = tokio::fs::metadata(output_path).await {
|
|
if meta.len() > MAX_RAW_BYTES {
|
|
return Ok(ToolResult {
|
|
success: true,
|
|
output: format!(
|
|
"Screenshot saved to: {}\nSize: {} bytes (too large to base64-encode inline)",
|
|
output_path.display(),
|
|
meta.len(),
|
|
),
|
|
error: None,
|
|
});
|
|
}
|
|
}
|
|
|
|
match tokio::fs::read(output_path).await {
|
|
Ok(bytes) => {
|
|
use base64::Engine;
|
|
let size = bytes.len();
|
|
let mut encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
|
|
let truncated = if encoded.len() > MAX_BASE64_BYTES {
|
|
encoded.truncate(encoded.floor_char_boundary(MAX_BASE64_BYTES));
|
|
true
|
|
} else {
|
|
false
|
|
};
|
|
|
|
let mut output_msg = format!(
|
|
"Screenshot saved to: {}\nSize: {size} bytes\nBase64 length: {}",
|
|
output_path.display(),
|
|
encoded.len(),
|
|
);
|
|
if truncated {
|
|
output_msg.push_str(" (truncated)");
|
|
}
|
|
let mime = match output_path.extension().and_then(|e| e.to_str()) {
|
|
Some("jpg" | "jpeg") => "image/jpeg",
|
|
Some("bmp") => "image/bmp",
|
|
Some("gif") => "image/gif",
|
|
Some("webp") => "image/webp",
|
|
_ => "image/png",
|
|
};
|
|
let _ = write!(output_msg, "\ndata:{mime};base64,{encoded}");
|
|
|
|
Ok(ToolResult {
|
|
success: true,
|
|
output: output_msg,
|
|
error: None,
|
|
})
|
|
}
|
|
Err(e) => Ok(ToolResult {
|
|
success: false,
|
|
output: format!("Screenshot saved to: {}", output_path.display()),
|
|
error: Some(format!("Failed to read screenshot file: {e}")),
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Tool for ScreenshotTool {
|
|
fn name(&self) -> &str {
|
|
"screenshot"
|
|
}
|
|
|
|
fn description(&self) -> &str {
|
|
"Capture a screenshot of the current screen. Returns the file path and base64-encoded PNG data."
|
|
}
|
|
|
|
fn parameters_schema(&self) -> serde_json::Value {
|
|
json!({
|
|
"type": "object",
|
|
"properties": {
|
|
"filename": {
|
|
"type": "string",
|
|
"description": "Optional filename (default: screenshot_<timestamp>.png). Saved in workspace."
|
|
},
|
|
"region": {
|
|
"type": "string",
|
|
"description": "Optional region for macOS: 'selection' for interactive crop, 'window' for front window. Ignored on Linux."
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
|
|
if !self.security.can_act() {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some("Action blocked: autonomy is read-only".into()),
|
|
});
|
|
}
|
|
self.capture(args).await
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::security::{AutonomyLevel, SecurityPolicy};
|
|
|
|
fn test_security() -> Arc<SecurityPolicy> {
|
|
Arc::new(SecurityPolicy {
|
|
autonomy: AutonomyLevel::Full,
|
|
workspace_dir: std::env::temp_dir(),
|
|
..SecurityPolicy::default()
|
|
})
|
|
}
|
|
|
|
#[test]
|
|
fn screenshot_tool_name() {
|
|
let tool = ScreenshotTool::new(test_security());
|
|
assert_eq!(tool.name(), "screenshot");
|
|
}
|
|
|
|
#[test]
|
|
fn screenshot_tool_description() {
|
|
let tool = ScreenshotTool::new(test_security());
|
|
assert!(!tool.description().is_empty());
|
|
assert!(tool.description().contains("screenshot"));
|
|
}
|
|
|
|
#[test]
|
|
fn screenshot_tool_schema() {
|
|
let tool = ScreenshotTool::new(test_security());
|
|
let schema = tool.parameters_schema();
|
|
assert!(schema["properties"]["filename"].is_object());
|
|
assert!(schema["properties"]["region"].is_object());
|
|
}
|
|
|
|
#[test]
|
|
fn screenshot_tool_spec() {
|
|
let tool = ScreenshotTool::new(test_security());
|
|
let spec = tool.spec();
|
|
assert_eq!(spec.name, "screenshot");
|
|
assert!(spec.parameters.is_object());
|
|
}
|
|
|
|
#[test]
|
|
#[cfg(any(target_os = "macos", target_os = "linux"))]
|
|
fn screenshot_command_exists() {
|
|
let cmd = ScreenshotTool::screenshot_command("/tmp/test.png");
|
|
assert!(cmd.is_some());
|
|
let args = cmd.unwrap();
|
|
assert!(!args.is_empty());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn screenshot_rejects_shell_injection_filename() {
|
|
let tool = ScreenshotTool::new(test_security());
|
|
let result = tool
|
|
.execute(json!({"filename": "test'injection.png"}))
|
|
.await
|
|
.unwrap();
|
|
assert!(!result.success);
|
|
assert!(result.error.unwrap().contains("unsafe for shell execution"));
|
|
}
|
|
|
|
#[test]
|
|
fn screenshot_command_contains_output_path() {
|
|
let cmd = ScreenshotTool::screenshot_command("/tmp/my_screenshot.png").unwrap();
|
|
let joined = cmd.join(" ");
|
|
assert!(
|
|
joined.contains("/tmp/my_screenshot.png"),
|
|
"Command should contain the output path"
|
|
);
|
|
}
|
|
}
|