diff --git a/Cargo.lock b/Cargo.lock index f0a6be7..e19c5c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4927,6 +4927,7 @@ dependencies = [ "prometheus", "prost", "rand 0.8.5", + "regex", "reqwest", "rppal", "rusqlite", diff --git a/Cargo.toml b/Cargo.toml index f2c097f..d1ba9ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,6 +90,7 @@ glob = "0.3" tokio-tungstenite = { version = "0.24", features = ["rustls-tls-webpki-roots"] } futures-util = { version = "0.3", default-features = false, features = ["sink"] } futures = "0.3" +regex = "1.10" hostname = "0.4.2" lettre = { version = "0.11.19", default-features = false, features = ["builder", "smtp-transport", "rustls-tls"] } mail-parser = "0.11.2" diff --git a/src/agent/loop_.rs b/src/agent/loop_.rs index 08ce859..81882d6 100644 --- a/src/agent/loop_.rs +++ b/src/agent/loop_.rs @@ -7,14 +7,70 @@ use crate::security::SecurityPolicy; use crate::tools::{self, Tool}; use crate::util::truncate_with_ellipsis; use anyhow::Result; +use regex::{Regex, RegexSet}; use std::fmt::Write; use std::io::Write as _; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::time::Instant; use uuid::Uuid; + /// Maximum agentic tool-use iterations per user message to prevent runaway loops. const MAX_TOOL_ITERATIONS: usize = 10; +static SENSITIVE_KEY_PATTERNS: LazyLock = LazyLock::new(|| { + RegexSet::new([ + r"(?i)token", + r"(?i)api[_-]?key", + r"(?i)password", + r"(?i)secret", + r"(?i)user[_-]?key", + r"(?i)bearer", + r"(?i)credential", + ]) + .unwrap() +}); + +static SENSITIVE_KV_REGEX: LazyLock = LazyLock::new(|| { + Regex::new(r#"(?i)(token|api[_-]?key|password|secret|user[_-]?key|bearer|credential)["']?\s*[:=]\s*(?:"([^"]{8,})"|'([^']{8,})'|([a-zA-Z0-9_\-\.]{8,}))"#).unwrap() +}); + +/// Scrub credentials from tool output to prevent accidental exfiltration. +/// Replaces known credential patterns with a redacted placeholder while preserving +/// a small prefix for context. +fn scrub_credentials(input: &str) -> String { + SENSITIVE_KV_REGEX + .replace_all(input, |caps: ®ex::Captures| { + let full_match = &caps[0]; + let key = &caps[1]; + let val = caps + .get(2) + .or(caps.get(3)) + .or(caps.get(4)) + .map(|m| m.as_str()) + .unwrap_or(""); + + // Preserve first 4 chars for context, then redact + let prefix = if val.len() > 4 { &val[..4] } else { "" }; + + if full_match.contains(':') { + if full_match.contains('"') { + format!("\"{}\": \"{}*[REDACTED]\"", key, prefix) + } else { + format!("{}: {}*[REDACTED]", key, prefix) + } + } else if full_match.contains('=') { + if full_match.contains('"') { + format!("{}=\"{}*[REDACTED]\"", key, prefix) + } else { + format!("{}={}*[REDACTED]", key, prefix) + } + } else { + format!("{}: {}*[REDACTED]", key, prefix) + } + }) + .to_string() +} + /// Trigger auto-compaction when non-system message count exceeds this threshold. const MAX_HISTORY_MESSAGES: usize = 50; @@ -608,7 +664,7 @@ pub(crate) async fn run_tool_call_loop( success: r.success, }); if r.success { - r.output + scrub_credentials(&r.output) } else { format!("Error: {}", r.error.unwrap_or_else(|| r.output)) } @@ -1222,6 +1278,25 @@ pub async fn process_message(config: Config, message: &str) -> Result { #[cfg(test)] mod tests { use super::*; + + #[test] + fn test_scrub_credentials() { + let input = "API_KEY=sk-1234567890abcdef; token: 1234567890; password=\"secret123456\""; + let scrubbed = scrub_credentials(input); + assert!(scrubbed.contains("API_KEY=sk-1*[REDACTED]")); + assert!(scrubbed.contains("token: 1234*[REDACTED]")); + assert!(scrubbed.contains("password=\"secr*[REDACTED]\"")); + assert!(!scrubbed.contains("abcdef")); + assert!(!scrubbed.contains("secret123456")); + } + + #[test] + fn test_scrub_credentials_json() { + let input = r#"{"api_key": "sk-1234567890", "other": "public"}"#; + let scrubbed = scrub_credentials(input); + assert!(scrubbed.contains("\"api_key\": \"sk-1*[REDACTED]\"")); + assert!(scrubbed.contains("public")); + } use crate::memory::{Memory, MemoryCategory, SqliteMemory}; use tempfile::TempDir;