Merge pull request #1010 from zeroclaw-labs/fix/docs-trait-doc-comments
docs(code): expand doc comments on security, observability, runtime, and peripheral traits
This commit is contained in:
commit
200ce0d6fd
4 changed files with 202 additions and 40 deletions
|
|
@ -1,12 +1,15 @@
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
/// Events the observer can record
|
/// Discrete events emitted by the agent runtime for observability.
|
||||||
|
///
|
||||||
|
/// Each variant represents a lifecycle event that observers can record,
|
||||||
|
/// aggregate, or forward to external monitoring systems. Events carry
|
||||||
|
/// just enough context for tracing and diagnostics without exposing
|
||||||
|
/// sensitive prompt or response content.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ObserverEvent {
|
pub enum ObserverEvent {
|
||||||
AgentStart {
|
/// The agent orchestration loop has started a new session.
|
||||||
provider: String,
|
AgentStart { provider: String, model: String },
|
||||||
model: String,
|
|
||||||
},
|
|
||||||
/// A request is about to be sent to an LLM provider.
|
/// A request is about to be sent to an LLM provider.
|
||||||
///
|
///
|
||||||
/// This is emitted immediately before a provider call so observers can print
|
/// This is emitted immediately before a provider call so observers can print
|
||||||
|
|
@ -24,6 +27,9 @@ pub enum ObserverEvent {
|
||||||
success: bool,
|
success: bool,
|
||||||
error_message: Option<String>,
|
error_message: Option<String>,
|
||||||
},
|
},
|
||||||
|
/// The agent session has finished.
|
||||||
|
///
|
||||||
|
/// Carries aggregate usage data (tokens, cost) when the provider reports it.
|
||||||
AgentEnd {
|
AgentEnd {
|
||||||
provider: String,
|
provider: String,
|
||||||
model: String,
|
model: String,
|
||||||
|
|
@ -32,9 +38,8 @@ pub enum ObserverEvent {
|
||||||
cost_usd: Option<f64>,
|
cost_usd: Option<f64>,
|
||||||
},
|
},
|
||||||
/// A tool call is about to be executed.
|
/// A tool call is about to be executed.
|
||||||
ToolCallStart {
|
ToolCallStart { tool: String },
|
||||||
tool: String,
|
/// A tool call has completed with a success/failure outcome.
|
||||||
},
|
|
||||||
ToolCall {
|
ToolCall {
|
||||||
tool: String,
|
tool: String,
|
||||||
duration: Duration,
|
duration: Duration,
|
||||||
|
|
@ -42,41 +47,80 @@ pub enum ObserverEvent {
|
||||||
},
|
},
|
||||||
/// The agent produced a final answer for the current user message.
|
/// The agent produced a final answer for the current user message.
|
||||||
TurnComplete,
|
TurnComplete,
|
||||||
|
/// A message was sent or received through a channel.
|
||||||
ChannelMessage {
|
ChannelMessage {
|
||||||
|
/// Channel name (e.g., `"telegram"`, `"discord"`).
|
||||||
channel: String,
|
channel: String,
|
||||||
|
/// `"inbound"` or `"outbound"`.
|
||||||
direction: String,
|
direction: String,
|
||||||
},
|
},
|
||||||
|
/// Periodic heartbeat tick from the runtime keep-alive loop.
|
||||||
HeartbeatTick,
|
HeartbeatTick,
|
||||||
|
/// An error occurred in a named component.
|
||||||
Error {
|
Error {
|
||||||
|
/// Subsystem where the error originated (e.g., `"provider"`, `"gateway"`).
|
||||||
component: String,
|
component: String,
|
||||||
|
/// Human-readable error description. Must not contain secrets or tokens.
|
||||||
message: String,
|
message: String,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Numeric metrics
|
/// Numeric metrics emitted by the agent runtime.
|
||||||
|
///
|
||||||
|
/// Observers can aggregate these into dashboards, alerts, or structured logs.
|
||||||
|
/// Each variant carries a single scalar value with implicit units.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ObserverMetric {
|
pub enum ObserverMetric {
|
||||||
|
/// Time elapsed for a single LLM or tool request.
|
||||||
RequestLatency(Duration),
|
RequestLatency(Duration),
|
||||||
|
/// Number of tokens consumed by an LLM call.
|
||||||
TokensUsed(u64),
|
TokensUsed(u64),
|
||||||
|
/// Current number of active concurrent sessions.
|
||||||
ActiveSessions(u64),
|
ActiveSessions(u64),
|
||||||
|
/// Current depth of the inbound message queue.
|
||||||
QueueDepth(u64),
|
QueueDepth(u64),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Core observability trait — implement for any backend
|
/// Core observability trait for recording agent runtime telemetry.
|
||||||
|
///
|
||||||
|
/// Implement this trait to integrate with any monitoring backend (structured
|
||||||
|
/// logging, Prometheus, OpenTelemetry, etc.). The agent runtime holds one or
|
||||||
|
/// more `Observer` instances and calls [`record_event`](Observer::record_event)
|
||||||
|
/// and [`record_metric`](Observer::record_metric) at key lifecycle points.
|
||||||
|
///
|
||||||
|
/// Implementations must be `Send + Sync + 'static` because the observer is
|
||||||
|
/// shared across async tasks via `Arc`.
|
||||||
pub trait Observer: Send + Sync + 'static {
|
pub trait Observer: Send + Sync + 'static {
|
||||||
/// Record a discrete event
|
/// Record a discrete lifecycle event.
|
||||||
|
///
|
||||||
|
/// Called synchronously on the hot path; implementations should avoid
|
||||||
|
/// blocking I/O. Buffer events internally and flush asynchronously
|
||||||
|
/// when possible.
|
||||||
fn record_event(&self, event: &ObserverEvent);
|
fn record_event(&self, event: &ObserverEvent);
|
||||||
|
|
||||||
/// Record a numeric metric
|
/// Record a numeric metric sample.
|
||||||
|
///
|
||||||
|
/// Called synchronously; same non-blocking guidance as
|
||||||
|
/// [`record_event`](Observer::record_event).
|
||||||
fn record_metric(&self, metric: &ObserverMetric);
|
fn record_metric(&self, metric: &ObserverMetric);
|
||||||
|
|
||||||
/// Flush any buffered data (no-op for most backends)
|
/// Flush any buffered telemetry data to the backend.
|
||||||
|
///
|
||||||
|
/// The runtime calls this during graceful shutdown. The default
|
||||||
|
/// implementation is a no-op, which is appropriate for backends
|
||||||
|
/// that write synchronously.
|
||||||
fn flush(&self) {}
|
fn flush(&self) {}
|
||||||
|
|
||||||
/// Human-readable name of this observer
|
/// Return the human-readable name of this observer backend.
|
||||||
|
///
|
||||||
|
/// Used in logs and diagnostics (e.g., `"console"`, `"prometheus"`,
|
||||||
|
/// `"opentelemetry"`).
|
||||||
fn name(&self) -> &str;
|
fn name(&self) -> &str;
|
||||||
|
|
||||||
/// Downcast to `Any` for backend-specific operations
|
/// Downcast to `Any` for backend-specific operations.
|
||||||
|
///
|
||||||
|
/// Enables callers to access concrete observer types when needed
|
||||||
|
/// (e.g., retrieving a Prometheus registry handle for custom metrics).
|
||||||
fn as_any(&self) -> &dyn std::any::Any;
|
fn as_any(&self) -> &dyn std::any::Any;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,32 +2,74 @@
|
||||||
//!
|
//!
|
||||||
//! Peripherals are the agent's "arms and legs": remote devices that run minimal
|
//! Peripherals are the agent's "arms and legs": remote devices that run minimal
|
||||||
//! firmware and expose capabilities (GPIO, sensors, actuators) as tools.
|
//! firmware and expose capabilities (GPIO, sensors, actuators) as tools.
|
||||||
|
//! See `docs/hardware-peripherals-design.md` for the communication protocol
|
||||||
|
//! and firmware integration guide.
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use crate::tools::Tool;
|
use crate::tools::Tool;
|
||||||
|
|
||||||
/// A hardware peripheral that exposes capabilities as tools.
|
/// A hardware peripheral that exposes capabilities as agent tools.
|
||||||
///
|
///
|
||||||
/// Implement this for boards like Nucleo-F401RE (serial), RPi GPIO (native), etc.
|
/// Implement this trait for each supported board type (e.g., Nucleo-F401RE
|
||||||
/// When connected, the peripheral's tools are merged into the agent's tool registry.
|
/// over serial, Raspberry Pi GPIO via sysfs/gpiod). When the agent connects
|
||||||
|
/// to a peripheral, the tools returned by [`tools`](Peripheral::tools) are
|
||||||
|
/// merged into the agent's tool registry, making hardware capabilities
|
||||||
|
/// available to the LLM as callable functions.
|
||||||
|
///
|
||||||
|
/// The lifecycle follows a connect → use → disconnect pattern. Implementations
|
||||||
|
/// must be `Send + Sync` because the peripheral may be accessed from multiple
|
||||||
|
/// async tasks after connection.
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait Peripheral: Send + Sync {
|
pub trait Peripheral: Send + Sync {
|
||||||
/// Human-readable peripheral name (e.g. "nucleo-f401re-0")
|
/// Return the human-readable instance name of this peripheral.
|
||||||
|
///
|
||||||
|
/// Should uniquely identify a specific device instance, including an index
|
||||||
|
/// or serial number when multiple boards of the same type are connected
|
||||||
|
/// (e.g., `"nucleo-f401re-0"`, `"rpi-gpio-hat-1"`).
|
||||||
fn name(&self) -> &str;
|
fn name(&self) -> &str;
|
||||||
|
|
||||||
/// Board type identifier (e.g. "nucleo-f401re", "rpi-gpio")
|
/// Return the board type identifier for this peripheral.
|
||||||
|
///
|
||||||
|
/// A stable, lowercase string used in configuration and factory registration
|
||||||
|
/// (e.g., `"nucleo-f401re"`, `"rpi-gpio"`). Must match the key used in
|
||||||
|
/// the config schema's peripheral section.
|
||||||
fn board_type(&self) -> &str;
|
fn board_type(&self) -> &str;
|
||||||
|
|
||||||
/// Connect to the peripheral (open serial, init GPIO, etc.)
|
/// Establish a connection to the peripheral hardware.
|
||||||
|
///
|
||||||
|
/// Opens the underlying transport (serial port, GPIO bus, I²C, etc.) and
|
||||||
|
/// performs any initialization handshake required by the firmware.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if the device is unreachable, the transport cannot be
|
||||||
|
/// opened, or the firmware handshake fails.
|
||||||
async fn connect(&mut self) -> anyhow::Result<()>;
|
async fn connect(&mut self) -> anyhow::Result<()>;
|
||||||
|
|
||||||
/// Disconnect and release resources
|
/// Disconnect from the peripheral and release all held resources.
|
||||||
|
///
|
||||||
|
/// Closes serial ports, unexports GPIO pins, and performs any cleanup
|
||||||
|
/// required for a safe shutdown. After this call, [`health_check`](Peripheral::health_check)
|
||||||
|
/// should return `false` until [`connect`](Peripheral::connect) is called again.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if resource cleanup fails (e.g., serial port busy).
|
||||||
async fn disconnect(&mut self) -> anyhow::Result<()>;
|
async fn disconnect(&mut self) -> anyhow::Result<()>;
|
||||||
|
|
||||||
/// Check if the peripheral is reachable and responsive
|
/// Check whether the peripheral is reachable and responsive.
|
||||||
|
///
|
||||||
|
/// Performs a lightweight probe (e.g., a ping command over serial) without
|
||||||
|
/// altering device state. Returns `true` if the device responds within an
|
||||||
|
/// implementation-defined timeout.
|
||||||
async fn health_check(&self) -> bool;
|
async fn health_check(&self) -> bool;
|
||||||
|
|
||||||
/// Tools this peripheral provides (e.g. gpio_read, gpio_write, sensor_read)
|
/// Return the tools this peripheral exposes to the agent.
|
||||||
|
///
|
||||||
|
/// Each returned [`Tool`] delegates execution to the underlying hardware
|
||||||
|
/// (e.g., `gpio_read`, `gpio_write`, `sensor_read`). The agent merges
|
||||||
|
/// these into its tool registry after a successful
|
||||||
|
/// [`connect`](Peripheral::connect).
|
||||||
fn tools(&self) -> Vec<Box<dyn Tool>>;
|
fn tools(&self) -> Vec<Box<dyn Tool>>;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,29 +1,68 @@
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
/// Runtime adapter — abstracts platform differences so the same agent
|
/// Runtime adapter that abstracts platform differences for the agent.
|
||||||
/// code runs on native, Docker, Cloudflare Workers, Raspberry Pi, etc.
|
///
|
||||||
|
/// Implement this trait to port the agent to a new execution environment.
|
||||||
|
/// The adapter declares platform capabilities (shell access, filesystem,
|
||||||
|
/// long-running processes) and provides platform-specific implementations
|
||||||
|
/// for operations like spawning shell commands. The orchestration loop
|
||||||
|
/// queries these capabilities to adapt its behavior—for example, disabling
|
||||||
|
/// tool execution on runtimes without shell access.
|
||||||
|
///
|
||||||
|
/// Implementations must be `Send + Sync` because the adapter is shared
|
||||||
|
/// across async tasks on the Tokio runtime.
|
||||||
pub trait RuntimeAdapter: Send + Sync {
|
pub trait RuntimeAdapter: Send + Sync {
|
||||||
/// Human-readable runtime name
|
/// Return the human-readable name of this runtime environment.
|
||||||
|
///
|
||||||
|
/// Used in logs and diagnostics (e.g., `"native"`, `"docker"`,
|
||||||
|
/// `"cloudflare-workers"`).
|
||||||
fn name(&self) -> &str;
|
fn name(&self) -> &str;
|
||||||
|
|
||||||
/// Whether this runtime supports shell access
|
/// Report whether this runtime supports shell command execution.
|
||||||
|
///
|
||||||
|
/// When `false`, the agent disables shell-based tools. Serverless and
|
||||||
|
/// edge runtimes typically return `false`.
|
||||||
fn has_shell_access(&self) -> bool;
|
fn has_shell_access(&self) -> bool;
|
||||||
|
|
||||||
/// Whether this runtime supports filesystem access
|
/// Report whether this runtime supports filesystem read/write.
|
||||||
|
///
|
||||||
|
/// When `false`, the agent disables file-based tools and falls back to
|
||||||
|
/// in-memory storage.
|
||||||
fn has_filesystem_access(&self) -> bool;
|
fn has_filesystem_access(&self) -> bool;
|
||||||
|
|
||||||
/// Base storage path for this runtime
|
/// Return the base directory for persistent storage on this runtime.
|
||||||
|
///
|
||||||
|
/// Memory backends, logs, and other artifacts are stored under this path.
|
||||||
|
/// Implementations should return a platform-appropriate writable directory.
|
||||||
fn storage_path(&self) -> PathBuf;
|
fn storage_path(&self) -> PathBuf;
|
||||||
|
|
||||||
/// Whether long-running processes (gateway, heartbeat) are supported
|
/// Report whether this runtime supports long-running background processes.
|
||||||
|
///
|
||||||
|
/// When `true`, the agent may start the gateway server, heartbeat loop,
|
||||||
|
/// and other persistent tasks. Serverless runtimes with short execution
|
||||||
|
/// limits should return `false`.
|
||||||
fn supports_long_running(&self) -> bool;
|
fn supports_long_running(&self) -> bool;
|
||||||
|
|
||||||
/// Maximum memory budget in bytes (0 = unlimited)
|
/// Return the maximum memory budget in bytes for this runtime.
|
||||||
|
///
|
||||||
|
/// A value of `0` (the default) indicates no limit. Constrained
|
||||||
|
/// environments (embedded, serverless) should return their actual
|
||||||
|
/// memory ceiling so the agent can adapt buffer sizes and caching.
|
||||||
fn memory_budget(&self) -> u64 {
|
fn memory_budget(&self) -> u64 {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build a shell command process for this runtime.
|
/// Build a shell command process configured for this runtime.
|
||||||
|
///
|
||||||
|
/// Constructs a [`tokio::process::Command`] that will execute `command`
|
||||||
|
/// with `workspace_dir` as the working directory. Implementations may
|
||||||
|
/// prepend sandbox wrappers, set environment variables, or redirect
|
||||||
|
/// I/O as appropriate for the platform.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if the runtime does not support shell access or if
|
||||||
|
/// the command cannot be constructed (e.g., missing shell binary).
|
||||||
fn build_shell_command(
|
fn build_shell_command(
|
||||||
&self,
|
&self,
|
||||||
command: &str,
|
command: &str,
|
||||||
|
|
|
||||||
|
|
@ -1,25 +1,62 @@
|
||||||
//! Sandbox trait for pluggable OS-level isolation
|
//! Sandbox trait for pluggable OS-level isolation.
|
||||||
|
//!
|
||||||
|
//! This module defines the [`Sandbox`] trait, which abstracts OS-level process
|
||||||
|
//! isolation backends. Implementations wrap shell commands with platform-specific
|
||||||
|
//! sandboxing (e.g., seccomp, AppArmor, namespaces) to limit the blast radius
|
||||||
|
//! of tool execution. The agent runtime selects and applies a sandbox backend
|
||||||
|
//! before executing any shell command.
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
/// Sandbox backend for OS-level isolation
|
/// Sandbox backend for OS-level process isolation.
|
||||||
|
///
|
||||||
|
/// Implement this trait to add a new sandboxing strategy. The runtime queries
|
||||||
|
/// [`is_available`](Sandbox::is_available) at startup to select the best
|
||||||
|
/// backend for the current platform, then calls
|
||||||
|
/// [`wrap_command`](Sandbox::wrap_command) before every shell execution.
|
||||||
|
///
|
||||||
|
/// Implementations must be `Send + Sync` because the sandbox may be shared
|
||||||
|
/// across concurrent tool executions on the Tokio runtime.
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait Sandbox: Send + Sync {
|
pub trait Sandbox: Send + Sync {
|
||||||
/// Wrap a command with sandbox protection
|
/// Wrap a command with sandbox protection.
|
||||||
|
///
|
||||||
|
/// Mutates `cmd` in place to apply isolation constraints (e.g., prepending
|
||||||
|
/// a wrapper binary, setting environment variables, adding seccomp filters).
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns `std::io::Error` if the sandbox configuration cannot be applied
|
||||||
|
/// (e.g., missing wrapper binary, invalid policy file).
|
||||||
fn wrap_command(&self, cmd: &mut Command) -> std::io::Result<()>;
|
fn wrap_command(&self, cmd: &mut Command) -> std::io::Result<()>;
|
||||||
|
|
||||||
/// Check if this sandbox backend is available on the current platform
|
/// Check if this sandbox backend is available on the current platform.
|
||||||
|
///
|
||||||
|
/// Returns `true` when all required kernel features, binaries, and
|
||||||
|
/// permissions are present. The runtime calls this at startup to select
|
||||||
|
/// the most capable available backend.
|
||||||
fn is_available(&self) -> bool;
|
fn is_available(&self) -> bool;
|
||||||
|
|
||||||
/// Human-readable name of this sandbox backend
|
/// Return the human-readable name of this sandbox backend.
|
||||||
|
///
|
||||||
|
/// Used in logs and diagnostics to identify which isolation strategy is
|
||||||
|
/// active (e.g., `"firejail"`, `"bubblewrap"`, `"none"`).
|
||||||
fn name(&self) -> &str;
|
fn name(&self) -> &str;
|
||||||
|
|
||||||
/// Description of what this sandbox provides
|
/// Return a brief description of the isolation guarantees this sandbox provides.
|
||||||
|
///
|
||||||
|
/// Displayed in status output and health checks so operators can verify
|
||||||
|
/// the active security posture.
|
||||||
fn description(&self) -> &str;
|
fn description(&self) -> &str;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// No-op sandbox (always available, provides no additional isolation)
|
/// No-op sandbox that provides no additional OS-level isolation.
|
||||||
|
///
|
||||||
|
/// Always reports itself as available. Use this as the fallback when no
|
||||||
|
/// platform-specific sandbox backend is detected, or in development
|
||||||
|
/// environments where isolation is not required. Security in this mode
|
||||||
|
/// relies entirely on application-layer controls.
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct NoopSandbox;
|
pub struct NoopSandbox;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue