From 25fd10a5386bf4ca5b773ba0e875c7a95e5d965c Mon Sep 17 00:00:00 2001 From: Alex Gorevski Date: Thu, 19 Feb 2026 13:04:36 -0800 Subject: [PATCH] docs(code): expand doc comments on security, observability, runtime, and peripheral traits The four underdocumented core trait files now include trait-level doc blocks explaining purpose and architecture role, method-level documentation with parameter/return/error descriptions, and public struct/enum documentation. This brings parity with the well-documented provider, channel, tool, and memory traits, giving extension developers clear guidance for implementing these core extension points. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/observability/traits.rs | 74 +++++++++++++++++++++++++++++-------- src/peripherals/traits.rs | 60 +++++++++++++++++++++++++----- src/runtime/traits.rs | 57 +++++++++++++++++++++++----- src/security/traits.rs | 51 +++++++++++++++++++++---- 4 files changed, 202 insertions(+), 40 deletions(-) diff --git a/src/observability/traits.rs b/src/observability/traits.rs index ea5f5d1..0249938 100644 --- a/src/observability/traits.rs +++ b/src/observability/traits.rs @@ -1,12 +1,15 @@ use std::time::Duration; -/// Events the observer can record +/// Discrete events emitted by the agent runtime for observability. +/// +/// Each variant represents a lifecycle event that observers can record, +/// aggregate, or forward to external monitoring systems. Events carry +/// just enough context for tracing and diagnostics without exposing +/// sensitive prompt or response content. #[derive(Debug, Clone)] pub enum ObserverEvent { - AgentStart { - provider: String, - model: String, - }, + /// The agent orchestration loop has started a new session. + AgentStart { provider: String, model: String }, /// A request is about to be sent to an LLM provider. 
/// /// This is emitted immediately before a provider call so observers can print @@ -24,6 +27,9 @@ pub enum ObserverEvent { success: bool, error_message: Option<String>, }, + /// The agent session has finished. + /// + /// Carries aggregate usage data (tokens, cost) when the provider reports it. AgentEnd { provider: String, model: String, @@ -32,9 +38,8 @@ pub enum ObserverEvent { cost_usd: Option<f64>, }, /// A tool call is about to be executed. - ToolCallStart { - tool: String, - }, + ToolCallStart { tool: String }, + /// A tool call has completed with a success/failure outcome. ToolCall { tool: String, duration: Duration, @@ -42,41 +47,80 @@ pub enum ObserverEvent { }, /// The agent produced a final answer for the current user message. TurnComplete, + /// A message was sent or received through a channel. ChannelMessage { + /// Channel name (e.g., `"telegram"`, `"discord"`). channel: String, + /// `"inbound"` or `"outbound"`. direction: String, }, + /// Periodic heartbeat tick from the runtime keep-alive loop. HeartbeatTick, + /// An error occurred in a named component. Error { + /// Subsystem where the error originated (e.g., `"provider"`, `"gateway"`). component: String, + /// Human-readable error description. Must not contain secrets or tokens. message: String, }, } -/// Numeric metrics +/// Numeric metrics emitted by the agent runtime. +/// +/// Observers can aggregate these into dashboards, alerts, or structured logs. +/// Each variant carries a single scalar value with implicit units. #[derive(Debug, Clone)] pub enum ObserverMetric { + /// Time elapsed for a single LLM or tool request. RequestLatency(Duration), + /// Number of tokens consumed by an LLM call. TokensUsed(u64), + /// Current number of active concurrent sessions. ActiveSessions(u64), + /// Current depth of the inbound message queue. QueueDepth(u64), } -/// Core observability trait — implement for any backend +/// Core observability trait for recording agent runtime telemetry. 
+/// +/// Implement this trait to integrate with any monitoring backend (structured +/// logging, Prometheus, OpenTelemetry, etc.). The agent runtime holds one or +/// more `Observer` instances and calls [`record_event`](Observer::record_event) +/// and [`record_metric`](Observer::record_metric) at key lifecycle points. +/// +/// Implementations must be `Send + Sync + 'static` because the observer is +/// shared across async tasks via `Arc`. pub trait Observer: Send + Sync + 'static { - /// Record a discrete event + /// Record a discrete lifecycle event. + /// + /// Called synchronously on the hot path; implementations should avoid + /// blocking I/O. Buffer events internally and flush asynchronously + /// when possible. fn record_event(&self, event: &ObserverEvent); - /// Record a numeric metric + /// Record a numeric metric sample. + /// + /// Called synchronously; same non-blocking guidance as + /// [`record_event`](Observer::record_event). fn record_metric(&self, metric: &ObserverMetric); - /// Flush any buffered data (no-op for most backends) + /// Flush any buffered telemetry data to the backend. + /// + /// The runtime calls this during graceful shutdown. The default + /// implementation is a no-op, which is appropriate for backends + /// that write synchronously. fn flush(&self) {} - /// Human-readable name of this observer + /// Return the human-readable name of this observer backend. + /// + /// Used in logs and diagnostics (e.g., `"console"`, `"prometheus"`, + /// `"opentelemetry"`). fn name(&self) -> &str; - /// Downcast to `Any` for backend-specific operations + /// Downcast to `Any` for backend-specific operations. + /// + /// Enables callers to access concrete observer types when needed + /// (e.g., retrieving a Prometheus registry handle for custom metrics). 
fn as_any(&self) -> &dyn std::any::Any; } diff --git a/src/peripherals/traits.rs b/src/peripherals/traits.rs index 6081d1d..0e27065 100644 --- a/src/peripherals/traits.rs +++ b/src/peripherals/traits.rs @@ -2,32 +2,74 @@ //! //! Peripherals are the agent's "arms and legs": remote devices that run minimal //! firmware and expose capabilities (GPIO, sensors, actuators) as tools. +//! See `docs/hardware-peripherals-design.md` for the communication protocol +//! and firmware integration guide. use async_trait::async_trait; use crate::tools::Tool; -/// A hardware peripheral that exposes capabilities as tools. +/// A hardware peripheral that exposes capabilities as agent tools. /// -/// Implement this for boards like Nucleo-F401RE (serial), RPi GPIO (native), etc. -/// When connected, the peripheral's tools are merged into the agent's tool registry. +/// Implement this trait for each supported board type (e.g., Nucleo-F401RE +/// over serial, Raspberry Pi GPIO via sysfs/gpiod). When the agent connects +/// to a peripheral, the tools returned by [`tools`](Peripheral::tools) are +/// merged into the agent's tool registry, making hardware capabilities +/// available to the LLM as callable functions. +/// +/// The lifecycle follows a connect → use → disconnect pattern. Implementations +/// must be `Send + Sync` because the peripheral may be accessed from multiple +/// async tasks after connection. #[async_trait] pub trait Peripheral: Send + Sync { - /// Human-readable peripheral name (e.g. "nucleo-f401re-0") + /// Return the human-readable instance name of this peripheral. + /// + /// Should uniquely identify a specific device instance, including an index + /// or serial number when multiple boards of the same type are connected + /// (e.g., `"nucleo-f401re-0"`, `"rpi-gpio-hat-1"`). fn name(&self) -> &str; - /// Board type identifier (e.g. "nucleo-f401re", "rpi-gpio") + /// Return the board type identifier for this peripheral. 
+ /// + /// A stable, lowercase string used in configuration and factory registration + /// (e.g., `"nucleo-f401re"`, `"rpi-gpio"`). Must match the key used in + /// the config schema's peripheral section. fn board_type(&self) -> &str; - /// Connect to the peripheral (open serial, init GPIO, etc.) + /// Establish a connection to the peripheral hardware. + /// + /// Opens the underlying transport (serial port, GPIO bus, I²C, etc.) and + /// performs any initialization handshake required by the firmware. + /// + /// # Errors + /// + /// Returns an error if the device is unreachable, the transport cannot be + /// opened, or the firmware handshake fails. async fn connect(&mut self) -> anyhow::Result<()>; - /// Disconnect and release resources + /// Disconnect from the peripheral and release all held resources. + /// + /// Closes serial ports, unexports GPIO pins, and performs any cleanup + /// required for a safe shutdown. After this call, [`health_check`](Peripheral::health_check) + /// should return `false` until [`connect`](Peripheral::connect) is called again. + /// + /// # Errors + /// + /// Returns an error if resource cleanup fails (e.g., serial port busy). async fn disconnect(&mut self) -> anyhow::Result<()>; - /// Check if the peripheral is reachable and responsive + /// Check whether the peripheral is reachable and responsive. + /// + /// Performs a lightweight probe (e.g., a ping command over serial) without + /// altering device state. Returns `true` if the device responds within an + /// implementation-defined timeout. async fn health_check(&self) -> bool; - /// Tools this peripheral provides (e.g. gpio_read, gpio_write, sensor_read) + /// Return the tools this peripheral exposes to the agent. + /// + /// Each returned [`Tool`] delegates execution to the underlying hardware + /// (e.g., `gpio_read`, `gpio_write`, `sensor_read`). The agent merges + /// these into its tool registry after a successful + /// [`connect`](Peripheral::connect). 
fn tools(&self) -> Vec<Box<dyn Tool>>; } diff --git a/src/runtime/traits.rs b/src/runtime/traits.rs index 153c06f..7e3e06a 100644 --- a/src/runtime/traits.rs +++ b/src/runtime/traits.rs @@ -1,29 +1,68 @@ use std::path::{Path, PathBuf}; -/// Runtime adapter — abstracts platform differences so the same agent -/// code runs on native, Docker, Cloudflare Workers, Raspberry Pi, etc. +/// Runtime adapter that abstracts platform differences for the agent. +/// +/// Implement this trait to port the agent to a new execution environment. +/// The adapter declares platform capabilities (shell access, filesystem, +/// long-running processes) and provides platform-specific implementations +/// for operations like spawning shell commands. The orchestration loop +/// queries these capabilities to adapt its behavior—for example, disabling +/// tool execution on runtimes without shell access. +/// +/// Implementations must be `Send + Sync` because the adapter is shared +/// across async tasks on the Tokio runtime. pub trait RuntimeAdapter: Send + Sync { - /// Human-readable runtime name + /// Return the human-readable name of this runtime environment. + /// + /// Used in logs and diagnostics (e.g., `"native"`, `"docker"`, + /// `"cloudflare-workers"`). fn name(&self) -> &str; - /// Whether this runtime supports shell access + /// Report whether this runtime supports shell command execution. + /// + /// When `false`, the agent disables shell-based tools. Serverless and + /// edge runtimes typically return `false`. fn has_shell_access(&self) -> bool; - /// Whether this runtime supports filesystem access + /// Report whether this runtime supports filesystem read/write. + /// + /// When `false`, the agent disables file-based tools and falls back to + /// in-memory storage. fn has_filesystem_access(&self) -> bool; - /// Base storage path for this runtime + /// Return the base directory for persistent storage on this runtime. 
+ /// + /// Memory backends, logs, and other artifacts are stored under this path. + /// Implementations should return a platform-appropriate writable directory. fn storage_path(&self) -> PathBuf; - /// Whether long-running processes (gateway, heartbeat) are supported + /// Report whether this runtime supports long-running background processes. + /// + /// When `true`, the agent may start the gateway server, heartbeat loop, + /// and other persistent tasks. Serverless runtimes with short execution + /// limits should return `false`. fn supports_long_running(&self) -> bool; - /// Maximum memory budget in bytes (0 = unlimited) + /// Return the maximum memory budget in bytes for this runtime. + /// + /// A value of `0` (the default) indicates no limit. Constrained + /// environments (embedded, serverless) should return their actual + /// memory ceiling so the agent can adapt buffer sizes and caching. fn memory_budget(&self) -> u64 { 0 } - /// Build a shell command process for this runtime. + /// Build a shell command process configured for this runtime. + /// + /// Constructs a [`tokio::process::Command`] that will execute `command` + /// with `workspace_dir` as the working directory. Implementations may + /// prepend sandbox wrappers, set environment variables, or redirect + /// I/O as appropriate for the platform. + /// + /// # Errors + /// + /// Returns an error if the runtime does not support shell access or if + /// the command cannot be constructed (e.g., missing shell binary). fn build_shell_command( &self, command: &str, diff --git a/src/security/traits.rs b/src/security/traits.rs index 06fc4ef..13e0738 100644 --- a/src/security/traits.rs +++ b/src/security/traits.rs @@ -1,25 +1,62 @@ -//! Sandbox trait for pluggable OS-level isolation +//! Sandbox trait for pluggable OS-level isolation. +//! +//! This module defines the [`Sandbox`] trait, which abstracts OS-level process +//! isolation backends. Implementations wrap shell commands with platform-specific +//! 
sandboxing (e.g., seccomp, AppArmor, namespaces) to limit the blast radius +//! of tool execution. The agent runtime selects and applies a sandbox backend +//! before executing any shell command. use async_trait::async_trait; use std::process::Command; -/// Sandbox backend for OS-level isolation +/// Sandbox backend for OS-level process isolation. +/// +/// Implement this trait to add a new sandboxing strategy. The runtime queries +/// [`is_available`](Sandbox::is_available) at startup to select the best +/// backend for the current platform, then calls +/// [`wrap_command`](Sandbox::wrap_command) before every shell execution. +/// +/// Implementations must be `Send + Sync` because the sandbox may be shared +/// across concurrent tool executions on the Tokio runtime. #[async_trait] pub trait Sandbox: Send + Sync { - /// Wrap a command with sandbox protection + /// Wrap a command with sandbox protection. + /// + /// Mutates `cmd` in place to apply isolation constraints (e.g., prepending + /// a wrapper binary, setting environment variables, adding seccomp filters). + /// + /// # Errors + /// + /// Returns `std::io::Error` if the sandbox configuration cannot be applied + /// (e.g., missing wrapper binary, invalid policy file). fn wrap_command(&self, cmd: &mut Command) -> std::io::Result<()>; - /// Check if this sandbox backend is available on the current platform + /// Check if this sandbox backend is available on the current platform. + /// + /// Returns `true` when all required kernel features, binaries, and + /// permissions are present. The runtime calls this at startup to select + /// the most capable available backend. fn is_available(&self) -> bool; - /// Human-readable name of this sandbox backend + /// Return the human-readable name of this sandbox backend. + /// + /// Used in logs and diagnostics to identify which isolation strategy is + /// active (e.g., `"firejail"`, `"bubblewrap"`, `"none"`). 
fn name(&self) -> &str; - /// Description of what this sandbox provides + /// Return a brief description of the isolation guarantees this sandbox provides. + /// + /// Displayed in status output and health checks so operators can verify + /// the active security posture. fn description(&self) -> &str; } -/// No-op sandbox (always available, provides no additional isolation) +/// No-op sandbox that provides no additional OS-level isolation. +/// +/// Always reports itself as available. Use this as the fallback when no +/// platform-specific sandbox backend is detected, or in development +/// environments where isolation is not required. Security in this mode +/// relies entirely on application-layer controls. #[derive(Debug, Clone, Default)] pub struct NoopSandbox;