From 25fd10a5386bf4ca5b773ba0e875c7a95e5d965c Mon Sep 17 00:00:00 2001
From: Alex Gorevski <algore@microsoft.com>
Date: Thu, 19 Feb 2026 13:04:36 -0800
Subject: [PATCH] docs(code): expand doc comments on security, observability,
 runtime, and peripheral traits

The four underdocumented core trait files now include trait-level doc blocks
explaining purpose and architecture role, method-level documentation with
parameter/return/error descriptions, and public struct/enum documentation.

This brings them to parity with the well-documented provider, channel, tool,
and memory traits, giving extension developers clear guidance for implementing
these core extension points.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/observability/traits.rs | 74 +++++++++++++++++++++++++++++--------
 src/peripherals/traits.rs   | 60 +++++++++++++++++++++++++-----
 src/runtime/traits.rs       | 57 +++++++++++++++++++++++-----
 src/security/traits.rs      | 51 +++++++++++++++++++++----
 4 files changed, 202 insertions(+), 40 deletions(-)
diff --git a/src/observability/traits.rs b/src/observability/traits.rs
index ea5f5d1..0249938 100644
--- a/src/observability/traits.rs
+++ b/src/observability/traits.rs
@@ -1,12 +1,15 @@
 use std::time::Duration;
 
-/// Events the observer can record
+/// Discrete events emitted by the agent runtime for observability.
+///
+/// Each variant represents a lifecycle event that observers can record,
+/// aggregate, or forward to external monitoring systems. Events carry
+/// just enough context for tracing and diagnostics without exposing
+/// sensitive prompt or response content.
 #[derive(Debug, Clone)]
 pub enum ObserverEvent {
-    AgentStart {
-        provider: String,
-        model: String,
-    },
+    /// The agent orchestration loop has started a new session.
+    AgentStart { provider: String, model: String },
     /// A request is about to be sent to an LLM provider.
     ///
     /// This is emitted immediately before a provider call so observers can print
@@ -24,6 +27,9 @@ pub enum ObserverEvent {
         success: bool,
         error_message: Option<String>,
     },
+    /// The agent session has finished.
+    ///
+    /// Carries aggregate usage data (tokens, cost) when the provider reports it.
     AgentEnd {
         provider: String,
         model: String,
@@ -32,9 +38,8 @@ pub enum ObserverEvent {
         cost_usd: Option<f64>,
     },
     /// A tool call is about to be executed.
-    ToolCallStart {
-        tool: String,
-    },
+    ToolCallStart { tool: String },
+    /// A tool call has completed with a success/failure outcome.
     ToolCall {
         tool: String,
         duration: Duration,
@@ -42,41 +47,80 @@ pub enum ObserverEvent {
     },
     /// The agent produced a final answer for the current user message.
     TurnComplete,
+    /// A message was sent or received through a channel.
     ChannelMessage {
+        /// Channel name (e.g., `"telegram"`, `"discord"`).
         channel: String,
+        /// `"inbound"` or `"outbound"`.
         direction: String,
     },
+    /// Periodic heartbeat tick from the runtime keep-alive loop.
     HeartbeatTick,
+    /// An error occurred in a named component.
     Error {
+        /// Subsystem where the error originated (e.g., `"provider"`, `"gateway"`).
         component: String,
+        /// Human-readable error description. Must not contain secrets or tokens.
         message: String,
     },
 }
 
-/// Numeric metrics
+/// Numeric metrics emitted by the agent runtime.
+///
+/// Observers can aggregate these into dashboards, alerts, or structured logs.
+/// Each variant carries a single scalar value whose unit is implied by the variant.
 #[derive(Debug, Clone)]
 pub enum ObserverMetric {
+    /// Time elapsed for a single LLM or tool request.
     RequestLatency(Duration),
+    /// Number of tokens consumed by an LLM call.
     TokensUsed(u64),
+    /// Current number of active concurrent sessions.
     ActiveSessions(u64),
+    /// Current depth of the inbound message queue.
     QueueDepth(u64),
 }
 
-/// Core observability trait — implement for any backend
+/// Core observability trait for recording agent runtime telemetry.
+///
+/// Implement this trait to integrate with any monitoring backend (structured
+/// logging, Prometheus, OpenTelemetry, etc.). The agent runtime holds one or
+/// more `Observer` instances and calls [`record_event`](Observer::record_event)
+/// and [`record_metric`](Observer::record_metric) at key lifecycle points.
+///
+/// Implementations must be `Send + Sync + 'static` because the observer is
+/// shared across async tasks via `Arc`.
 pub trait Observer: Send + Sync + 'static {
-    /// Record a discrete event
+    /// Record a discrete lifecycle event.
+    ///
+    /// Called synchronously on the hot path; implementations should avoid
+    /// blocking I/O. Buffer events internally and flush asynchronously
+    /// when possible.
     fn record_event(&self, event: &ObserverEvent);
 
-    /// Record a numeric metric
+    /// Record a numeric metric sample.
+    ///
+    /// Called synchronously; same non-blocking guidance as
+    /// [`record_event`](Observer::record_event).
     fn record_metric(&self, metric: &ObserverMetric);
 
-    /// Flush any buffered data (no-op for most backends)
+    /// Flush any buffered telemetry data to the backend.
+    ///
+    /// The runtime calls this during graceful shutdown. The default
+    /// implementation is a no-op, which is appropriate for backends
+    /// that write synchronously.
     fn flush(&self) {}
 
-    /// Human-readable name of this observer
+    /// Return the human-readable name of this observer backend.
+    ///
+    /// Used in logs and diagnostics (e.g., `"console"`, `"prometheus"`,
+    /// `"opentelemetry"`).
     fn name(&self) -> &str;
 
-    /// Downcast to `Any` for backend-specific operations
+    /// Downcast to `Any` for backend-specific operations.
+    ///
+    /// Enables callers to access concrete observer types when needed
+    /// (e.g., retrieving a Prometheus registry handle for custom metrics).
     fn as_any(&self) -> &dyn std::any::Any;
 }
 
diff --git a/src/peripherals/traits.rs b/src/peripherals/traits.rs
index 6081d1d..0e27065 100644
--- a/src/peripherals/traits.rs
+++ b/src/peripherals/traits.rs
@@ -2,32 +2,74 @@
 //!
 //! Peripherals are the agent's "arms and legs": remote devices that run minimal
 //! firmware and expose capabilities (GPIO, sensors, actuators) as tools.
+//! See `docs/hardware-peripherals-design.md` for the communication protocol
+//! and firmware integration guide.
 
 use async_trait::async_trait;
 
 use crate::tools::Tool;
 
-/// A hardware peripheral that exposes capabilities as tools.
+/// A hardware peripheral that exposes capabilities as agent tools.
 ///
-/// Implement this for boards like Nucleo-F401RE (serial), RPi GPIO (native), etc.
-/// When connected, the peripheral's tools are merged into the agent's tool registry.
+/// Implement this trait for each supported board type (e.g., Nucleo-F401RE
+/// over serial, Raspberry Pi GPIO via sysfs/gpiod). When the agent connects
+/// to a peripheral, the tools returned by [`tools`](Peripheral::tools) are
+/// merged into the agent's tool registry, making hardware capabilities
+/// available to the LLM as callable functions.
+///
+/// The lifecycle follows a connect → use → disconnect pattern. Implementations
+/// must be `Send + Sync` because the peripheral may be accessed from multiple
+/// async tasks after connection.
 #[async_trait]
 pub trait Peripheral: Send + Sync {
-    /// Human-readable peripheral name (e.g. "nucleo-f401re-0")
+    /// Return the human-readable instance name of this peripheral.
+    ///
+    /// Should uniquely identify a specific device instance, including an index
+    /// or serial number when multiple boards of the same type are connected
+    /// (e.g., `"nucleo-f401re-0"`, `"rpi-gpio-hat-1"`).
     fn name(&self) -> &str;
 
-    /// Board type identifier (e.g. "nucleo-f401re", "rpi-gpio")
+    /// Return the board type identifier for this peripheral.
+    ///
+    /// A stable, lowercase string used in configuration and factory registration
+    /// (e.g., `"nucleo-f401re"`, `"rpi-gpio"`). Must match the key used in
+    /// the config schema's peripheral section.
     fn board_type(&self) -> &str;
 
-    /// Connect to the peripheral (open serial, init GPIO, etc.)
+    /// Establish a connection to the peripheral hardware.
+    ///
+    /// Opens the underlying transport (serial port, GPIO bus, I²C, etc.) and
+    /// performs any initialization handshake required by the firmware.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the device is unreachable, the transport cannot be
+    /// opened, or the firmware handshake fails.
     async fn connect(&mut self) -> anyhow::Result<()>;
 
-    /// Disconnect and release resources
+    /// Disconnect from the peripheral and release all held resources.
+    ///
+    /// Closes serial ports, unexports GPIO pins, and performs any cleanup
+    /// required for a safe shutdown. After this call, [`health_check`](Peripheral::health_check)
+    /// should return `false` until [`connect`](Peripheral::connect) is called again.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if resource cleanup fails (e.g., serial port busy).
     async fn disconnect(&mut self) -> anyhow::Result<()>;
 
-    /// Check if the peripheral is reachable and responsive
+    /// Check whether the peripheral is reachable and responsive.
+    ///
+    /// Performs a lightweight probe (e.g., a ping command over serial) without
+    /// altering device state. Returns `true` if the device responds within an
+    /// implementation-defined timeout.
     async fn health_check(&self) -> bool;
 
-    /// Tools this peripheral provides (e.g. gpio_read, gpio_write, sensor_read)
+    /// Return the tools this peripheral exposes to the agent.
+    ///
+    /// Each returned [`Tool`] delegates execution to the underlying hardware
+    /// (e.g., `gpio_read`, `gpio_write`, `sensor_read`). The agent merges
+    /// these into its tool registry after a successful
+    /// [`connect`](Peripheral::connect).
     fn tools(&self) -> Vec<Box<dyn Tool>>;
 }
diff --git a/src/runtime/traits.rs b/src/runtime/traits.rs
index 153c06f..7e3e06a 100644
--- a/src/runtime/traits.rs
+++ b/src/runtime/traits.rs
@@ -1,29 +1,68 @@
 use std::path::{Path, PathBuf};
 
-/// Runtime adapter — abstracts platform differences so the same agent
-/// code runs on native, Docker, Cloudflare Workers, Raspberry Pi, etc.
+/// Runtime adapter that abstracts platform differences for the agent.
+///
+/// Implement this trait to port the agent to a new execution environment.
+/// The adapter declares platform capabilities (shell access, filesystem,
+/// long-running processes) and provides platform-specific implementations
+/// for operations like spawning shell commands. The orchestration loop
+/// queries these capabilities to adapt its behavior—for example, disabling
+/// tool execution on runtimes without shell access.
+///
+/// Implementations must be `Send + Sync` because the adapter is shared
+/// across async tasks on the Tokio runtime.
 pub trait RuntimeAdapter: Send + Sync {
-    /// Human-readable runtime name
+    /// Return the human-readable name of this runtime environment.
+    ///
+    /// Used in logs and diagnostics (e.g., `"native"`, `"docker"`,
+    /// `"cloudflare-workers"`).
     fn name(&self) -> &str;
 
-    /// Whether this runtime supports shell access
+    /// Report whether this runtime supports shell command execution.
+    ///
+    /// When `false`, the agent disables shell-based tools. Serverless and
+    /// edge runtimes typically return `false`.
     fn has_shell_access(&self) -> bool;
 
-    /// Whether this runtime supports filesystem access
+    /// Report whether this runtime supports filesystem read/write.
+    ///
+    /// When `false`, the agent disables file-based tools and falls back to
+    /// in-memory storage.
     fn has_filesystem_access(&self) -> bool;
 
-    /// Base storage path for this runtime
+    /// Return the base directory for persistent storage on this runtime.
+    ///
+    /// Memory backends, logs, and other artifacts are stored under this path.
+    /// Implementations should return a platform-appropriate writable directory.
     fn storage_path(&self) -> PathBuf;
 
-    /// Whether long-running processes (gateway, heartbeat) are supported
+    /// Report whether this runtime supports long-running background processes.
+    ///
+    /// When `true`, the agent may start the gateway server, heartbeat loop,
+    /// and other persistent tasks. Serverless runtimes with short execution
+    /// limits should return `false`.
     fn supports_long_running(&self) -> bool;
 
-    /// Maximum memory budget in bytes (0 = unlimited)
+    /// Return the maximum memory budget in bytes for this runtime.
+    ///
+    /// A value of `0` (the default) indicates no limit. Constrained
+    /// environments (embedded, serverless) should return their actual
+    /// memory ceiling so the agent can adapt buffer sizes and caching.
     fn memory_budget(&self) -> u64 {
         0
     }
 
-    /// Build a shell command process for this runtime.
+    /// Build a shell command process configured for this runtime.
+    ///
+    /// Constructs a [`tokio::process::Command`] that will execute `command`
+    /// with `workspace_dir` as the working directory. Implementations may
+    /// prepend sandbox wrappers, set environment variables, or redirect
+    /// I/O as appropriate for the platform.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the runtime does not support shell access or if
+    /// the command cannot be constructed (e.g., missing shell binary).
     fn build_shell_command(
         &self,
         command: &str,
diff --git a/src/security/traits.rs b/src/security/traits.rs
index 06fc4ef..13e0738 100644
--- a/src/security/traits.rs
+++ b/src/security/traits.rs
@@ -1,25 +1,62 @@
-//! Sandbox trait for pluggable OS-level isolation
+//! Sandbox trait for pluggable OS-level isolation.
+//!
+//! This module defines the [`Sandbox`] trait, which abstracts OS-level process
+//! isolation backends. Implementations wrap shell commands with platform-specific
+//! sandboxing (e.g., seccomp, AppArmor, namespaces) to limit the blast radius
+//! of tool execution. The agent runtime selects and applies a sandbox backend
+//! before executing any shell command.
 
 use async_trait::async_trait;
 use std::process::Command;
 
-/// Sandbox backend for OS-level isolation
+/// Sandbox backend for OS-level process isolation.
+///
+/// Implement this trait to add a new sandboxing strategy. The runtime queries
+/// [`is_available`](Sandbox::is_available) at startup to select the best
+/// backend for the current platform, then calls
+/// [`wrap_command`](Sandbox::wrap_command) before every shell execution.
+///
+/// Implementations must be `Send + Sync` because the sandbox may be shared
+/// across concurrent tool executions on the Tokio runtime.
 #[async_trait]
 pub trait Sandbox: Send + Sync {
-    /// Wrap a command with sandbox protection
+    /// Wrap a command with sandbox protection.
+    ///
+    /// Mutates `cmd` in place to apply isolation constraints (e.g., prepending
+    /// a wrapper binary, setting environment variables, adding seccomp filters).
+    ///
+    /// # Errors
+    ///
+    /// Returns `std::io::Error` if the sandbox configuration cannot be applied
+    /// (e.g., missing wrapper binary, invalid policy file).
     fn wrap_command(&self, cmd: &mut Command) -> std::io::Result<()>;
 
-    /// Check if this sandbox backend is available on the current platform
+    /// Check if this sandbox backend is available on the current platform.
+    ///
+    /// Returns `true` when all required kernel features, binaries, and
+    /// permissions are present. The runtime calls this at startup to select
+    /// the most capable available backend.
     fn is_available(&self) -> bool;
 
-    /// Human-readable name of this sandbox backend
+    /// Return the human-readable name of this sandbox backend.
+    ///
+    /// Used in logs and diagnostics to identify which isolation strategy is
+    /// active (e.g., `"firejail"`, `"bubblewrap"`, `"none"`).
     fn name(&self) -> &str;
 
-    /// Description of what this sandbox provides
+    /// Return a brief description of the isolation guarantees this sandbox provides.
+    ///
+    /// Displayed in status output and health checks so operators can verify
+    /// the active security posture.
     fn description(&self) -> &str;
 }
 
-/// No-op sandbox (always available, provides no additional isolation)
+/// No-op sandbox that provides no additional OS-level isolation.
+///
+/// Always reports itself as available. Use this as the fallback when no
+/// platform-specific sandbox backend is detected, or in development
+/// environments where isolation is not required. Security in this mode
+/// relies entirely on application-layer controls.
 #[derive(Debug, Clone, Default)]
 pub struct NoopSandbox;