use std::time::Duration; /// Discrete events emitted by the agent runtime for observability. /// /// Each variant represents a lifecycle event that observers can record, /// aggregate, or forward to external monitoring systems. Events carry /// just enough context for tracing and diagnostics without exposing /// sensitive prompt or response content. #[derive(Debug, Clone)] pub enum ObserverEvent { /// The agent orchestration loop has started a new session. AgentStart { provider: String, model: String }, /// A request is about to be sent to an LLM provider. /// /// This is emitted immediately before a provider call so observers can print /// user-facing progress without leaking prompt contents. LlmRequest { provider: String, model: String, messages_count: usize, }, /// Result of a single LLM provider call. LlmResponse { provider: String, model: String, duration: Duration, success: bool, error_message: Option, }, /// The agent session has finished. /// /// Carries aggregate usage data (tokens, cost) when the provider reports it. AgentEnd { provider: String, model: String, duration: Duration, tokens_used: Option, cost_usd: Option, }, /// A tool call is about to be executed. ToolCallStart { tool: String }, /// A tool call has completed with a success/failure outcome. ToolCall { tool: String, duration: Duration, success: bool, }, /// The agent produced a final answer for the current user message. TurnComplete, /// A message was sent or received through a channel. ChannelMessage { /// Channel name (e.g., `"telegram"`, `"discord"`). channel: String, /// `"inbound"` or `"outbound"`. direction: String, }, /// Periodic heartbeat tick from the runtime keep-alive loop. HeartbeatTick, /// An error occurred in a named component. Error { /// Subsystem where the error originated (e.g., `"provider"`, `"gateway"`). component: String, /// Human-readable error description. Must not contain secrets or tokens. message: String, }, } /// Numeric metrics emitted by the agent runtime. /// /// Observers can aggregate these into dashboards, alerts, or structured logs. /// Each variant carries a single scalar value with implicit units. #[derive(Debug, Clone)] pub enum ObserverMetric { /// Time elapsed for a single LLM or tool request. RequestLatency(Duration), /// Number of tokens consumed by an LLM call. TokensUsed(u64), /// Current number of active concurrent sessions. ActiveSessions(u64), /// Current depth of the inbound message queue. QueueDepth(u64), } /// Core observability trait for recording agent runtime telemetry. /// /// Implement this trait to integrate with any monitoring backend (structured /// logging, Prometheus, OpenTelemetry, etc.). The agent runtime holds one or /// more `Observer` instances and calls [`record_event`](Observer::record_event) /// and [`record_metric`](Observer::record_metric) at key lifecycle points. /// /// Implementations must be `Send + Sync + 'static` because the observer is /// shared across async tasks via `Arc`. pub trait Observer: Send + Sync + 'static { /// Record a discrete lifecycle event. /// /// Called synchronously on the hot path; implementations should avoid /// blocking I/O. Buffer events internally and flush asynchronously /// when possible. fn record_event(&self, event: &ObserverEvent); /// Record a numeric metric sample. /// /// Called synchronously; same non-blocking guidance as /// [`record_event`](Observer::record_event). fn record_metric(&self, metric: &ObserverMetric); /// Flush any buffered telemetry data to the backend. /// /// The runtime calls this during graceful shutdown. The default /// implementation is a no-op, which is appropriate for backends /// that write synchronously. fn flush(&self) {} /// Return the human-readable name of this observer backend. /// /// Used in logs and diagnostics (e.g., `"console"`, `"prometheus"`, /// `"opentelemetry"`). fn name(&self) -> &str; /// Downcast to `Any` for backend-specific operations. /// /// Enables callers to access concrete observer types when needed /// (e.g., retrieving a Prometheus registry handle for custom metrics). fn as_any(&self) -> &dyn std::any::Any; } #[cfg(test)] mod tests { use super::*; use parking_lot::Mutex; use std::time::Duration; #[derive(Default)] struct DummyObserver { events: Mutex, metrics: Mutex, } impl Observer for DummyObserver { fn record_event(&self, _event: &ObserverEvent) { let mut guard = self.events.lock(); *guard += 1; } fn record_metric(&self, _metric: &ObserverMetric) { let mut guard = self.metrics.lock(); *guard += 1; } fn name(&self) -> &str { "dummy-observer" } fn as_any(&self) -> &dyn std::any::Any { self } } #[test] fn observer_records_events_and_metrics() { let observer = DummyObserver::default(); observer.record_event(&ObserverEvent::HeartbeatTick); observer.record_event(&ObserverEvent::Error { component: "test".into(), message: "boom".into(), }); observer.record_metric(&ObserverMetric::TokensUsed(42)); assert_eq!(*observer.events.lock(), 2); assert_eq!(*observer.metrics.lock(), 1); } #[test] fn observer_default_flush_and_as_any_work() { let observer = DummyObserver::default(); observer.flush(); assert_eq!(observer.name(), "dummy-observer"); assert!(observer.as_any().downcast_ref::().is_some()); } #[test] fn observer_event_and_metric_are_cloneable() { let event = ObserverEvent::ToolCall { tool: "shell".into(), duration: Duration::from_millis(10), success: true, }; let metric = ObserverMetric::RequestLatency(Duration::from_millis(8)); let cloned_event = event.clone(); let cloned_metric = metric.clone(); assert!(matches!(cloned_event, ObserverEvent::ToolCall { .. })); assert!(matches!(cloned_metric, ObserverMetric::RequestLatency(_))); } }