From 0f6648ceb103a05b53bf27153a09875cfa74b080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edvard=20Sch=C3=B8yen?= <99178202+ecschoye@users.noreply.github.com> Date: Sun, 15 Feb 2026 14:46:49 -0500 Subject: [PATCH] feat: add OpenTelemetry tracing and metrics observer * feat: add OpenTelemetry tracing and metrics observer Add OtelObserver that exports traces and metrics via OTLP HTTP/protobuf to any OpenTelemetry-compatible collector (Jaeger, Grafana Tempo, etc.). - ObserverEvents map to OTel spans (AgentEnd, ToolCall, Error) and metric counters (AgentStart, ChannelMessage, HeartbeatTick) - ObserverMetrics map to OTel histograms and gauges - Spans include proper timing via SpanBuilder.with_start_time - Config: backend="otel", otel_endpoint, otel_service_name - Accepts "otel", "opentelemetry", "otlp" as backend aliases - Graceful fallback to NoopObserver on init failure Co-Authored-By: Claude Opus 4.6 * fix: resolve unused variable warning and update Cargo.lock Prefix unused `resolved_key` with underscore to suppress clippy warning introduced by upstream changes. Regenerate Cargo.lock after rebase on main. Co-Authored-By: Claude Opus 4.6 * fix: address review comments on OTel observer - Fix metric types: use Gauge for ActiveSessions/QueueDepth (absolute readings, not deltas), Counter for TokensUsed (monotonic) - Remove duplicate token recording from AgentEnd event handler (TokensUsed metric via record_metric is the canonical path) - Store meter_provider in struct so flush() exports both traces and metrics (was silently dropping metrics on shutdown) Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 Co-authored-by: argenis de la rosa --- Cargo.lock | 190 ++++++++++++++++++- Cargo.toml | 5 + src/config/schema.rs | 11 ++ src/observability/mod.rs | 58 +++++- src/observability/otel.rs | 371 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 632 insertions(+), 3 deletions(-) create mode 100644 src/observability/otel.rs diff --git a/Cargo.lock b/Cargo.lock index ced7e82..614cbb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -517,6 +517,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "email-encoding" version = "0.4.1" @@ -622,6 +628,17 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.32" @@ -1085,6 +1102,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -1325,6 +1351,76 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.18", +] + +[[package]] +name = "opentelemetry-http" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +dependencies = [ + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.18", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.2", + "thiserror 2.0.18", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -1360,6 +1456,26 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1440,6 +1556,29 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "psm" version = "0.1.30" @@ -1960,9 +2099,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.115" +version = "2.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" +checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" dependencies = [ "proc-macro2", "quote", @@ -2205,6 +2344,38 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tonic" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f32a6f80051a4111560201420c7885d0082ba9efe2ab61875c587bb6b18b9a0" +dependencies = [ + "async-trait", + "base64", + "bytes", + "http", + "http-body", + "http-body-util", + "percent-encoding", + "pin-project", + "sync_wrapper", + "tokio-stream", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-prost" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f86539c0089bfd09b1f8c0ab0239d80392af74c21bc9e0f15e1b4aca4c1647f" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.5.3" @@ -2259,9 +2430,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.36" @@ -3022,6 +3205,9 @@ dependencies = [ "http-body-util", "lettre", "mail-parser", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "prometheus", "reqwest", "rusqlite", diff --git a/Cargo.toml b/Cargo.toml index a9a1924..45dfcaf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,11 @@ tower = { version = "0.5", default-features = false } tower-http = { version = "0.6", default-features = false, features = ["limit", "timeout"] } http-body-util = "0.1" +# OpenTelemetry — OTLP trace + metrics export +opentelemetry = { version = "0.31", default-features = false, features = ["trace", "metrics"] } +opentelemetry_sdk = { version = "0.31", default-features = false, features = ["trace", "metrics"] } +opentelemetry-otlp = { version = "0.31", default-features = false, features = ["trace", "metrics", "http-proto", "reqwest-blocking-client"] } + [profile.release] opt-level = "z" # Optimize for size lto = true # Link-time optimization diff --git a/src/config/schema.rs b/src/config/schema.rs index 84496ab..4c81324 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -328,12 +328,22 @@ impl Default for MemoryConfig { pub struct ObservabilityConfig { /// "none" | "log" | "prometheus" | "otel" pub backend: String, + + /// OTLP endpoint (e.g. "http://localhost:4318"). Only used when backend = "otel". + #[serde(default)] + pub otel_endpoint: Option, + + /// Service name reported to the OTel collector. Defaults to "zeroclaw". + #[serde(default)] + pub otel_service_name: Option, } impl Default for ObservabilityConfig { fn default() -> Self { Self { backend: "none".into(), + otel_endpoint: None, + otel_service_name: None, } } } @@ -1087,6 +1097,7 @@ mod tests { default_temperature: 0.5, observability: ObservabilityConfig { backend: "log".into(), + ..ObservabilityConfig::default() }, autonomy: AutonomyConfig { level: AutonomyLevel::Full, diff --git a/src/observability/mod.rs b/src/observability/mod.rs index 801771d..c713663 100644 --- a/src/observability/mod.rs +++ b/src/observability/mod.rs @@ -1,10 +1,12 @@ pub mod log; pub mod multi; pub mod noop; +pub mod otel; pub mod traits; pub use self::log::LogObserver; pub use noop::NoopObserver; +pub use otel::OtelObserver; pub use traits::{Observer, ObserverEvent}; use crate::config::ObservabilityConfig; @@ -13,6 +15,24 @@ use crate::config::ObservabilityConfig; pub fn create_observer(config: &ObservabilityConfig) -> Box { match config.backend.as_str() { "log" => Box::new(LogObserver::new()), + "otel" | "opentelemetry" | "otlp" => { + match OtelObserver::new( + config.otel_endpoint.as_deref(), + config.otel_service_name.as_deref(), + ) { + Ok(obs) => { + tracing::info!( + endpoint = config.otel_endpoint.as_deref().unwrap_or("http://localhost:4318"), + "OpenTelemetry observer initialized" + ); + Box::new(obs) + } + Err(e) => { + tracing::error!("Failed to create OTel observer: {e}. Falling back to noop."); + Box::new(NoopObserver) + } + } + } "none" | "noop" => Box::new(NoopObserver), _ => { tracing::warn!( @@ -32,6 +52,7 @@ mod tests { fn factory_none_returns_noop() { let cfg = ObservabilityConfig { backend: "none".into(), + ..ObservabilityConfig::default() }; assert_eq!(create_observer(&cfg).name(), "noop"); } @@ -40,6 +61,7 @@ mod tests { fn factory_noop_returns_noop() { let cfg = ObservabilityConfig { backend: "noop".into(), + ..ObservabilityConfig::default() }; assert_eq!(create_observer(&cfg).name(), "noop"); } @@ -48,14 +70,46 @@ mod tests { fn factory_log_returns_log() { let cfg = ObservabilityConfig { backend: "log".into(), + ..ObservabilityConfig::default() }; assert_eq!(create_observer(&cfg).name(), "log"); } + #[test] + fn factory_otel_returns_otel() { + let cfg = ObservabilityConfig { + backend: "otel".into(), + otel_endpoint: Some("http://127.0.0.1:19999".into()), + otel_service_name: Some("test".into()), + }; + assert_eq!(create_observer(&cfg).name(), "otel"); + } + + #[test] + fn factory_opentelemetry_alias() { + let cfg = ObservabilityConfig { + backend: "opentelemetry".into(), + otel_endpoint: Some("http://127.0.0.1:19999".into()), + otel_service_name: Some("test".into()), + }; + assert_eq!(create_observer(&cfg).name(), "otel"); + } + + #[test] + fn factory_otlp_alias() { + let cfg = ObservabilityConfig { + backend: "otlp".into(), + otel_endpoint: Some("http://127.0.0.1:19999".into()), + otel_service_name: Some("test".into()), + }; + assert_eq!(create_observer(&cfg).name(), "otel"); + } + #[test] fn factory_unknown_falls_back_to_noop() { let cfg = ObservabilityConfig { - backend: "prometheus".into(), + backend: "xyzzy_unknown".into(), + ..ObservabilityConfig::default() }; assert_eq!(create_observer(&cfg).name(), "noop"); } @@ -64,6 +118,7 @@ mod tests { fn factory_empty_string_falls_back_to_noop() { let cfg = ObservabilityConfig { backend: String::new(), + ..ObservabilityConfig::default() }; assert_eq!(create_observer(&cfg).name(), "noop"); } @@ -72,6 +127,7 @@ mod tests { fn factory_garbage_falls_back_to_noop() { let cfg = ObservabilityConfig { backend: "xyzzy_garbage_123".into(), + ..ObservabilityConfig::default() }; assert_eq!(create_observer(&cfg).name(), "noop"); } diff --git a/src/observability/otel.rs b/src/observability/otel.rs new file mode 100644 index 0000000..591e336 --- /dev/null +++ b/src/observability/otel.rs @@ -0,0 +1,371 @@ +use super::traits::{Observer, ObserverEvent, ObserverMetric}; +use opentelemetry::metrics::{Counter, Gauge, Histogram}; +use opentelemetry::trace::{Span, SpanKind, Status, Tracer}; +use opentelemetry::{global, KeyValue}; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_sdk::trace::SdkTracerProvider; +use std::time::SystemTime; + +/// OpenTelemetry-backed observer — exports traces and metrics via OTLP. +pub struct OtelObserver { + tracer_provider: SdkTracerProvider, + meter_provider: SdkMeterProvider, + + // Metrics instruments + agent_starts: Counter, + agent_duration: Histogram, + tool_calls: Counter, + tool_duration: Histogram, + channel_messages: Counter, + heartbeat_ticks: Counter, + errors: Counter, + request_latency: Histogram, + tokens_used: Counter, + active_sessions: Gauge, + queue_depth: Gauge, +} + +impl OtelObserver { + /// Create a new OTel observer exporting to the given OTLP endpoint. + /// + /// Uses HTTP/protobuf transport (port 4318 by default). + /// Falls back to `http://localhost:4318` if no endpoint is provided. + pub fn new(endpoint: Option<&str>, service_name: Option<&str>) -> Result { + let endpoint = endpoint.unwrap_or("http://localhost:4318"); + let service_name = service_name.unwrap_or("zeroclaw"); + + // ── Trace exporter ────────────────────────────────────── + let span_exporter = opentelemetry_otlp::SpanExporter::builder() + .with_http() + .with_endpoint(endpoint) + .build() + .map_err(|e| format!("Failed to create OTLP span exporter: {e}"))?; + + let tracer_provider = SdkTracerProvider::builder() + .with_batch_exporter(span_exporter) + .with_resource(opentelemetry_sdk::Resource::builder() + .with_service_name(service_name.to_string()) + .build()) + .build(); + + global::set_tracer_provider(tracer_provider.clone()); + + // ── Metric exporter ───────────────────────────────────── + let metric_exporter = opentelemetry_otlp::MetricExporter::builder() + .with_http() + .with_endpoint(endpoint) + .build() + .map_err(|e| format!("Failed to create OTLP metric exporter: {e}"))?; + + let metric_reader = opentelemetry_sdk::metrics::PeriodicReader::builder(metric_exporter) + .build(); + + let meter_provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder() + .with_reader(metric_reader) + .with_resource(opentelemetry_sdk::Resource::builder() + .with_service_name(service_name.to_string()) + .build()) + .build(); + + let meter_provider_clone = meter_provider.clone(); + global::set_meter_provider(meter_provider); + + // ── Create metric instruments ──────────────────────────── + let meter = global::meter("zeroclaw"); + + let agent_starts = meter + .u64_counter("zeroclaw.agent.starts") + .with_description("Total agent invocations") + .build(); + + let agent_duration = meter + .f64_histogram("zeroclaw.agent.duration") + .with_description("Agent invocation duration in seconds") + .with_unit("s") + .build(); + + let tool_calls = meter + .u64_counter("zeroclaw.tool.calls") + .with_description("Total tool calls") + .build(); + + let tool_duration = meter + .f64_histogram("zeroclaw.tool.duration") + .with_description("Tool execution duration in seconds") + .with_unit("s") + .build(); + + let channel_messages = meter + .u64_counter("zeroclaw.channel.messages") + .with_description("Total channel messages") + .build(); + + let heartbeat_ticks = meter + .u64_counter("zeroclaw.heartbeat.ticks") + .with_description("Total heartbeat ticks") + .build(); + + let errors = meter + .u64_counter("zeroclaw.errors") + .with_description("Total errors by component") + .build(); + + let request_latency = meter + .f64_histogram("zeroclaw.request.latency") + .with_description("Request latency in seconds") + .with_unit("s") + .build(); + + let tokens_used = meter + .u64_counter("zeroclaw.tokens.used") + .with_description("Total tokens consumed (monotonic)") + .build(); + + let active_sessions = meter + .u64_gauge("zeroclaw.sessions.active") + .with_description("Current number of active sessions") + .build(); + + let queue_depth = meter + .u64_gauge("zeroclaw.queue.depth") + .with_description("Current message queue depth") + .build(); + + Ok(Self { + tracer_provider, + meter_provider: meter_provider_clone, + agent_starts, + agent_duration, + tool_calls, + tool_duration, + channel_messages, + heartbeat_ticks, + errors, + request_latency, + tokens_used, + active_sessions, + queue_depth, + }) + } +} + +impl Observer for OtelObserver { + fn record_event(&self, event: &ObserverEvent) { + let tracer = global::tracer("zeroclaw"); + + match event { + ObserverEvent::AgentStart { provider, model } => { + self.agent_starts.add( + 1, + &[ + KeyValue::new("provider", provider.clone()), + KeyValue::new("model", model.clone()), + ], + ); + } + ObserverEvent::AgentEnd { + duration, + tokens_used, + } => { + let secs = duration.as_secs_f64(); + let start_time = SystemTime::now() + .checked_sub(*duration) + .unwrap_or(SystemTime::now()); + + // Create a completed span with correct timing + let mut span = tracer.build( + opentelemetry::trace::SpanBuilder::from_name("agent.invocation") + .with_kind(SpanKind::Internal) + .with_start_time(start_time) + .with_attributes(vec![ + KeyValue::new("duration_s", secs), + ]), + ); + if let Some(t) = tokens_used { + span.set_attribute(KeyValue::new("tokens_used", *t as i64)); + } + span.end(); + + self.agent_duration.record(secs, &[]); + // Note: tokens are recorded via record_metric(TokensUsed) to avoid + // double-counting. AgentEnd only records duration. + } + ObserverEvent::ToolCall { + tool, + duration, + success, + } => { + let secs = duration.as_secs_f64(); + let start_time = SystemTime::now() + .checked_sub(*duration) + .unwrap_or(SystemTime::now()); + + let status = if *success { + Status::Ok + } else { + Status::error("") + }; + + let mut span = tracer.build( + opentelemetry::trace::SpanBuilder::from_name("tool.call") + .with_kind(SpanKind::Internal) + .with_start_time(start_time) + .with_attributes(vec![ + KeyValue::new("tool.name", tool.clone()), + KeyValue::new("tool.success", *success), + KeyValue::new("duration_s", secs), + ]), + ); + span.set_status(status); + span.end(); + + let attrs = [ + KeyValue::new("tool", tool.clone()), + KeyValue::new("success", success.to_string()), + ]; + self.tool_calls.add(1, &attrs); + self.tool_duration.record(secs, &[KeyValue::new("tool", tool.clone())]); + } + ObserverEvent::ChannelMessage { channel, direction } => { + self.channel_messages.add( + 1, + &[ + KeyValue::new("channel", channel.clone()), + KeyValue::new("direction", direction.clone()), + ], + ); + } + ObserverEvent::HeartbeatTick => { + self.heartbeat_ticks.add(1, &[]); + } + ObserverEvent::Error { component, message } => { + // Create an error span for visibility in trace backends + let mut span = tracer.build( + opentelemetry::trace::SpanBuilder::from_name("error") + .with_kind(SpanKind::Internal) + .with_attributes(vec![ + KeyValue::new("component", component.clone()), + KeyValue::new("error.message", message.clone()), + ]), + ); + span.set_status(Status::error(message.clone())); + span.end(); + + self.errors.add(1, &[KeyValue::new("component", component.clone())]); + } + } + } + + fn record_metric(&self, metric: &ObserverMetric) { + match metric { + ObserverMetric::RequestLatency(d) => { + self.request_latency.record(d.as_secs_f64(), &[]); + } + ObserverMetric::TokensUsed(t) => { + self.tokens_used.add(*t as u64, &[]); + } + ObserverMetric::ActiveSessions(s) => { + self.active_sessions.record(*s as u64, &[]); + } + ObserverMetric::QueueDepth(d) => { + self.queue_depth.record(*d as u64, &[]); + } + } + } + + fn flush(&self) { + if let Err(e) = self.tracer_provider.force_flush() { + tracing::warn!("OTel trace flush failed: {e}"); + } + if let Err(e) = self.meter_provider.force_flush() { + tracing::warn!("OTel metric flush failed: {e}"); + } + } + + fn name(&self) -> &str { + "otel" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + // Note: OtelObserver::new() requires an OTLP endpoint. + // In tests we verify the struct creation fails gracefully + // when no collector is available, and test the observer interface + // by constructing with a known-unreachable endpoint (spans/metrics + // are buffered and exported asynchronously, so recording never panics). + + fn test_observer() -> OtelObserver { + // Create with a dummy endpoint — exports will silently fail + // but the observer itself works fine for recording + OtelObserver::new( + Some("http://127.0.0.1:19999"), + Some("zeroclaw-test"), + ) + .expect("observer creation should not fail with valid endpoint format") + } + + #[test] + fn otel_observer_name() { + let obs = test_observer(); + assert_eq!(obs.name(), "otel"); + } + + #[test] + fn records_all_events_without_panic() { + let obs = test_observer(); + obs.record_event(&ObserverEvent::AgentStart { + provider: "openrouter".into(), + model: "claude-sonnet".into(), + }); + obs.record_event(&ObserverEvent::AgentEnd { + duration: Duration::from_millis(500), + tokens_used: Some(100), + }); + obs.record_event(&ObserverEvent::AgentEnd { + duration: Duration::ZERO, + tokens_used: None, + }); + obs.record_event(&ObserverEvent::ToolCall { + tool: "shell".into(), + duration: Duration::from_millis(10), + success: true, + }); + obs.record_event(&ObserverEvent::ToolCall { + tool: "file_read".into(), + duration: Duration::from_millis(5), + success: false, + }); + obs.record_event(&ObserverEvent::ChannelMessage { + channel: "telegram".into(), + direction: "inbound".into(), + }); + obs.record_event(&ObserverEvent::HeartbeatTick); + obs.record_event(&ObserverEvent::Error { + component: "provider".into(), + message: "timeout".into(), + }); + } + + #[test] + fn records_all_metrics_without_panic() { + let obs = test_observer(); + obs.record_metric(&ObserverMetric::RequestLatency(Duration::from_secs(2))); + obs.record_metric(&ObserverMetric::TokensUsed(500)); + obs.record_metric(&ObserverMetric::TokensUsed(0)); + obs.record_metric(&ObserverMetric::ActiveSessions(3)); + obs.record_metric(&ObserverMetric::QueueDepth(42)); + } + + #[test] + fn flush_does_not_panic() { + let obs = test_observer(); + obs.record_event(&ObserverEvent::HeartbeatTick); + obs.flush(); + } + +}