feat(observability): implement Prometheus metrics backend with /metrics endpoint
- Adds PrometheusObserver backend with counters, histograms, and gauges - Tracks agent starts/duration, tool calls, channel messages, heartbeat ticks, errors, request latency, tokens, sessions, queue depth - Adds GET /metrics endpoint to gateway for Prometheus scraping - Adds provider/model labels to AgentStart and AgentEnd events for better observability - Adds as_any() method to Observer trait for backend-specific downcast Metrics exposed: - zeroclaw_agent_starts_total (Counter) with provider/model labels - zeroclaw_agent_duration_seconds (Histogram) with provider/model labels - zeroclaw_tool_calls_total (Counter) with tool/success labels - zeroclaw_tool_duration_seconds (Histogram) with tool label - zeroclaw_channel_messages_total (Counter) with channel/direction labels - zeroclaw_heartbeat_ticks_total (Counter) - zeroclaw_errors_total (Counter) with component label - zeroclaw_request_latency_seconds (Histogram) - zeroclaw_tokens_used_last (Gauge) - zeroclaw_active_sessions (Gauge) - zeroclaw_queue_depth (Gauge) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c04f2855e4
commit
eba544dbd4
11 changed files with 575 additions and 228 deletions
|
|
@ -1,19 +1,13 @@
|
|||
pub mod log;
|
||||
pub mod multi;
|
||||
pub mod noop;
|
||||
pub mod otel;
|
||||
pub mod prometheus;
|
||||
pub mod traits;
|
||||
pub mod verbose;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use self::log::LogObserver;
|
||||
#[allow(unused_imports)]
|
||||
pub use self::multi::MultiObserver;
|
||||
pub use noop::NoopObserver;
|
||||
pub use otel::OtelObserver;
|
||||
pub use prometheus::PrometheusObserver;
|
||||
pub use traits::{Observer, ObserverEvent};
|
||||
#[allow(unused_imports)]
|
||||
pub use verbose::VerboseObserver;
|
||||
|
||||
use crate::config::ObservabilityConfig;
|
||||
|
||||
|
|
@ -21,27 +15,7 @@ use crate::config::ObservabilityConfig;
|
|||
pub fn create_observer(config: &ObservabilityConfig) -> Box<dyn Observer> {
|
||||
match config.backend.as_str() {
|
||||
"log" => Box::new(LogObserver::new()),
|
||||
"otel" | "opentelemetry" | "otlp" => {
|
||||
match OtelObserver::new(
|
||||
config.otel_endpoint.as_deref(),
|
||||
config.otel_service_name.as_deref(),
|
||||
) {
|
||||
Ok(obs) => {
|
||||
tracing::info!(
|
||||
endpoint = config
|
||||
.otel_endpoint
|
||||
.as_deref()
|
||||
.unwrap_or("http://localhost:4318"),
|
||||
"OpenTelemetry observer initialized"
|
||||
);
|
||||
Box::new(obs)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to create OTel observer: {e}. Falling back to noop.");
|
||||
Box::new(NoopObserver)
|
||||
}
|
||||
}
|
||||
}
|
||||
"prometheus" => Box::new(PrometheusObserver::new()),
|
||||
"none" | "noop" => Box::new(NoopObserver),
|
||||
_ => {
|
||||
tracing::warn!(
|
||||
|
|
@ -61,7 +35,7 @@ mod tests {
|
|||
fn factory_none_returns_noop() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "none".into(),
|
||||
..ObservabilityConfig::default()
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "noop");
|
||||
}
|
||||
|
|
@ -70,7 +44,7 @@ mod tests {
|
|||
fn factory_noop_returns_noop() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "noop".into(),
|
||||
..ObservabilityConfig::default()
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "noop");
|
||||
}
|
||||
|
|
@ -79,46 +53,25 @@ mod tests {
|
|||
fn factory_log_returns_log() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "log".into(),
|
||||
..ObservabilityConfig::default()
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "log");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn factory_otel_returns_otel() {
|
||||
fn factory_prometheus_returns_prometheus() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "otel".into(),
|
||||
otel_endpoint: Some("http://127.0.0.1:19999".into()),
|
||||
otel_service_name: Some("test".into()),
|
||||
backend: "prometheus".into(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "otel");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn factory_opentelemetry_alias() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "opentelemetry".into(),
|
||||
otel_endpoint: Some("http://127.0.0.1:19999".into()),
|
||||
otel_service_name: Some("test".into()),
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "otel");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn factory_otlp_alias() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "otlp".into(),
|
||||
otel_endpoint: Some("http://127.0.0.1:19999".into()),
|
||||
otel_service_name: Some("test".into()),
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "otel");
|
||||
assert_eq!(create_observer(&cfg).name(), "prometheus");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn factory_unknown_falls_back_to_noop() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "xyzzy_unknown".into(),
|
||||
..ObservabilityConfig::default()
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "noop");
|
||||
}
|
||||
|
|
@ -127,7 +80,7 @@ mod tests {
|
|||
fn factory_empty_string_falls_back_to_noop() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: String::new(),
|
||||
..ObservabilityConfig::default()
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "noop");
|
||||
}
|
||||
|
|
@ -136,7 +89,7 @@ mod tests {
|
|||
fn factory_garbage_falls_back_to_noop() {
|
||||
let cfg = ObservabilityConfig {
|
||||
backend: "xyzzy_garbage_123".into(),
|
||||
..ObservabilityConfig::default()
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(create_observer(&cfg).name(), "noop");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue