Skip to main content

rustrade_supervisor/
prometheus.rs

1//! Local Prometheus collectors for supervisor metrics.
2//!
3//! Compiled only when the `prometheus` feature is enabled. The collectors
4//! live in a process-local [`Registry`] (NOT the prometheus crate's default
5//! global registry, so host services that already own a registry don't
6//! collide with us). Hosts that want to expose these metrics call
7//! [`registry()`] and either:
8//!
9//! - Serve `registry().gather()` directly from their `/metrics` handler, or
10//! - Use [`prometheus::Registry::register`] on each individual collector to
11//!   merge them into the host's own registry.
12//!
13//! The atomic counters in `SupervisorMetrics` remain the authoritative
14//! in-process source of truth; this module just mirrors them into
15//! Prometheus-shaped collectors.
16
17use std::sync::OnceLock;
18
19use prometheus::{Counter, Gauge, HistogramOpts, HistogramVec, Registry};
20
21/// The supervisor's local prometheus collectors.
22pub struct Collectors {
23    /// The registry these collectors live in.
24    pub registry: Registry,
25    /// `rustrade_supervisor_restarts_total` — counter.
26    pub restarts_total: Counter,
27    /// `rustrade_supervisor_active_services` — gauge.
28    pub active_services: Gauge,
29    /// `rustrade_supervisor_spawned_total` — counter.
30    pub spawned_total: Counter,
31    /// `rustrade_supervisor_terminated_total` — counter.
32    pub terminated_total: Counter,
33    /// `rustrade_supervisor_circuit_breaker_trips_total` — counter.
34    pub circuit_breaker_trips: Counter,
35    /// `rustrade_supervisor_uptime_seconds` — histogram (labelled by service).
36    pub uptime_seconds: HistogramVec,
37}
38
39static COLLECTORS: OnceLock<Collectors> = OnceLock::new();
40
41/// Return the lazily-initialized supervisor collectors.
42pub fn collectors() -> &'static Collectors {
43    COLLECTORS.get_or_init(Collectors::new)
44}
45
46/// Return the local prometheus registry that owns the supervisor collectors.
47///
48/// Host services typically `gather()` this from their `/metrics` handler:
49///
50/// ```ignore
51/// use prometheus::Encoder;
52/// let registry = rustrade_supervisor::prometheus::registry();
53/// let mut buf = Vec::new();
54/// prometheus::TextEncoder::new()
55///     .encode(&registry.gather(), &mut buf)
56///     .unwrap();
57/// ```
58pub fn registry() -> &'static Registry {
59    &collectors().registry
60}
61
62impl Collectors {
63    fn new() -> Self {
64        let registry = Registry::new();
65
66        let restarts_total = Counter::new(
67            "rustrade_supervisor_restarts_total",
68            "Total number of service restarts across all services",
69        )
70        .expect("create restarts_total counter");
71
72        let active_services = Gauge::new(
73            "rustrade_supervisor_active_services",
74            "Number of services currently in a non-terminal phase",
75        )
76        .expect("create active_services gauge");
77
78        let spawned_total = Counter::new(
79            "rustrade_supervisor_spawned_total",
80            "Total number of services ever spawned (including restarts)",
81        )
82        .expect("create spawned_total counter");
83
84        let terminated_total = Counter::new(
85            "rustrade_supervisor_terminated_total",
86            "Total number of services that have terminated",
87        )
88        .expect("create terminated_total counter");
89
90        let circuit_breaker_trips = Counter::new(
91            "rustrade_supervisor_circuit_breaker_trips_total",
92            "Total number of circuit breaker trips across all services",
93        )
94        .expect("create circuit_breaker_trips counter");
95
96        let uptime_seconds = HistogramVec::new(
97            HistogramOpts::new(
98                "rustrade_supervisor_uptime_seconds",
99                "Cumulative running time of a service at termination, in seconds",
100            ),
101            &["service"],
102        )
103        .expect("create uptime_seconds histogram_vec");
104
105        registry
106            .register(Box::new(restarts_total.clone()))
107            .expect("register restarts_total");
108        registry
109            .register(Box::new(active_services.clone()))
110            .expect("register active_services");
111        registry
112            .register(Box::new(spawned_total.clone()))
113            .expect("register spawned_total");
114        registry
115            .register(Box::new(terminated_total.clone()))
116            .expect("register terminated_total");
117        registry
118            .register(Box::new(circuit_breaker_trips.clone()))
119            .expect("register circuit_breaker_trips");
120        registry
121            .register(Box::new(uptime_seconds.clone()))
122            .expect("register uptime_seconds");
123
124        Self {
125            registry,
126            restarts_total,
127            active_services,
128            spawned_total,
129            terminated_total,
130            circuit_breaker_trips,
131            uptime_seconds,
132        }
133    }
134}