solti_prometheus/
discover.rs1use std::sync::Arc;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use prometheus::{Counter, CounterVec, Gauge, Histogram, HistogramVec, Registry};
7use solti_discover::{DiscoverMetricsBackend, OUTCOME_FAILURE, OUTCOME_SUCCESS};
8
9use crate::register::{Sub, ms_to_secs};
10
11pub struct PrometheusDiscoverMetrics {
25 attempts_total: Counter,
26 outcomes_total: CounterVec,
27 duration_seconds: HistogramVec,
28 failures_total: CounterVec,
29 last_success_ts: Gauge,
30 holds_total: Counter,
31 hold_duration_seconds: Histogram,
32}
33
34impl PrometheusDiscoverMetrics {
35 pub fn new(registry: Arc<Registry>) -> Result<Self, prometheus::Error> {
37 let r = Sub::new(®istry, "discover");
38
39 let attempts_total = r.counter("attempts_total", "Total discovery heartbeat attempts")?;
40 let outcomes_total = r.counter_vec(
41 "outcomes_total",
42 "Discovery heartbeat outcomes",
43 &["outcome"],
44 )?;
45 let duration_seconds = r.histogram_vec(
46 "duration_seconds",
47 "Discovery heartbeat call duration",
48 vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0],
49 &["outcome"],
50 )?;
51 let failures_total = r.counter_vec(
52 "failures_total",
53 "Discovery heartbeat failures grouped by reason",
54 &["reason"],
55 )?;
56 let last_success_ts = r.gauge(
57 "last_success_timestamp_seconds",
58 "UNIX timestamp of the last successful heartbeat",
59 )?;
60 let holds_total = r.counter("holds_total", "Server-advised retry holds observed")?;
61 let hold_duration_seconds = r.histogram(
62 "hold_duration_seconds",
63 "Duration of server-advised retry holds",
64 vec![1.0, 5.0, 15.0, 30.0, 60.0, 300.0, 900.0, 1800.0, 3600.0],
65 )?;
66
67 Ok(Self {
68 attempts_total,
69 outcomes_total,
70 duration_seconds,
71 failures_total,
72 last_success_ts,
73 holds_total,
74 hold_duration_seconds,
75 })
76 }
77}
78
79impl std::fmt::Debug for PrometheusDiscoverMetrics {
80 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81 f.debug_struct("PrometheusDiscoverMetrics").finish()
82 }
83}
84
85impl DiscoverMetricsBackend for PrometheusDiscoverMetrics {
86 fn record_attempt(&self) {
87 self.attempts_total.inc();
88 }
89
90 fn record_success(&self, duration_ms: u64) {
91 self.outcomes_total
92 .with_label_values(&[OUTCOME_SUCCESS])
93 .inc();
94 self.duration_seconds
95 .with_label_values(&[OUTCOME_SUCCESS])
96 .observe(ms_to_secs(duration_ms));
97 let ts = SystemTime::now()
98 .duration_since(UNIX_EPOCH)
99 .map(|d| d.as_secs_f64())
100 .unwrap_or(0.0);
101 self.last_success_ts.set(ts);
102 }
103
104 fn record_failure(&self, duration_ms: u64, reason: &'static str) {
105 self.outcomes_total
106 .with_label_values(&[OUTCOME_FAILURE])
107 .inc();
108 self.duration_seconds
109 .with_label_values(&[OUTCOME_FAILURE])
110 .observe(ms_to_secs(duration_ms));
111 self.failures_total.with_label_values(&[reason]).inc();
112 }
113
114 fn record_hold(&self, duration_s: u64) {
115 self.holds_total.inc();
116 self.hold_duration_seconds.observe(duration_s as f64);
117 }
118}