Skip to main content

dynomite/stats/
prometheus.rs

1//! Prometheus text exposition rendering for stats snapshots.
2//!
3//! The renderer walks the immutable [`Snapshot`] value and produces a
4//! Prometheus 0.0.4 text-format string suitable for serving over an
5//! HTTP `/metrics` endpoint. The rendering uses the `prometheus`
6//! crate's [`Registry`] and [`TextEncoder`]: a fresh registry is built
7//! per call, every metric family is registered with its `# HELP` and
8//! `# TYPE` headers, and the counter/gauge values are filled from the
9//! snapshot before encoding.
10//!
11//! Naming conventions:
12//!
13//! * Pool counters become `dynomite_pool_<field>_total` with a single
14//!   `pool` label.
15//! * Pool gauges and timestamps become `dynomite_pool_<field>` with a
16//!   single `pool` label.
17//! * Server counters become `dynomite_server_<field>_total` with a
18//!   single `server` label.
19//! * Server gauges and timestamps become `dynomite_server_<field>`
20//!   with a single `server` label.
//! * Histogram summaries (latency, payload size, queue waits, etc.)
//!   are exposed as gauges named `dynomite_<channel>_microseconds`
//!   (payload sizes use `dynomite_payload_size_bytes` instead),
//!   each carrying a `quantile` label. Prometheus does not have a
//!   way to round-trip a pre-aggregated estimated histogram, so we
//!   publish the same quantile rollups the JSON endpoint already
//!   exposes.
26//! * The build identification block is published as
27//!   `dynomite_build_info{version,source,rack,dc}` set to `1`, the
28//!   convention popularised by `node_exporter`.
29//! * Each server entry also produces `dynomite_peer_state` with the
30//!   `peer` and `state` labels, set to `1` for `up` and `0` for
31//!   `down`. The current snapshot model treats every server as up
32//!   (the eject timestamps live in their respective metrics); the
33//!   gauge is emitted so dashboards have a stable label set to
34//!   target.
35
36use prometheus::{Encoder, IntCounterVec, IntGaugeVec, Opts, Registry, TextEncoder};
37
38use crate::cluster::peer::PeerState;
39use crate::stats::codec::{StatsMetricType, POOL_CODEC, SERVER_CODEC};
40use crate::stats::failure::FailureSnapshot;
41use crate::stats::snapshot::{HistogramSummary, Snapshot};
42
43/// Render a [`Snapshot`] in the Prometheus 0.0.4 text exposition format.
44///
45/// The string returned is a complete, self-contained response body
46/// that may be served directly with a
47/// `Content-Type: text/plain; version=0.0.4; charset=utf-8` header.
48///
49/// # Examples
50///
51/// ```
52/// use dynomite::stats::{render_prometheus, PoolStats, ServerStats, ServiceInfo, Snapshot};
53///
54/// let snap = Snapshot {
55///     info: ServiceInfo {
56///         source: "node-a".into(),
57///         version: "0.0.1".into(),
58///         rack: "r1".into(),
59///         dc: "dc1".into(),
60///     },
61///     pool: PoolStats::new("dyn_o_mite"),
62///     server: ServerStats::new("redis_local"),
63///     ..Snapshot::default()
64/// };
65/// let text = render_prometheus(&snap);
66/// assert!(text.contains("dynomite_build_info"));
67/// assert!(text.contains("# TYPE dynomite_build_info gauge"));
68/// ```
69pub fn render_prometheus(snap: &Snapshot) -> String {
70    let registry = Registry::new();
71    register_build_info(&registry, snap);
72    register_uptime(&registry, snap);
73    register_resource_usage(&registry, snap);
74    register_pool(&registry, snap);
75    register_server(&registry, snap);
76    register_peer_state(&registry, snap);
77    register_failure_metrics(&registry, &snap.failure);
78    register_histogram_summaries(&registry, snap);
79    register_queue_p99s(&registry, snap);
80
81    let mut buf = Vec::with_capacity(8 * 1024);
82    let encoder = TextEncoder::new();
83    encoder
84        .encode(&registry.gather(), &mut buf)
85        .expect("invariant: TextEncoder writes valid UTF-8 into Vec<u8>");
86    String::from_utf8(buf).expect("invariant: TextEncoder emits UTF-8")
87}
88
89fn register_build_info(registry: &Registry, snap: &Snapshot) {
90    let opts = Opts::new(
91        "dynomite_build_info",
92        "Static identification of the running engine; value is always 1.",
93    );
94    let gauge = IntGaugeVec::new(opts, &["version", "source", "rack", "dc"])
95        .expect("invariant: build_info descriptor is valid");
96    gauge
97        .with_label_values(&[
98            &snap.info.version,
99            &snap.info.source,
100            &snap.info.rack,
101            &snap.info.dc,
102        ])
103        .set(1);
104    registry
105        .register(Box::new(gauge))
106        .expect("invariant: build_info registers cleanly");
107}
108
109fn register_uptime(registry: &Registry, snap: &Snapshot) {
110    let opts = Opts::new(
111        "dynomite_uptime_seconds",
112        "Seconds elapsed since the engine started.",
113    );
114    let gauge = IntGaugeVec::new(opts, &[]).expect("invariant: uptime descriptor is valid");
115    gauge.with_label_values::<&str>(&[]).set(snap.uptime);
116    registry
117        .register(Box::new(gauge))
118        .expect("invariant: uptime registers cleanly");
119
120    let opts = Opts::new(
121        "dynomite_timestamp_seconds",
122        "Wall-clock seconds since the UNIX epoch at snapshot time.",
123    );
124    let gauge = IntGaugeVec::new(opts, &[]).expect("invariant: timestamp descriptor is valid");
125    gauge.with_label_values::<&str>(&[]).set(snap.timestamp);
126    registry
127        .register(Box::new(gauge))
128        .expect("invariant: timestamp registers cleanly");
129}
130
131fn register_resource_usage(registry: &Registry, snap: &Snapshot) {
132    let entries: [(&str, &str, i64); 5] = [
133        (
134            "dynomite_alloc_msgs",
135            "Number of message structs currently allocated.",
136            snap.alloc_msgs,
137        ),
138        (
139            "dynomite_free_msgs",
140            "Number of message structs on the free list.",
141            snap.free_msgs,
142        ),
143        (
144            "dynomite_alloc_mbufs",
145            "Number of mbuf chunks currently allocated.",
146            snap.alloc_mbufs,
147        ),
148        (
149            "dynomite_free_mbufs",
150            "Number of mbuf chunks on the free list.",
151            snap.free_mbufs,
152        ),
153        (
154            "dynomite_memory_bytes",
155            "Resident set size of the engine in bytes.",
156            snap.dyn_memory,
157        ),
158    ];
159    for (name, help, value) in entries {
160        let gauge = IntGaugeVec::new(Opts::new(name, help), &[])
161            .expect("invariant: resource gauge descriptor is valid");
162        gauge.with_label_values::<&str>(&[]).set(value);
163        registry
164            .register(Box::new(gauge))
165            .expect("invariant: resource gauge registers cleanly");
166    }
167}
168
169fn register_pool(registry: &Registry, snap: &Snapshot) {
170    let pool = &snap.pool.name;
171    for (i, spec) in POOL_CODEC.iter().enumerate() {
172        let value = snap.pool.metrics.get(i).copied().unwrap_or(0);
173        match spec.kind {
174            StatsMetricType::Counter => {
175                let name = format!("dynomite_pool_{}_total", spec.name);
176                let opts = Opts::new(name, spec.description);
177                let counter = IntCounterVec::new(opts, &["pool"])
178                    .expect("invariant: pool counter descriptor is valid");
179                if value > 0 {
180                    counter
181                        .with_label_values(&[pool.as_str()])
182                        .inc_by(u64::try_from(value).unwrap_or(0));
183                } else {
184                    let _ = counter.with_label_values(&[pool.as_str()]);
185                }
186                registry
187                    .register(Box::new(counter))
188                    .expect("invariant: pool counter registers cleanly");
189            }
190            StatsMetricType::Gauge | StatsMetricType::Timestamp => {
191                let name = format!("dynomite_pool_{}", spec.name);
192                let opts = Opts::new(name, spec.description);
193                let gauge = IntGaugeVec::new(opts, &["pool"])
194                    .expect("invariant: pool gauge descriptor is valid");
195                gauge.with_label_values(&[pool.as_str()]).set(value);
196                registry
197                    .register(Box::new(gauge))
198                    .expect("invariant: pool gauge registers cleanly");
199            }
200        }
201    }
202}
203
204fn register_server(registry: &Registry, snap: &Snapshot) {
205    let server = &snap.server.name;
206    for (i, spec) in SERVER_CODEC.iter().enumerate() {
207        let value = snap.server.metrics.get(i).copied().unwrap_or(0);
208        match spec.kind {
209            StatsMetricType::Counter => {
210                let name = format!("dynomite_server_{}_total", spec.name);
211                let opts = Opts::new(name, spec.description);
212                let counter = IntCounterVec::new(opts, &["server"])
213                    .expect("invariant: server counter descriptor is valid");
214                if value > 0 {
215                    counter
216                        .with_label_values(&[server.as_str()])
217                        .inc_by(u64::try_from(value).unwrap_or(0));
218                } else {
219                    let _ = counter.with_label_values(&[server.as_str()]);
220                }
221                registry
222                    .register(Box::new(counter))
223                    .expect("invariant: server counter registers cleanly");
224            }
225            StatsMetricType::Gauge | StatsMetricType::Timestamp => {
226                let name = format!("dynomite_server_{}", spec.name);
227                let opts = Opts::new(name, spec.description);
228                let gauge = IntGaugeVec::new(opts, &["server"])
229                    .expect("invariant: server gauge descriptor is valid");
230                gauge.with_label_values(&[server.as_str()]).set(value);
231                registry
232                    .register(Box::new(gauge))
233                    .expect("invariant: server gauge registers cleanly");
234            }
235        }
236    }
237}
238
239fn register_peer_state(registry: &Registry, snap: &Snapshot) {
240    let opts = Opts::new(
241        "dynomite_peer_state",
242        "Peer up/down indicator. The active state has value 1; the other has value 0.",
243    );
244    let gauge = IntGaugeVec::new(opts, &["peer", "state"])
245        .expect("invariant: peer_state descriptor is valid");
246    let peer = snap.server.name.as_str();
247    gauge.with_label_values(&[peer, "up"]).set(1);
248    gauge.with_label_values(&[peer, "down"]).set(0);
249    registry
250        .register(Box::new(gauge))
251        .expect("invariant: peer_state registers cleanly");
252}
253
254fn register_failure_metrics(registry: &Registry, failure: &FailureSnapshot) {
255    register_failure_no_targets(registry, failure);
256    register_failure_peer_send(registry, failure);
257    register_failure_backend_send(registry, failure);
258    register_failure_response_timeout(registry, failure);
259    register_failure_peer_state(registry, failure);
260    register_failure_phi(registry, failure);
261}
262
263fn register_failure_no_targets(registry: &Registry, failure: &FailureSnapshot) {
264    let opts = Opts::new(
265        "dispatch_no_targets_total",
266        "Dispatch failures because the only routable peer for the hashed token was Down or absent.",
267    );
268    let counter = IntCounterVec::new(opts, &["dc", "rack", "consistency_level"])
269        .expect("invariant: dispatch_no_targets descriptor is valid");
270    for entry in &failure.no_targets {
271        counter
272            .with_label_values(&[
273                entry.dc.as_str(),
274                entry.rack.as_str(),
275                entry.consistency.name(),
276            ])
277            .inc_by(entry.count);
278    }
279    registry
280        .register(Box::new(counter))
281        .expect("invariant: dispatch_no_targets registers cleanly");
282}
283
284fn register_failure_peer_send(registry: &Registry, failure: &FailureSnapshot) {
285    let full = IntCounterVec::new(
286        Opts::new(
287            "dispatch_peer_send_full_total",
288            "Dispatcher try_send to a peer's outbound channel returned Full.",
289        ),
290        &["peer_idx", "peer_dc"],
291    )
292    .expect("invariant: dispatch_peer_send_full descriptor is valid");
293    for entry in &failure.peer_send_full {
294        full.with_label_values(&[&entry.peer_idx.to_string(), &entry.peer_dc])
295            .inc_by(entry.count);
296    }
297    registry
298        .register(Box::new(full))
299        .expect("invariant: dispatch_peer_send_full registers cleanly");
300
301    let closed = IntCounterVec::new(
302        Opts::new(
303            "dispatch_peer_send_closed_total",
304            "Dispatcher try_send to a peer's outbound channel returned Closed.",
305        ),
306        &["peer_idx", "peer_dc"],
307    )
308    .expect("invariant: dispatch_peer_send_closed descriptor is valid");
309    for entry in &failure.peer_send_closed {
310        closed
311            .with_label_values(&[&entry.peer_idx.to_string(), &entry.peer_dc])
312            .inc_by(entry.count);
313    }
314    registry
315        .register(Box::new(closed))
316        .expect("invariant: dispatch_peer_send_closed registers cleanly");
317}
318
319fn register_failure_backend_send(registry: &Registry, failure: &FailureSnapshot) {
320    let full = IntCounterVec::new(
321        Opts::new(
322            "dispatch_backend_send_full_total",
323            "Dispatcher try_send to the local datastore backend returned Full.",
324        ),
325        &[],
326    )
327    .expect("invariant: dispatch_backend_send_full descriptor is valid");
328    if failure.backend_send_full > 0 {
329        full.with_label_values::<&str>(&[])
330            .inc_by(failure.backend_send_full);
331    } else {
332        let _ = full.with_label_values::<&str>(&[]);
333    }
334    registry
335        .register(Box::new(full))
336        .expect("invariant: dispatch_backend_send_full registers cleanly");
337
338    let closed = IntCounterVec::new(
339        Opts::new(
340            "dispatch_backend_send_closed_total",
341            "Dispatcher try_send to the local datastore backend returned Closed.",
342        ),
343        &[],
344    )
345    .expect("invariant: dispatch_backend_send_closed descriptor is valid");
346    if failure.backend_send_closed > 0 {
347        closed
348            .with_label_values::<&str>(&[])
349            .inc_by(failure.backend_send_closed);
350    } else {
351        let _ = closed.with_label_values::<&str>(&[]);
352    }
353    registry
354        .register(Box::new(closed))
355        .expect("invariant: dispatch_backend_send_closed registers cleanly");
356}
357
358fn register_failure_response_timeout(registry: &Registry, failure: &FailureSnapshot) {
359    let counter = IntCounterVec::new(
360        Opts::new(
361            "dispatch_response_timeout_total",
362            "Dispatcher's response coalescer gave up waiting for replies.",
363        ),
364        &["consistency_level"],
365    )
366    .expect("invariant: dispatch_response_timeout descriptor is valid");
367    for entry in &failure.response_timeout {
368        counter
369            .with_label_values(&[entry.consistency.name()])
370            .inc_by(entry.count);
371    }
372    registry
373        .register(Box::new(counter))
374        .expect("invariant: dispatch_response_timeout registers cleanly");
375}
376
377fn register_failure_peer_state(registry: &Registry, failure: &FailureSnapshot) {
378    let trans = IntCounterVec::new(
379        Opts::new(
380            "peer_state_transitions_total",
381            "Number of gossip-driven peer-state transitions, labelled by from/to state.",
382        ),
383        &["peer_idx", "from_state", "to_state"],
384    )
385    .expect("invariant: peer_state_transitions descriptor is valid");
386    for entry in &failure.peer_state_transitions {
387        let peer_idx = entry.peer_idx.to_string();
388        trans
389            .with_label_values(&[peer_idx.as_str(), entry.from.name(), entry.to.name()])
390            .inc_by(entry.count);
391    }
392    registry
393        .register(Box::new(trans))
394        .expect("invariant: peer_state_transitions registers cleanly");
395
396    let current = IntGaugeVec::new(
397        Opts::new(
398            "peer_state_current",
399            "Current peer state. Numeric value matches PeerState's repr(u8): \
400             0=Unknown, 1=Joining, 2=Normal, 3=Standby, 4=Down, 5=Reset, 6=Leaving.",
401        ),
402        &["peer_idx", "dc", "rack"],
403    )
404    .expect("invariant: peer_state_current descriptor is valid");
405    for entry in &failure.peer_state_current {
406        current
407            .with_label_values(&[&entry.peer_idx.to_string(), &entry.dc, &entry.rack])
408            .set(peer_state_value(entry.state));
409    }
410    registry
411        .register(Box::new(current))
412        .expect("invariant: peer_state_current registers cleanly");
413}
414
415fn register_failure_phi(registry: &Registry, failure: &FailureSnapshot) {
416    let gauge = IntGaugeVec::new(
417        Opts::new(
418            "gossip_phi_score_milli",
419            "Phi-accrual failure detector score per peer, scaled by 1000 (gauge units = thousandths).",
420        ),
421        &["peer_idx", "dc", "rack"],
422    )
423    .expect("invariant: gossip_phi_score descriptor is valid");
424    for entry in &failure.peer_phi {
425        let value = phi_to_milli_clamped(entry.phi);
426        gauge
427            .with_label_values(&[&entry.peer_idx.to_string(), &entry.dc, &entry.rack])
428            .set(value);
429    }
430    registry
431        .register(Box::new(gauge))
432        .expect("invariant: gossip_phi_score registers cleanly");
433}
434
435/// Map a [`PeerState`] to the integer value the Prometheus gauge
436/// publishes. Mirrors the enum's `repr(u8)` discriminants but
437/// goes via a match so the conversion is explicit and the
438/// pedantic cast lints stay clean.
439fn peer_state_value(state: PeerState) -> i64 {
440    match state {
441        PeerState::Unknown => 0,
442        PeerState::Joining => 1,
443        PeerState::Normal => 2,
444        PeerState::Standby => 3,
445        PeerState::Down => 4,
446        PeerState::Reset => 5,
447        PeerState::Leaving => 6,
448    }
449}
450
/// Render a phi value in thousandths as an `i64`, clamped to
/// `0..=i64::MAX / 1000`.
///
/// The snapshot already clamps the upstream value; this helper
/// repeats the clamp for safety against future refactors.
///
/// * Non-finite (`NaN`, `±inf`), zero and negative inputs yield `0`.
/// * Values whose rounded thousandths are below `1` yield `0`.
/// * Anything at or above the ceiling saturates to `i64::MAX / 1000`.
///   This includes finite inputs so large that multiplying by 1000
///   overflows `f64` to infinity; those used to be reported as `0`.
///
/// The float-to-integer conversion decodes the IEEE-754 bit pattern by
/// hand instead of using an `as` cast, keeping the pedantic cast lints
/// clean.
fn phi_to_milli_clamped(phi: f64) -> i64 {
    let saturating = i64::MAX / 1000;
    if !phi.is_finite() || phi <= 0.0 {
        return 0;
    }
    let scaled = (phi * 1000.0).round();
    if !scaled.is_finite() {
        // `phi` is finite and positive here, so the only non-finite
        // outcome is overflow to +inf: report the ceiling, not zero.
        return saturating;
    }
    if scaled <= 0.0 {
        // Rounded down to zero thousandths.
        return 0;
    }
    let bits = scaled.to_bits();
    // 11-bit biased exponent; the mask guarantees it fits in u32.
    let exp_field = u32::try_from((bits >> 52) & 0x7FF).unwrap_or(0);
    if exp_field < 1023 {
        // Magnitude below 1.0 (or subnormal): no whole thousandths.
        return 0;
    }
    let unbiased = exp_field - 1023;
    if unbiased >= 63 {
        // At least 2^63: cannot be represented in i64.
        return saturating;
    }
    // Restore the implicit leading one on top of the 52-bit fraction.
    let mant = bits & ((1u64 << 52) - 1);
    let m = (1u64 << 52) | mant;
    // Shift the 53-bit significand so its binary point lands at bit 0.
    let value = if unbiased >= 52 {
        m.checked_shl(unbiased - 52).unwrap_or(u64::MAX)
    } else {
        m >> (52 - unbiased)
    };
    i64::try_from(value).unwrap_or(saturating).min(saturating)
}
481
482fn register_histogram_summaries(registry: &Registry, snap: &Snapshot) {
483    let entries: [(&str, &str, &HistogramSummary); 8] = [
484        (
485            "dynomite_request_latency_microseconds",
486            "Top-level request latency in microseconds.",
487            &snap.latency,
488        ),
489        (
490            "dynomite_payload_size_bytes",
491            "Observed request/response payload sizes in bytes.",
492            &snap.payload_size,
493        ),
494        (
495            "dynomite_cross_region_latency_microseconds",
496            "Cross-region peer round-trip latency in microseconds.",
497            &snap.cross_region_latency,
498        ),
499        (
500            "dynomite_cross_zone_latency_microseconds",
501            "Cross-zone peer latency in microseconds.",
502            &snap.cross_zone_latency,
503        ),
504        (
505            "dynomite_server_latency_microseconds",
506            "Backing-server response latency in microseconds.",
507            &snap.server_latency,
508        ),
509        (
510            "dynomite_cross_region_queue_wait_microseconds",
511            "Cross-region queue wait time in microseconds.",
512            &snap.cross_region_queue_wait,
513        ),
514        (
515            "dynomite_cross_zone_queue_wait_microseconds",
516            "Cross-zone queue wait time in microseconds.",
517            &snap.cross_zone_queue_wait,
518        ),
519        (
520            "dynomite_server_queue_wait_microseconds",
521            "Server queue wait time in microseconds.",
522            &snap.server_queue_wait,
523        ),
524    ];
525    for (name, help, summary) in entries {
526        let gauge = IntGaugeVec::new(Opts::new(name, help), &["quantile"])
527            .expect("invariant: histogram quantile gauge is valid");
528        let s = *summary;
529        let mean_v = i64::try_from(s.mean).unwrap_or(i64::MAX);
530        let q95 = i64::try_from(s.p95).unwrap_or(i64::MAX);
531        let q99 = i64::try_from(s.p99).unwrap_or(i64::MAX);
532        let q999 = i64::try_from(s.p999).unwrap_or(i64::MAX);
533        let max_v = i64::try_from(s.max).unwrap_or(i64::MAX);
534        gauge.with_label_values(&["mean"]).set(mean_v);
535        gauge.with_label_values(&["0.95"]).set(q95);
536        gauge.with_label_values(&["0.99"]).set(q99);
537        gauge.with_label_values(&["0.999"]).set(q999);
538        gauge.with_label_values(&["max"]).set(max_v);
539        registry
540            .register(Box::new(gauge))
541            .expect("invariant: histogram quantile gauge registers cleanly");
542    }
543}
544
545fn register_queue_p99s(registry: &Registry, snap: &Snapshot) {
546    let entries: [(&str, &str, u64); 8] = [
547        (
548            "dynomite_client_out_queue_p99",
549            "99th percentile of the client outbound queue length.",
550            snap.client_out_queue_p99,
551        ),
552        (
553            "dynomite_server_in_queue_p99",
554            "99th percentile of the server inbound queue length.",
555            snap.server_in_queue_p99,
556        ),
557        (
558            "dynomite_server_out_queue_p99",
559            "99th percentile of the server outbound queue length.",
560            snap.server_out_queue_p99,
561        ),
562        (
563            "dynomite_dnode_client_out_queue_p99",
564            "99th percentile of the dnode client outbound queue length.",
565            snap.dnode_client_out_queue_p99,
566        ),
567        (
568            "dynomite_peer_in_queue_p99",
569            "99th percentile of the local-DC peer inbound queue length.",
570            snap.peer_in_queue_p99,
571        ),
572        (
573            "dynomite_peer_out_queue_p99",
574            "99th percentile of the local-DC peer outbound queue length.",
575            snap.peer_out_queue_p99,
576        ),
577        (
578            "dynomite_remote_peer_in_queue_p99",
579            "99th percentile of the remote-DC peer inbound queue length.",
580            snap.remote_peer_in_queue_p99,
581        ),
582        (
583            "dynomite_remote_peer_out_queue_p99",
584            "99th percentile of the remote-DC peer outbound queue length.",
585            snap.remote_peer_out_queue_p99,
586        ),
587    ];
588    for (name, help, value) in entries {
589        let gauge = IntGaugeVec::new(Opts::new(name, help), &[])
590            .expect("invariant: queue p99 gauge descriptor is valid");
591        let value_i64 = i64::try_from(value).unwrap_or(i64::MAX);
592        gauge.with_label_values::<&str>(&[]).set(value_i64);
593        registry
594            .register(Box::new(gauge))
595            .expect("invariant: queue p99 gauge registers cleanly");
596    }
597}
598
#[cfg(test)]
mod tests {
    use super::*;
    use crate::stats::codec::PoolField;
    use crate::stats::snapshot::{PoolStats, ServerStats, ServiceInfo};

    /// Baseline snapshot shared by the tests: fixed service info, a
    /// pool named `dyn_o_mite` and a server named `redis_local`, with
    /// everything else left at its default.
    fn make_snap() -> Snapshot {
        let info = ServiceInfo {
            source: "node-a".into(),
            version: "0.0.1".into(),
            rack: "r1".into(),
            dc: "dc1".into(),
        };
        Snapshot {
            info,
            pool: PoolStats::new("dyn_o_mite"),
            server: ServerStats::new("redis_local"),
            ..Snapshot::default()
        }
    }

    /// Assert that the rendered output contains `needle`; on failure
    /// the panic message is `context` followed by the full output.
    #[track_caller]
    fn expect_line(out: &str, needle: &str, context: &str) {
        assert!(out.contains(needle), "{context}:\n{out}");
    }

    #[test]
    fn render_prometheus_includes_help_and_type_lines() {
        let mut snap = make_snap();
        snap.pool.metrics[PoolField::ClientEof.index()] = 7;
        let out = render_prometheus(&snap);

        let expectations = [
            (
                "# HELP dynomite_pool_client_eof_total",
                "missing # HELP for pool client_eof",
            ),
            (
                "# TYPE dynomite_pool_client_eof_total counter",
                "missing # TYPE for pool client_eof",
            ),
            (
                "dynomite_pool_client_eof_total{pool=\"dyn_o_mite\"} 7",
                "missing pool client_eof value line",
            ),
        ];
        for (needle, context) in expectations {
            expect_line(&out, needle, context);
        }
    }

    #[test]
    fn render_prometheus_quotes_label_values() {
        let mut snap = make_snap();
        // Pool name containing one backslash and one trailing double quote.
        snap.pool = PoolStats::new(r#"my\pool""#);
        snap.pool.metrics[PoolField::ClientEof.index()] = 3;
        let out = render_prometheus(&snap);

        // On the wire the backslash is doubled and the quote is
        // backslash-escaped inside the surrounding label quotes.
        let expected_label = String::from(r#"pool="my\\pool\"""#);
        assert!(
            out.contains(expected_label.as_str()),
            "expected escaped label `{expected_label}` not found in:\n{out}"
        );
    }

    #[test]
    fn render_prometheus_emits_build_info() {
        let out = render_prometheus(&make_snap());
        expect_line(
            &out,
            "# TYPE dynomite_build_info gauge",
            "missing build_info type line",
        );

        // Take the first occurrence of the sample prefix and read to
        // the end of that line.
        let needle = "dynomite_build_info{";
        let line = out
            .split('\n')
            .find_map(|candidate| candidate.find(needle).map(|at| &candidate[at..]))
            .unwrap_or_else(|| panic!("missing build_info value line:\n{out}"));
        assert!(
            line.contains("version=\"0.0.1\""),
            "build_info missing version label: {line}"
        );
        assert!(line.ends_with(" 1"), "build_info value should be 1: {line}");
    }

    #[test]
    fn render_prometheus_includes_server_counters_and_uptime() {
        let mut snap = make_snap();
        snap.uptime = 42;
        snap.server.metrics[crate::stats::ServerField::ReadRequests.index()] = 5;
        let out = render_prometheus(&snap);

        assert!(
            out.contains("# TYPE dynomite_server_read_requests_total counter"),
            "server counter type line missing"
        );
        expect_line(
            &out,
            "dynomite_server_read_requests_total{server=\"redis_local\"} 5",
            "server counter value missing",
        );
        expect_line(&out, "dynomite_uptime_seconds 42", "uptime gauge value missing");
    }

    #[test]
    fn render_prometheus_emits_peer_state_for_server() {
        let out = render_prometheus(&make_snap());
        expect_line(
            &out,
            "dynomite_peer_state{peer=\"redis_local\",state=\"up\"} 1",
            "peer_state up line missing",
        );
        expect_line(
            &out,
            "dynomite_peer_state{peer=\"redis_local\",state=\"down\"} 0",
            "peer_state down line missing",
        );
    }

    /// The failure-cause counters are wired into the renderer; record
    /// one event of each kind and check that every family is rendered
    /// with the expected TYPE header and/or labelled sample row.
    #[test]
    fn render_prometheus_emits_failure_cause_counters() {
        use crate::cluster::peer::PeerState;
        use crate::msg::ConsistencyLevel;
        use crate::stats::FailureMetrics;

        let metrics = FailureMetrics::new();
        metrics.record_no_targets("dc1", "rA", ConsistencyLevel::DcQuorum);
        metrics.record_peer_send_full(7, "dc2");
        metrics.record_peer_send_closed(7, "dc2");
        metrics.record_backend_send_full();
        metrics.record_backend_send_closed();
        metrics.record_response_timeout(ConsistencyLevel::DcOne);
        metrics.record_peer_state_transition(3, "dc1", "rA", PeerState::Normal, PeerState::Down);
        metrics.observe_phi(3, "dc1", "rA", 4.5);

        let mut snap = make_snap();
        snap.failure = metrics.snapshot();
        let out = render_prometheus(&snap);

        let expectations = [
            (
                "# TYPE dispatch_no_targets_total counter",
                "missing dispatch_no_targets type line",
            ),
            (
                "dispatch_no_targets_total{consistency_level=\"DC_QUORUM\",dc=\"dc1\",rack=\"rA\"} 1",
                "missing dispatch_no_targets row",
            ),
            (
                "# TYPE dispatch_peer_send_full_total counter",
                "missing dispatch_peer_send_full type line",
            ),
            (
                "dispatch_peer_send_full_total{peer_dc=\"dc2\",peer_idx=\"7\"} 1",
                "missing dispatch_peer_send_full row",
            ),
            (
                "dispatch_peer_send_closed_total{peer_dc=\"dc2\",peer_idx=\"7\"} 1",
                "missing dispatch_peer_send_closed row",
            ),
            (
                "dispatch_backend_send_full_total 1",
                "missing dispatch_backend_send_full row",
            ),
            (
                "dispatch_backend_send_closed_total 1",
                "missing dispatch_backend_send_closed row",
            ),
            (
                "dispatch_response_timeout_total{consistency_level=\"DC_ONE\"} 1",
                "missing dispatch_response_timeout row",
            ),
            (
                "peer_state_transitions_total{from_state=\"NORMAL\",peer_idx=\"3\",to_state=\"DOWN\"} 1",
                "missing peer_state_transitions row",
            ),
            (
                "peer_state_current{dc=\"dc1\",peer_idx=\"3\",rack=\"rA\"} 4",
                "missing peer_state_current row (state=Down=4)",
            ),
            // phi=4.5 -> 4500 in the milli gauge.
            (
                "gossip_phi_score_milli{dc=\"dc1\",peer_idx=\"3\",rack=\"rA\"} 4500",
                "missing gossip_phi_score_milli row",
            ),
        ];
        for (needle, context) in expectations {
            expect_line(&out, needle, context);
        }
    }
}