Skip to main content

dynamo_runtime/metrics/
request_plane.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
//! Request-plane metrics for AddressedPushRouter.
//! Used to tell serialization latency apart from transport round-trip latency.
6
7use once_cell::sync::{Lazy, OnceCell};
8use prometheus::{Gauge, Histogram, HistogramOpts};
9
10use super::prometheus_names::{name_prefix, request_plane};
11use crate::MetricsRegistry;
12
13fn request_plane_metric_name(suffix: &str) -> String {
14    format!("{}_{}", name_prefix::REQUEST_PLANE, suffix)
15}
16
17/// Time from generate() entry to send_request() (serialization + encoding + control message).
18pub static REQUEST_PLANE_QUEUE_SECONDS: Lazy<Histogram> = Lazy::new(|| {
19    Histogram::with_opts(
20        HistogramOpts::new(
21            request_plane_metric_name(request_plane::QUEUE_SECONDS),
22            "Time from generate() entry to send_request() (seconds)",
23        )
24        .buckets(vec![
25            0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0,
26        ]),
27    )
28    .expect("request_plane_queue_seconds histogram")
29});
30
31/// Time for send_request() to complete (frontend view: network + queue + ack).
32pub static REQUEST_PLANE_SEND_SECONDS: Lazy<Histogram> = Lazy::new(|| {
33    Histogram::with_opts(
34        HistogramOpts::new(
35            request_plane_metric_name(request_plane::SEND_SECONDS),
36            "Time for send_request() to complete (seconds)",
37        )
38        .buckets(vec![
39            0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0,
40        ]),
41    )
42    .expect("request_plane_send_seconds histogram")
43});
44
45/// Time from send_request() to first response item (transport roundtrip TTFT).
46pub static REQUEST_PLANE_ROUNDTRIP_TTFT_SECONDS: Lazy<Histogram> = Lazy::new(|| {
47    Histogram::with_opts(
48        HistogramOpts::new(
49            request_plane_metric_name(request_plane::ROUNDTRIP_TTFT_SECONDS),
50            "Time from send_request() to first response item (seconds)",
51        )
52        .buckets(vec![
53            0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
54        ]),
55    )
56    .expect("request_plane_roundtrip_ttft_seconds histogram")
57});
58
59/// Currently in-flight requests (incremented at generate() entry, decremented on stream complete).
60pub static REQUEST_PLANE_INFLIGHT: Lazy<Gauge> = Lazy::new(|| {
61    Gauge::new(
62        request_plane_metric_name(request_plane::INFLIGHT_REQUESTS),
63        "Currently in-flight requests at AddressedPushRouter",
64    )
65    .expect("request_plane_inflight gauge")
66});
67
68/// Guards idempotency for the `MetricsRegistry` registration path.
69static METRICS_REGISTERED: OnceCell<()> = OnceCell::new();
70
71/// Guards idempotency for the raw `prometheus::Registry` registration path.
72/// Kept separate from `METRICS_REGISTERED` so that calling `ensure_request_plane_metrics_registered`
73/// first does not silently prevent the metrics from being registered in the prometheus registry.
74static PROMETHEUS_REGISTERED: OnceCell<Result<(), String>> = OnceCell::new();
75
76/// Register request-plane metrics with the given registry. Idempotent; only the first call registers.
77pub fn ensure_request_plane_metrics_registered(registry: &MetricsRegistry) {
78    let _ = METRICS_REGISTERED.get_or_init(|| {
79        registry.add_metric_or_warn(
80            Box::new(REQUEST_PLANE_QUEUE_SECONDS.clone()),
81            "request_plane_queue_seconds",
82        );
83        registry.add_metric_or_warn(
84            Box::new(REQUEST_PLANE_SEND_SECONDS.clone()),
85            "request_plane_send_seconds",
86        );
87        registry.add_metric_or_warn(
88            Box::new(REQUEST_PLANE_ROUNDTRIP_TTFT_SECONDS.clone()),
89            "request_plane_roundtrip_ttft_seconds",
90        );
91        registry.add_metric_or_warn(
92            Box::new(REQUEST_PLANE_INFLIGHT.clone()),
93            "request_plane_inflight",
94        );
95    });
96}
97
98/// Register request-plane metrics with a raw Prometheus registry (e.g. for LLM HTTP service /metrics).
99/// Idempotent; only the first call registers. Call this when the service exposes /metrics from its own registry.
100pub fn ensure_request_plane_metrics_registered_prometheus(
101    registry: &prometheus::Registry,
102) -> Result<(), prometheus::Error> {
103    PROMETHEUS_REGISTERED
104        .get_or_init(|| {
105            (|| -> Result<(), prometheus::Error> {
106                registry.register(Box::new(REQUEST_PLANE_QUEUE_SECONDS.clone()))?;
107                registry.register(Box::new(REQUEST_PLANE_SEND_SECONDS.clone()))?;
108                registry.register(Box::new(REQUEST_PLANE_ROUNDTRIP_TTFT_SECONDS.clone()))?;
109                registry.register(Box::new(REQUEST_PLANE_INFLIGHT.clone()))?;
110                Ok(())
111            })()
112            .map_err(|e| e.to_string())
113        })
114        .as_ref()
115        .map(|_| ())
116        .map_err(|e| prometheus::Error::Msg(e.clone()))
117}