1use once_cell::sync::{Lazy, OnceCell};
8use prometheus::{Counter, Histogram, HistogramOpts, HistogramVec, IntGaugeVec, Opts, Registry};
9
10use super::prometheus_names::{frontend_perf, name_prefix};
11use crate::MetricsRegistry;
12
13pub use super::prometheus_names::frontend_perf::{STAGE_DISPATCH, STAGE_PREPROCESS, STAGE_ROUTE};
14
15fn frontend_metric_name(suffix: &str) -> String {
16 format!("{}_{}", name_prefix::FRONTEND, suffix)
17}
18
19pub static STAGE_REQUESTS: Lazy<IntGaugeVec> = Lazy::new(|| {
22 IntGaugeVec::new(
23 Opts::new(
24 frontend_metric_name(frontend_perf::STAGE_REQUESTS),
25 "Number of requests currently in the given pipeline stage",
26 ),
27 &["stage", "phase"],
28 )
29 .expect("failed to create dynamo_frontend_stage_requests gauge")
30});
31
32pub struct StageGuard {
38 gauge: prometheus::IntGauge,
39}
40
41impl StageGuard {
42 pub fn new(stage: &str, phase: &str) -> Self {
49 let gauge = STAGE_REQUESTS.with_label_values(&[stage, phase]);
50 gauge.inc();
51 Self { gauge }
52 }
53}
54
55impl Drop for StageGuard {
56 fn drop(&mut self) {
57 self.gauge.dec();
58 }
59}
60
61pub static STAGE_DURATION_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
63 HistogramVec::new(
64 HistogramOpts::new(
65 frontend_metric_name(frontend_perf::STAGE_DURATION_SECONDS),
66 "Pipeline stage duration (seconds)",
67 )
68 .buckets(vec![
69 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0,
70 ]),
71 &["stage"],
72 )
73 .expect("stage_duration_seconds histogram vec")
74});
75
76pub static TOKENIZE_SECONDS: Lazy<Histogram> = Lazy::new(|| {
78 Histogram::with_opts(
79 HistogramOpts::new(
80 frontend_metric_name(frontend_perf::TOKENIZE_SECONDS),
81 "Tokenization time in preprocessor (seconds)",
82 )
83 .buckets(vec![
84 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0,
85 ]),
86 )
87 .expect("tokenize_seconds histogram")
88});
89
90pub static TEMPLATE_SECONDS: Lazy<Histogram> = Lazy::new(|| {
92 Histogram::with_opts(
93 HistogramOpts::new(
94 frontend_metric_name(frontend_perf::TEMPLATE_SECONDS),
95 "Template application time in preprocessor (seconds)",
96 )
97 .buckets(vec![
98 0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05,
99 ]),
100 )
101 .expect("template_seconds histogram")
102});
103
104pub static DETOKENIZE_TOTAL_US: Lazy<Counter> = Lazy::new(|| {
107 Counter::with_opts(Opts::new(
108 frontend_metric_name(frontend_perf::DETOKENIZE_TOTAL_US),
109 "Cumulative detokenization time (microseconds)",
110 ))
111 .expect("detokenize_total_us counter")
112});
113
114pub static DETOKENIZE_TOKEN_COUNT: Lazy<Counter> = Lazy::new(|| {
116 Counter::with_opts(Opts::new(
117 frontend_metric_name(frontend_perf::DETOKENIZE_TOKEN_COUNT),
118 "Total tokens detokenized",
119 ))
120 .expect("detokenize_token_count counter")
121});
122
123pub static TOKENIZER_CACHE_HITS_TOTAL: Lazy<Counter> = Lazy::new(|| {
125 Counter::with_opts(Opts::new(
126 frontend_metric_name(frontend_perf::TOKENIZER_CACHE_HITS_TOTAL),
127 "Cumulative L1 tokenizer prefix-cache hits",
128 ))
129 .expect("tokenizer_cache_hits_total counter")
130});
131
132pub static TOKENIZER_CACHE_MISSES_TOTAL: Lazy<Counter> = Lazy::new(|| {
134 Counter::with_opts(Opts::new(
135 frontend_metric_name(frontend_perf::TOKENIZER_CACHE_MISSES_TOTAL),
136 "Cumulative L1 tokenizer prefix-cache misses",
137 ))
138 .expect("tokenizer_cache_misses_total counter")
139});
140
/// Process-wide marker that becomes set once
/// `ensure_frontend_perf_metrics_registered` has run its registration closure.
static REGISTERED: OnceCell<()> = OnceCell::new();
143
/// Process-wide marker that becomes set once
/// `ensure_frontend_perf_metrics_registered_prometheus` has registered every
/// metric without error.
static PROMETHEUS_REGISTERED: OnceCell<()> = OnceCell::new();
148
149pub fn ensure_frontend_perf_metrics_registered(registry: &MetricsRegistry) {
151 let _ = REGISTERED.get_or_init(|| {
152 registry.add_metric(Box::new(STAGE_REQUESTS.clone())).ok();
153 registry
154 .add_metric(Box::new(STAGE_DURATION_SECONDS.clone()))
155 .ok();
156 registry.add_metric(Box::new(TOKENIZE_SECONDS.clone())).ok();
157 registry.add_metric(Box::new(TEMPLATE_SECONDS.clone())).ok();
158 registry
159 .add_metric(Box::new(DETOKENIZE_TOTAL_US.clone()))
160 .ok();
161 registry
162 .add_metric(Box::new(DETOKENIZE_TOKEN_COUNT.clone()))
163 .ok();
164 registry
165 .add_metric(Box::new(TOKENIZER_CACHE_HITS_TOTAL.clone()))
166 .ok();
167 registry
168 .add_metric(Box::new(TOKENIZER_CACHE_MISSES_TOTAL.clone()))
169 .ok();
170 });
171}
172
173pub fn ensure_frontend_perf_metrics_registered_prometheus(
176 registry: &Registry,
177) -> Result<(), prometheus::Error> {
178 if PROMETHEUS_REGISTERED.get().is_some() {
179 return Ok(());
180 }
181 registry.register(Box::new(STAGE_REQUESTS.clone()))?;
182 registry.register(Box::new(STAGE_DURATION_SECONDS.clone()))?;
183 registry.register(Box::new(TOKENIZE_SECONDS.clone()))?;
184 registry.register(Box::new(TEMPLATE_SECONDS.clone()))?;
185 registry.register(Box::new(DETOKENIZE_TOTAL_US.clone()))?;
186 registry.register(Box::new(DETOKENIZE_TOKEN_COUNT.clone()))?;
187 registry.register(Box::new(TOKENIZER_CACHE_HITS_TOTAL.clone()))?;
188 registry.register(Box::new(TOKENIZER_CACHE_MISSES_TOTAL.clone()))?;
189 let _ = PROMETHEUS_REGISTERED.set(());
190 Ok(())
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Two guards on the same label pair raise the gauge to 2, and dropping
    /// them one at a time brings it back down to 0.
    #[test]
    fn test_stage_guard_inc_dec() {
        let in_flight = STAGE_REQUESTS.with_label_values(&["test_stage", "test_phase"]);
        assert_eq!(in_flight.get(), 0);

        let outer = StageGuard::new("test_stage", "test_phase");
        assert_eq!(in_flight.get(), 1);

        let inner = StageGuard::new("test_stage", "test_phase");
        assert_eq!(in_flight.get(), 2);

        // Release in reverse creation order, mirroring nested scopes.
        drop(inner);
        assert_eq!(in_flight.get(), 1);

        drop(outer);
        assert_eq!(in_flight.get(), 0);
    }

    /// Guards created with different label pairs affect only their own gauge.
    #[test]
    fn test_stage_guard_different_labels() {
        let preprocess_gauge = STAGE_REQUESTS.with_label_values(&["preprocess_t", ""]);
        let prefill_gauge = STAGE_REQUESTS.with_label_values(&["route_t", "prefill"]);
        let decode_gauge = STAGE_REQUESTS.with_label_values(&["route_t", "decode"]);

        // The preprocess and decode guards stay alive until the end of the test.
        let _preprocess_guard = StageGuard::new("preprocess_t", "");
        let prefill_guard = StageGuard::new("route_t", "prefill");
        let _decode_guard = StageGuard::new("route_t", "decode");

        assert_eq!(preprocess_gauge.get(), 1);
        assert_eq!(prefill_gauge.get(), 1);
        assert_eq!(decode_gauge.get(), 1);

        // Dropping one guard must leave the other two gauges untouched.
        drop(prefill_guard);
        assert_eq!(preprocess_gauge.get(), 1);
        assert_eq!(prefill_gauge.get(), 0);
        assert_eq!(decode_gauge.get(), 1);
    }
}