use once_cell::sync::{Lazy, OnceCell};
use prometheus::{Counter, Histogram, HistogramOpts, HistogramVec, IntGaugeVec, Opts, Registry};
use super::prometheus_names::{frontend_perf, name_prefix};
use crate::MetricsRegistry;
pub use super::prometheus_names::frontend_perf::{STAGE_DISPATCH, STAGE_PREPROCESS, STAGE_ROUTE};
/// Builds the fully qualified name of a frontend metric.
///
/// The result is [`name_prefix::FRONTEND`] and `suffix` joined by a single
/// underscore.
fn frontend_metric_name(suffix: &str) -> String {
    let prefix = &name_prefix::FRONTEND;
    format!("{prefix}_{suffix}")
}
/// Gauge family counting the requests currently inside each pipeline stage.
///
/// Children are keyed by the `stage` and `phase` labels. [`StageGuard`]
/// increments a child on construction and decrements it on drop.
pub static STAGE_REQUESTS: Lazy<IntGaugeVec> = Lazy::new(|| {
    let opts = Opts::new(
        frontend_metric_name(frontend_perf::STAGE_REQUESTS),
        "Number of requests currently in the given pipeline stage",
    );
    let label_names = ["stage", "phase"];
    IntGaugeVec::new(opts, &label_names)
        .expect("failed to create dynamo_frontend_stage_requests gauge")
});
/// RAII guard that marks one request as being inside a pipeline stage.
///
/// Creating the guard increments the [`STAGE_REQUESTS`] child for the given
/// `(stage, phase)` label pair; dropping it decrements the same child again.
/// Bind the guard to a named variable for as long as the request is in the
/// stage: a guard that is discarded immediately leaves the gauge unchanged.
#[must_use = "the stage gauge is decremented as soon as the guard is dropped"]
pub struct StageGuard {
    /// The labelled gauge child this guard incremented and will decrement.
    gauge: prometheus::IntGauge,
}

impl StageGuard {
    /// Increments the in-flight gauge for `stage` / `phase` and returns a
    /// guard that undoes the increment when dropped.
    ///
    /// `phase` may be an empty string for stages that have no phases.
    pub fn new(stage: &str, phase: &str) -> Self {
        let gauge = STAGE_REQUESTS.with_label_values(&[stage, phase]);
        gauge.inc();
        Self { gauge }
    }
}

impl Drop for StageGuard {
    /// Reverses the increment performed in [`StageGuard::new`].
    fn drop(&mut self) {
        self.gauge.dec();
    }
}
/// Histogram family of per-stage pipeline durations in seconds, labelled by
/// `stage`.
///
/// Bucket bounds range from 0.0001 s up to 5 s.
pub static STAGE_DURATION_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
    let bucket_bounds = vec![
        0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0,
    ];
    let opts = HistogramOpts::new(
        frontend_metric_name(frontend_perf::STAGE_DURATION_SECONDS),
        "Pipeline stage duration (seconds)",
    )
    .buckets(bucket_bounds);
    HistogramVec::new(opts, &["stage"]).expect("stage_duration_seconds histogram vec")
});
/// Histogram of tokenization time in the preprocessor, in seconds.
///
/// Bucket bounds range from 0.0001 s up to 1 s.
pub static TOKENIZE_SECONDS: Lazy<Histogram> = Lazy::new(|| {
    let bucket_bounds = vec![0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0];
    let opts = HistogramOpts::new(
        frontend_metric_name(frontend_perf::TOKENIZE_SECONDS),
        "Tokenization time in preprocessor (seconds)",
    )
    .buckets(bucket_bounds);
    Histogram::with_opts(opts).expect("tokenize_seconds histogram")
});
/// Histogram of template application time in the preprocessor, in seconds.
///
/// Bucket bounds range from 0.00001 s up to 0.05 s.
pub static TEMPLATE_SECONDS: Lazy<Histogram> = Lazy::new(|| {
    let bucket_bounds = vec![0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05];
    let opts = HistogramOpts::new(
        frontend_metric_name(frontend_perf::TEMPLATE_SECONDS),
        "Template application time in preprocessor (seconds)",
    )
    .buckets(bucket_bounds);
    Histogram::with_opts(opts).expect("template_seconds histogram")
});
/// Counter accumulating total detokenization time, in microseconds.
pub static DETOKENIZE_TOTAL_US: Lazy<Counter> = Lazy::new(|| {
    let opts = Opts::new(
        frontend_metric_name(frontend_perf::DETOKENIZE_TOTAL_US),
        "Cumulative detokenization time (microseconds)",
    );
    Counter::with_opts(opts).expect("detokenize_total_us counter")
});
/// Counter of the total number of tokens detokenized.
pub static DETOKENIZE_TOKEN_COUNT: Lazy<Counter> = Lazy::new(|| {
    let opts = Opts::new(
        frontend_metric_name(frontend_perf::DETOKENIZE_TOKEN_COUNT),
        "Total tokens detokenized",
    );
    Counter::with_opts(opts).expect("detokenize_token_count counter")
});
/// Counter of cumulative hits in the L1 tokenizer prefix cache.
pub static TOKENIZER_CACHE_HITS_TOTAL: Lazy<Counter> = Lazy::new(|| {
    let opts = Opts::new(
        frontend_metric_name(frontend_perf::TOKENIZER_CACHE_HITS_TOTAL),
        "Cumulative L1 tokenizer prefix-cache hits",
    );
    Counter::with_opts(opts).expect("tokenizer_cache_hits_total counter")
});
/// Counter of cumulative misses in the L1 tokenizer prefix cache.
pub static TOKENIZER_CACHE_MISSES_TOTAL: Lazy<Counter> = Lazy::new(|| {
    let opts = Opts::new(
        frontend_metric_name(frontend_perf::TOKENIZER_CACHE_MISSES_TOTAL),
        "Cumulative L1 tokenizer prefix-cache misses",
    );
    Counter::with_opts(opts).expect("tokenizer_cache_misses_total counter")
});
/// Process-wide marker, set once `ensure_frontend_perf_metrics_registered`
/// has run its registration pass.
static REGISTERED: OnceCell<()> = OnceCell::new();
/// Process-wide marker, set once
/// `ensure_frontend_perf_metrics_registered_prometheus` has registered every
/// metric without error.
static PROMETHEUS_REGISTERED: OnceCell<()> = OnceCell::new();
/// Adds all frontend perf metrics to `registry`, at most once per process.
///
/// Only the first call performs any work; every later call is a no-op, even
/// when it is given a different registry. Registration is best-effort: the
/// result of each individual `add_metric` call is discarded, and the pass is
/// considered done regardless of whether any of them failed.
pub fn ensure_frontend_perf_metrics_registered(registry: &MetricsRegistry) {
    let _ = REGISTERED.get_or_init(|| {
        // Pipeline stage metrics.
        let _ = registry.add_metric(Box::new(STAGE_REQUESTS.clone()));
        let _ = registry.add_metric(Box::new(STAGE_DURATION_SECONDS.clone()));

        // Preprocessor timing histograms.
        let _ = registry.add_metric(Box::new(TOKENIZE_SECONDS.clone()));
        let _ = registry.add_metric(Box::new(TEMPLATE_SECONDS.clone()));

        // Detokenization counters.
        let _ = registry.add_metric(Box::new(DETOKENIZE_TOTAL_US.clone()));
        let _ = registry.add_metric(Box::new(DETOKENIZE_TOKEN_COUNT.clone()));

        // Tokenizer prefix-cache counters.
        let _ = registry.add_metric(Box::new(TOKENIZER_CACHE_HITS_TOTAL.clone()));
        let _ = registry.add_metric(Box::new(TOKENIZER_CACHE_MISSES_TOTAL.clone()));
    });
}
pub fn ensure_frontend_perf_metrics_registered_prometheus(
registry: &Registry,
) -> Result<(), prometheus::Error> {
if PROMETHEUS_REGISTERED.get().is_some() {
return Ok(());
}
registry.register(Box::new(STAGE_REQUESTS.clone()))?;
registry.register(Box::new(STAGE_DURATION_SECONDS.clone()))?;
registry.register(Box::new(TOKENIZE_SECONDS.clone()))?;
registry.register(Box::new(TEMPLATE_SECONDS.clone()))?;
registry.register(Box::new(DETOKENIZE_TOTAL_US.clone()))?;
registry.register(Box::new(DETOKENIZE_TOKEN_COUNT.clone()))?;
registry.register(Box::new(TOKENIZER_CACHE_HITS_TOTAL.clone()))?;
registry.register(Box::new(TOKENIZER_CACHE_MISSES_TOTAL.clone()))?;
let _ = PROMETHEUS_REGISTERED.set(());
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Guards on the same label pair stack up and unwind one at a time.
    #[test]
    fn test_stage_guard_inc_dec() {
        let in_flight = STAGE_REQUESTS.with_label_values(&["test_stage", "test_phase"]);
        assert_eq!(in_flight.get(), 0);

        let outer = StageGuard::new("test_stage", "test_phase");
        assert_eq!(in_flight.get(), 1);

        let inner = StageGuard::new("test_stage", "test_phase");
        assert_eq!(in_flight.get(), 2);

        // Release in reverse order of acquisition, as nested scopes would.
        drop(inner);
        assert_eq!(in_flight.get(), 1);

        drop(outer);
        assert_eq!(in_flight.get(), 0);
    }

    /// Guards with different label pairs affect only their own gauge child.
    #[test]
    fn test_stage_guard_different_labels() {
        let preprocess_gauge = STAGE_REQUESTS.with_label_values(&["preprocess_t", ""]);
        let prefill_gauge = STAGE_REQUESTS.with_label_values(&["route_t", "prefill"]);
        let decode_gauge = STAGE_REQUESTS.with_label_values(&["route_t", "decode"]);

        let _preprocess_guard = StageGuard::new("preprocess_t", "");
        let prefill_guard = StageGuard::new("route_t", "prefill");
        let _decode_guard = StageGuard::new("route_t", "decode");

        assert_eq!(preprocess_gauge.get(), 1);
        assert_eq!(prefill_gauge.get(), 1);
        assert_eq!(decode_gauge.get(), 1);

        // Dropping one guard must leave the other two children untouched.
        drop(prefill_guard);

        assert_eq!(preprocess_gauge.get(), 1);
        assert_eq!(prefill_gauge.get(), 0);
        assert_eq!(decode_gauge.get(), 1);
    }
}