Skip to main content

snap_control/server/
metrics.rs

1// Copyright 2025 Anapaya Systems
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//   http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//! SNAP control plane API Prometheus middleware.
15
16use std::{
17    future::Future,
18    pin::Pin,
19    task::{Context, Poll},
20    time::Instant,
21};
22
23use axum::body::Body;
24use http::{Request, Response};
25use prometheus::{HistogramVec, IntCounterVec};
26use scion_sdk_observability::metrics::registry::MetricsRegistry;
27use tower::{BoxError, Layer, Service};
28
29/// Prometheus middleware layer for tracking control plane API metrics.
30#[derive(Clone)]
31pub struct PrometheusMiddlewareLayer {
32    metrics: Metrics,
33}
34
35impl PrometheusMiddlewareLayer {
36    /// Create a new Prometheus middleware layer with the given metrics.
37    pub fn new(metrics: Metrics) -> Self {
38        Self { metrics }
39    }
40}
41
42impl<S> Layer<S> for PrometheusMiddlewareLayer {
43    type Service = PrometheusMiddleware<S>;
44
45    fn layer(&self, inner: S) -> Self::Service {
46        PrometheusMiddleware::new(inner, self.metrics.clone())
47    }
48}
49
50/// Prometheus middleware for tracking control plane API metrics.
51#[derive(Clone)]
52pub struct PrometheusMiddleware<S> {
53    inner: S,
54    metrics: Metrics,
55}
56
57impl<S> PrometheusMiddleware<S> {
58    /// Create a new Prometheus middleware with the given service and metrics.
59    pub fn new(inner: S, metrics: Metrics) -> Self {
60        Self { inner, metrics }
61    }
62}
63
64impl<S> Service<Request<Body>> for PrometheusMiddleware<S>
65where
66    S: Service<Request<Body>, Response = Response<Body>> + Send + Clone + 'static,
67    S::Error: Into<BoxError>,
68    S::Future: Send + 'static,
69{
70    type Response = Response<Body>;
71    type Error = BoxError;
72    type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
73
74    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
75        self.inner.poll_ready(cx).map_err(Into::into)
76    }
77
78    fn call(&mut self, request: Request<Body>) -> Self::Future {
79        let method = request.uri().path().to_string();
80        let metrics = self.metrics.clone();
81
82        // Increment started metric
83        metrics
84            .control_plane_started_total
85            .with_label_values(&[&method])
86            .inc();
87
88        let fut = self.inner.call(request);
89        let start = Instant::now();
90
91        Box::pin(async move {
92            let result = fut.await.map_err(Into::into)?;
93            let status = result.status().as_str().to_string();
94
95            // Increment handled metric
96            metrics
97                .control_plane_handled_total
98                .with_label_values(&[&method, &status])
99                .inc();
100
101            // Observe latency
102            let elapsed = start.elapsed().as_secs_f64();
103            metrics
104                .control_plane_latency_seconds
105                .with_label_values(&[&method, &status])
106                .observe(elapsed);
107
108            Ok(result)
109        })
110    }
111}
112
113/// SNAP control plane API metrics.
114#[derive(Debug, Clone)]
115pub struct Metrics {
116    /// Total number of control plane API requests started on the server.
117    pub control_plane_started_total: IntCounterVec,
118    /// Total number of control plane API requests handled on the server.
119    pub control_plane_handled_total: IntCounterVec,
120    /// Latency of control plane API requests in seconds.
121    pub control_plane_latency_seconds: HistogramVec,
122}
123
124impl Metrics {
125    /// Create new metrics instance with the given registry.
126    pub fn new(metrics_registry: &MetricsRegistry) -> Self {
127        Metrics {
128            control_plane_started_total: metrics_registry.int_counter_vec(
129                "control_plane_requests_started_total",
130                "Total number of control plane API requests started on the server.",
131                &["method"],
132            ),
133            control_plane_handled_total: metrics_registry.int_counter_vec(
134                "control_plane_requests_handled_total",
135                "Total number of control plane API requests handled on the server.",
136                &["method", "status"],
137            ),
138            control_plane_latency_seconds: metrics_registry.histogram_vec(
139                "control_plane_requests_latency_seconds",
140                "Latency of control plane API requests in seconds.",
141                vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0],
142                &["method", "status"],
143            ),
144        }
145    }
146}