Skip to main content

pylon_runtime/
metrics.rs

1use std::sync::atomic::{AtomicU64, Ordering};
2use std::time::Instant;
3
/// Request counters broken down by HTTP method.
///
/// Only `GET`, `POST`, `PATCH`, `DELETE` and `OPTIONS` have a dedicated
/// counter; any other method string is ignored by [`MethodCounters::increment`].
pub struct MethodCounters {
    /// Number of `GET` requests seen.
    pub get: AtomicU64,
    /// Number of `POST` requests seen.
    pub post: AtomicU64,
    /// Number of `PATCH` requests seen.
    pub patch: AtomicU64,
    /// Number of `DELETE` requests seen.
    pub delete: AtomicU64,
    /// Number of `OPTIONS` requests seen.
    pub options: AtomicU64,
}

impl MethodCounters {
    /// Build a counter set with every counter starting at zero.
    fn new() -> Self {
        let zero = || AtomicU64::new(0);
        Self {
            get: zero(),
            post: zero(),
            patch: zero(),
            delete: zero(),
            options: zero(),
        }
    }

    /// Add one to the counter matching `method`.
    ///
    /// The comparison is an exact string match against the upper-case
    /// method names, so `"get"` or an untracked method such as `"PUT"`
    /// leaves every counter untouched.
    fn increment(&self, method: &str) {
        let counter = match method {
            "GET" => &self.get,
            "POST" => &self.post,
            "PATCH" => &self.patch,
            "DELETE" => &self.delete,
            "OPTIONS" => &self.options,
            // Untracked method: nothing to count.
            _ => return,
        };
        counter.fetch_add(1, Ordering::Relaxed);
    }
}
35
/// Lightweight, lock-free request metrics.
///
/// All counters use relaxed atomic ordering — sufficient for monitoring
/// where exact cross-thread consistency is not required.
pub struct Metrics {
    /// Every request passed to `record_request`, regardless of status.
    pub requests_total: AtomicU64,
    /// Requests whose status code was in the 200-399 range.
    pub requests_ok: AtomicU64,
    /// Requests whose status code was outside the 200-399 range.
    pub requests_err: AtomicU64,
    /// Per-method breakdown of the recorded requests.
    pub requests_by_method: MethodCounters,
    /// Moment this instance was created; the basis for `uptime_secs`.
    start_time: Instant,
}
47
/// Context for the request currently being handled on this thread.
///
/// It lets the access log written by `record_request` include the URL and
/// the elapsed time without every response site having to pass them along.
/// The value is stored by [`set_current_request`] and taken out again by
/// `record_request`.
struct CurrentRequest {
    /// Request URL exactly as handed to [`set_current_request`].
    url: String,
    /// When handling of the request began.
    started: std::time::Instant,
}

thread_local! {
    // One slot per thread; `None` means no request context is pending.
    static CURRENT_REQUEST: std::cell::Cell<Option<CurrentRequest>> =
        const { std::cell::Cell::new(None) };
}

/// Remember the in-flight request's URL and start time for this thread so
/// that the next `record_request` call on the same thread can emit a
/// complete access log line.
///
/// Any context already stored on this thread is replaced. To keep a path
/// out of the access log (the intent for noisy endpoints such as /health
/// and /metrics, which are hit by liveness probes and Prometheus scrapers),
/// simply do not call this function for that request.
pub fn set_current_request(url: &str, started: std::time::Instant) {
    let ctx = CurrentRequest {
        url: url.to_owned(),
        started,
    };
    CURRENT_REQUEST.with(|slot| slot.set(Some(ctx)));
}
74
75impl Metrics {
76    /// Create a new metrics instance. The uptime clock starts immediately.
77    pub fn new() -> Self {
78        Self {
79            requests_total: AtomicU64::new(0),
80            requests_ok: AtomicU64::new(0),
81            requests_err: AtomicU64::new(0),
82            requests_by_method: MethodCounters::new(),
83            start_time: Instant::now(),
84        }
85    }
86
87    /// Record a completed request. A status code in the 200-399 range is
88    /// counted as successful; everything else counts as an error.
89    ///
90    /// Also emits an access log line via tracing if a thread-local
91    /// request context was set (see [`set_current_request`]). The
92    /// thread-local trick keeps the existing 30+ call sites of this
93    /// method untouched while still giving us Next.js-style
94    /// `GET /foo 200 in 27ms` output for free. Server `recv()` is
95    /// single-threaded per request, so the thread-local matches the
96    /// in-flight request without cross-talk.
97    pub fn record_request(&self, method: &str, status: u16) {
98        self.requests_total.fetch_add(1, Ordering::Relaxed);
99        if (200..400).contains(&status) {
100            self.requests_ok.fetch_add(1, Ordering::Relaxed);
101        } else {
102            self.requests_err.fetch_add(1, Ordering::Relaxed);
103        }
104        self.requests_by_method.increment(method);
105
106        // Pull the per-request context if set. We log even without it
107        // (just method + status) so callers that haven't been wired
108        // through still get partial visibility. /health and /metrics
109        // are noisy and intentionally skipped at the call site of
110        // `set_current_request` — they don't set the thread-local, so
111        // the URL ends up "?".
112        let ctx = CURRENT_REQUEST.take();
113        match ctx {
114            Some(c) => {
115                let dur_ms = c.started.elapsed().as_millis();
116                tracing::info!("← {} {} {} in {}ms", method, c.url, status, dur_ms);
117            }
118            None => {
119                tracing::debug!("← {} {} (no per-request ctx)", method, status);
120            }
121        }
122    }
123
124    /// Seconds elapsed since this `Metrics` instance was created.
125    pub fn uptime_secs(&self) -> u64 {
126        self.start_time.elapsed().as_secs()
127    }
128
129    /// Return a JSON snapshot of all current metrics.
130    pub fn snapshot(&self) -> serde_json::Value {
131        serde_json::json!({
132            "uptime_secs": self.uptime_secs(),
133            "requests": {
134                "total": self.requests_total.load(Ordering::Relaxed),
135                "ok": self.requests_ok.load(Ordering::Relaxed),
136                "error": self.requests_err.load(Ordering::Relaxed),
137            },
138            "methods": {
139                "GET": self.requests_by_method.get.load(Ordering::Relaxed),
140                "POST": self.requests_by_method.post.load(Ordering::Relaxed),
141                "PATCH": self.requests_by_method.patch.load(Ordering::Relaxed),
142                "DELETE": self.requests_by_method.delete.load(Ordering::Relaxed),
143            }
144        })
145    }
146
147    /// Return metrics in Prometheus text exposition format.
148    ///
149    /// Supports scraping by Prometheus, Grafana Agent, OTel collector, etc.
150    pub fn prometheus(&self) -> String {
151        let total = self.requests_total.load(Ordering::Relaxed);
152        let ok = self.requests_ok.load(Ordering::Relaxed);
153        let err = self.requests_err.load(Ordering::Relaxed);
154        let uptime = self.uptime_secs();
155        let get = self.requests_by_method.get.load(Ordering::Relaxed);
156        let post = self.requests_by_method.post.load(Ordering::Relaxed);
157        let patch = self.requests_by_method.patch.load(Ordering::Relaxed);
158        let delete = self.requests_by_method.delete.load(Ordering::Relaxed);
159        let options = self.requests_by_method.options.load(Ordering::Relaxed);
160
161        format!(
162            "# HELP pylon_uptime_seconds Server uptime in seconds.\n\
163             # TYPE pylon_uptime_seconds gauge\n\
164             pylon_uptime_seconds {uptime}\n\
165             # HELP pylon_http_requests_total HTTP requests total.\n\
166             # TYPE pylon_http_requests_total counter\n\
167             pylon_http_requests_total {total}\n\
168             # HELP pylon_http_requests_ok_total HTTP requests with 2xx/3xx status.\n\
169             # TYPE pylon_http_requests_ok_total counter\n\
170             pylon_http_requests_ok_total {ok}\n\
171             # HELP pylon_http_requests_errors_total HTTP requests with 4xx/5xx status.\n\
172             # TYPE pylon_http_requests_errors_total counter\n\
173             pylon_http_requests_errors_total {err}\n\
174             # HELP pylon_http_requests_by_method HTTP requests by method.\n\
175             # TYPE pylon_http_requests_by_method counter\n\
176             pylon_http_requests_by_method{{method=\"GET\"}} {get}\n\
177             pylon_http_requests_by_method{{method=\"POST\"}} {post}\n\
178             pylon_http_requests_by_method{{method=\"PATCH\"}} {patch}\n\
179             pylon_http_requests_by_method{{method=\"DELETE\"}} {delete}\n\
180             pylon_http_requests_by_method{{method=\"OPTIONS\"}} {options}\n"
181        )
182    }
183}
184
185impl Default for Metrics {
186    fn default() -> Self {
187        Self::new()
188    }
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// Read a counter with the same relaxed ordering the production code uses.
    fn read(counter: &AtomicU64) -> u64 {
        counter.load(Ordering::Relaxed)
    }

    #[test]
    fn new_metrics_are_zero() {
        let metrics = Metrics::new();
        assert_eq!(read(&metrics.requests_total), 0);
        assert_eq!(read(&metrics.requests_ok), 0);
        assert_eq!(read(&metrics.requests_err), 0);
    }

    #[test]
    fn record_ok_request() {
        let metrics = Metrics::new();
        metrics.record_request("GET", 200);
        assert_eq!(read(&metrics.requests_total), 1);
        assert_eq!(read(&metrics.requests_ok), 1);
        assert_eq!(read(&metrics.requests_err), 0);
        assert_eq!(read(&metrics.requests_by_method.get), 1);
    }

    #[test]
    fn record_error_request() {
        let metrics = Metrics::new();
        metrics.record_request("POST", 500);
        assert_eq!(read(&metrics.requests_total), 1);
        assert_eq!(read(&metrics.requests_ok), 0);
        assert_eq!(read(&metrics.requests_err), 1);
        assert_eq!(read(&metrics.requests_by_method.post), 1);
    }

    #[test]
    fn method_counters_increment_independently() {
        let metrics = Metrics::new();
        let traffic = [
            ("GET", 200),
            ("GET", 200),
            ("POST", 201),
            ("DELETE", 204),
            ("PATCH", 200),
            ("OPTIONS", 204),
        ];
        for (method, status) in traffic {
            metrics.record_request(method, status);
        }

        let by_method = &metrics.requests_by_method;
        assert_eq!(read(&by_method.get), 2);
        assert_eq!(read(&by_method.post), 1);
        assert_eq!(read(&by_method.delete), 1);
        assert_eq!(read(&by_method.patch), 1);
        assert_eq!(read(&by_method.options), 1);
        assert_eq!(read(&metrics.requests_total), 6);
    }

    #[test]
    fn snapshot_returns_valid_json() {
        let metrics = Metrics::new();
        metrics.record_request("GET", 200);
        metrics.record_request("POST", 400);

        let json = metrics.snapshot();
        let requests = &json["requests"];
        assert_eq!(requests["total"], 2);
        assert_eq!(requests["ok"], 1);
        assert_eq!(requests["error"], 1);

        let methods = &json["methods"];
        assert_eq!(methods["GET"], 1);
        assert_eq!(methods["POST"], 1);
        assert_eq!(methods["PATCH"], 0);
        assert_eq!(methods["DELETE"], 0);

        assert!(json["uptime_secs"].as_u64().is_some());
    }

    #[test]
    fn uptime_is_non_negative() {
        // A freshly created instance should report roughly zero seconds.
        let metrics = Metrics::new();
        assert!(metrics.uptime_secs() < 2);
    }

    #[test]
    fn status_boundary_classification() {
        let metrics = Metrics::new();

        // 2xx and 3xx (redirects) count as ok.
        for status in [200, 204, 299, 301, 399] {
            metrics.record_request("GET", status);
        }
        // 4xx and 5xx count as errors.
        for status in [400, 404, 500] {
            metrics.record_request("GET", status);
        }

        assert_eq!(read(&metrics.requests_ok), 5);
        assert_eq!(read(&metrics.requests_err), 3);
    }
}