Skip to main content

batuta/serve/banco/
metrics.rs

//! Prometheus-compatible metrics endpoint.
//!
//! Tracks request, chat, token, and error counts, and reports uptime,
//! model-loaded state, and the number of registered endpoints.
//! `GET /api/v1/metrics` returns `text/plain` in the Prometheus exposition format.
5
6use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::Instant;
8
/// Server-wide metrics collector.
///
/// Every counter is an [`AtomicU64`], so events can be recorded through a
/// shared reference (`&self`) without any additional locking.
#[derive(Debug)]
pub struct MetricsCollector {
    /// Total requests served.
    pub total_requests: AtomicU64,
    /// Total chat completion requests.
    pub chat_requests: AtomicU64,
    /// Total inference tokens generated.
    pub tokens_generated: AtomicU64,
    /// Total errors (4xx + 5xx).
    pub errors: AtomicU64,
    /// Instant at which this collector was created; uptime is measured from it.
    start: Instant,
}

impl MetricsCollector {
    /// Create a collector with all counters at zero and the uptime clock
    /// starting now.
    #[must_use]
    pub fn new() -> Self {
        Self {
            total_requests: AtomicU64::new(0),
            chat_requests: AtomicU64::new(0),
            tokens_generated: AtomicU64::new(0),
            errors: AtomicU64::new(0),
            start: Instant::now(),
        }
    }

    // NOTE: all counter accesses below use `Ordering::Relaxed`. Each counter is
    // an independent tally and nothing in this type publishes other memory
    // through it, so only the atomicity of the individual add/load is needed.

    /// Increment total request counter.
    pub fn inc_requests(&self) {
        self.total_requests.fetch_add(1, Ordering::Relaxed);
    }

    /// Increment chat request counter.
    pub fn inc_chat(&self) {
        self.chat_requests.fetch_add(1, Ordering::Relaxed);
    }

    /// Add `n` generated tokens to the token counter.
    pub fn add_tokens(&self, n: u64) {
        self.tokens_generated.fetch_add(n, Ordering::Relaxed);
    }

    /// Increment error counter.
    pub fn inc_errors(&self) {
        self.errors.fetch_add(1, Ordering::Relaxed);
    }

    /// Render metrics in Prometheus exposition format.
    ///
    /// * `model_loaded` - emitted as the `banco_model_loaded` gauge (`1` when
    ///   `true`, `0` when `false`).
    /// * `endpoint_count` - emitted verbatim as the `banco_endpoints_total` gauge.
    ///
    /// Returns one `# HELP` / `# TYPE` / sample triple per metric, each line
    /// terminated by `\n`. Uptime is reported in whole seconds since the
    /// collector was created. The counters are loaded one after another, so
    /// the rendered values are not a single atomic snapshot across counters.
    #[must_use]
    pub fn render(&self, model_loaded: bool, endpoint_count: u64) -> String {
        let uptime = self.start.elapsed().as_secs();
        let total = self.total_requests.load(Ordering::Relaxed);
        let chat = self.chat_requests.load(Ordering::Relaxed);
        let tokens = self.tokens_generated.load(Ordering::Relaxed);
        let errors = self.errors.load(Ordering::Relaxed);
        // Lossless bool -> integer conversion: `true` becomes 1, `false` becomes 0.
        let loaded = u8::from(model_loaded);

        format!(
            "# HELP banco_requests_total Total HTTP requests served.\n\
             # TYPE banco_requests_total counter\n\
             banco_requests_total {total}\n\
             # HELP banco_chat_requests_total Total chat completion requests.\n\
             # TYPE banco_chat_requests_total counter\n\
             banco_chat_requests_total {chat}\n\
             # HELP banco_tokens_generated_total Total tokens generated.\n\
             # TYPE banco_tokens_generated_total counter\n\
             banco_tokens_generated_total {tokens}\n\
             # HELP banco_errors_total Total error responses.\n\
             # TYPE banco_errors_total counter\n\
             banco_errors_total {errors}\n\
             # HELP banco_uptime_seconds Server uptime in seconds.\n\
             # TYPE banco_uptime_seconds gauge\n\
             banco_uptime_seconds {uptime}\n\
             # HELP banco_model_loaded Whether a model is loaded (1=yes, 0=no).\n\
             # TYPE banco_model_loaded gauge\n\
             banco_model_loaded {loaded}\n\
             # HELP banco_endpoints_total Number of registered API endpoints.\n\
             # TYPE banco_endpoints_total gauge\n\
             banco_endpoints_total {endpoint_count}\n"
        )
    }
}
90
91impl Default for MetricsCollector {
92    fn default() -> Self {
93        Self::new()
94    }
95}