sentinel_proxy/inference/
metrics.rs

1//! Inference-specific metrics for budget and cost tracking.
2//!
3//! Provides Prometheus metrics for:
4//! - Token budget usage per tenant
5//! - Budget alerts and exhaustion events
6//! - Cost attribution per model and route
7
8use anyhow::{Context, Result};
9use prometheus::{
10    register_counter_vec, register_histogram_vec, register_int_counter_vec, register_int_gauge_vec,
11    CounterVec, HistogramVec, IntCounterVec, IntGaugeVec,
12};
13
14use sentinel_common::budget::{BudgetAlert, BudgetCheckResult, CostResult};
15use sentinel_common::ids::Scope;
16
17/// Inference-specific metrics collector.
18///
19/// Tracks token budgets, costs, and inference-specific metrics with
20/// namespace/service labels for multi-tenant observability.
21pub struct InferenceMetrics {
22    // Budget metrics
23    /// Budget limit by tenant (gauge)
24    budget_limit: IntGaugeVec,
25    /// Tokens used in current period (counter)
26    budget_used: IntCounterVec,
27    /// Budget remaining (gauge, can be negative)
28    budget_remaining: IntGaugeVec,
29    /// Budget exhausted events (counter)
30    budget_exhausted: IntCounterVec,
31    /// Budget alerts fired (counter)
32    budget_alerts: IntCounterVec,
33
34    // Cost metrics
35    /// Total cost by model and route (counter)
36    cost_total: CounterVec,
37    /// Input tokens by model and route (counter)
38    input_tokens_total: IntCounterVec,
39    /// Output tokens by model and route (counter)
40    output_tokens_total: IntCounterVec,
41    /// Request cost histogram (histogram)
42    cost_per_request: HistogramVec,
43}
44
45impl InferenceMetrics {
46    /// Create new inference metrics and register with Prometheus.
47    pub fn new() -> Result<Self> {
48        // Cost buckets in dollars (from 0.0001 to 10.0)
49        let cost_buckets = vec![
50            0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0,
51        ];
52
53        let budget_limit = register_int_gauge_vec!(
54            "sentinel_inference_budget_limit",
55            "Token budget limit per tenant",
56            &["namespace", "service", "route", "tenant"]
57        )
58        .context("Failed to register inference_budget_limit metric")?;
59
60        let budget_used = register_int_counter_vec!(
61            "sentinel_inference_budget_used_total",
62            "Total tokens consumed against budget",
63            &["namespace", "service", "route", "tenant"]
64        )
65        .context("Failed to register inference_budget_used metric")?;
66
67        let budget_remaining = register_int_gauge_vec!(
68            "sentinel_inference_budget_remaining",
69            "Tokens remaining in budget (can be negative if over)",
70            &["namespace", "service", "route", "tenant"]
71        )
72        .context("Failed to register inference_budget_remaining metric")?;
73
74        let budget_exhausted = register_int_counter_vec!(
75            "sentinel_inference_budget_exhausted_total",
76            "Number of requests blocked due to exhausted budget",
77            &["namespace", "service", "route", "tenant"]
78        )
79        .context("Failed to register inference_budget_exhausted metric")?;
80
81        let budget_alerts = register_int_counter_vec!(
82            "sentinel_inference_budget_alerts_total",
83            "Number of budget alert thresholds crossed",
84            &["namespace", "service", "route", "tenant", "threshold"]
85        )
86        .context("Failed to register inference_budget_alerts metric")?;
87
88        let cost_total = register_counter_vec!(
89            "sentinel_inference_cost_total",
90            "Total cost of inference requests",
91            &["namespace", "service", "route", "model", "currency"]
92        )
93        .context("Failed to register inference_cost_total metric")?;
94
95        let input_tokens_total = register_int_counter_vec!(
96            "sentinel_inference_input_tokens_total",
97            "Total input tokens processed",
98            &["namespace", "service", "route", "model"]
99        )
100        .context("Failed to register inference_input_tokens metric")?;
101
102        let output_tokens_total = register_int_counter_vec!(
103            "sentinel_inference_output_tokens_total",
104            "Total output tokens generated",
105            &["namespace", "service", "route", "model"]
106        )
107        .context("Failed to register inference_output_tokens metric")?;
108
109        let cost_per_request = register_histogram_vec!(
110            "sentinel_inference_cost_per_request",
111            "Cost per inference request in dollars",
112            &["namespace", "service", "route", "model"],
113            cost_buckets
114        )
115        .context("Failed to register inference_cost_per_request metric")?;
116
117        Ok(Self {
118            budget_limit,
119            budget_used,
120            budget_remaining,
121            budget_exhausted,
122            budget_alerts,
123            cost_total,
124            input_tokens_total,
125            output_tokens_total,
126            cost_per_request,
127        })
128    }
129
130    /// Extract namespace and service strings from a scope.
131    #[inline]
132    fn scope_labels(scope: &Scope) -> (&str, &str) {
133        match scope {
134            Scope::Global => ("", ""),
135            Scope::Namespace(ns) => (ns.as_str(), ""),
136            Scope::Service { namespace, service } => (namespace.as_str(), service.as_str()),
137        }
138    }
139
140    /// Record a budget check result.
141    pub fn record_budget_check(
142        &self,
143        route: &str,
144        tenant: &str,
145        result: &BudgetCheckResult,
146        budget_limit: u64,
147        scope: &Scope,
148    ) {
149        let (namespace, service) = Self::scope_labels(scope);
150
151        // Set the budget limit gauge
152        self.budget_limit
153            .with_label_values(&[namespace, service, route, tenant])
154            .set(budget_limit as i64);
155
156        // Record exhausted events
157        if matches!(result, BudgetCheckResult::Exhausted { .. }) {
158            self.budget_exhausted
159                .with_label_values(&[namespace, service, route, tenant])
160                .inc();
161        }
162    }
163
164    /// Record token usage against a budget.
165    pub fn record_budget_usage(
166        &self,
167        route: &str,
168        tenant: &str,
169        tokens: u64,
170        remaining: i64,
171        scope: &Scope,
172    ) {
173        let (namespace, service) = Self::scope_labels(scope);
174
175        self.budget_used
176            .with_label_values(&[namespace, service, route, tenant])
177            .inc_by(tokens);
178
179        self.budget_remaining
180            .with_label_values(&[namespace, service, route, tenant])
181            .set(remaining);
182    }
183
184    /// Record a budget alert.
185    pub fn record_budget_alert(&self, route: &str, alert: &BudgetAlert, scope: &Scope) {
186        let (namespace, service) = Self::scope_labels(scope);
187
188        // Format threshold as percentage string
189        let threshold_str = format!("{:.0}", alert.threshold * 100.0);
190
191        self.budget_alerts
192            .with_label_values(&[namespace, service, route, &alert.tenant, &threshold_str])
193            .inc();
194    }
195
196    /// Record a cost result.
197    pub fn record_cost(&self, route: &str, cost: &CostResult, scope: &Scope) {
198        let (namespace, service) = Self::scope_labels(scope);
199
200        // Record total cost
201        self.cost_total
202            .with_label_values(&[namespace, service, route, &cost.model, &cost.currency])
203            .inc_by(cost.total_cost);
204
205        // Record token counts
206        self.input_tokens_total
207            .with_label_values(&[namespace, service, route, &cost.model])
208            .inc_by(cost.input_tokens);
209
210        self.output_tokens_total
211            .with_label_values(&[namespace, service, route, &cost.model])
212            .inc_by(cost.output_tokens);
213
214        // Record cost histogram
215        self.cost_per_request
216            .with_label_values(&[namespace, service, route, &cost.model])
217            .observe(cost.total_cost);
218    }
219}
220
221// ============================================================================
222// Tests
223// ============================================================================
224
#[cfg(test)]
mod tests {
    use super::*;

    // Metric registration goes through process-global Prometheus state, so
    // constructing `InferenceMetrics` in more than one test can collide.
    // The construction test is therefore ignored by default.

    #[test]
    #[ignore = "Requires isolated Prometheus registry"]
    fn test_metrics_creation() {
        assert!(InferenceMetrics::new().is_ok());
    }

    #[test]
    fn test_scope_labels() {
        // Global scope: neither label is populated.
        assert_eq!(InferenceMetrics::scope_labels(&Scope::Global), ("", ""));

        // Namespace scope: only the namespace label is populated.
        let namespace_only = Scope::Namespace("api".to_string());
        assert_eq!(
            InferenceMetrics::scope_labels(&namespace_only),
            ("api", "")
        );

        // Service scope: both labels are populated.
        let namespace_and_service = Scope::Service {
            namespace: "api".to_string(),
            service: "payments".to_string(),
        };
        assert_eq!(
            InferenceMetrics::scope_labels(&namespace_and_service),
            ("api", "payments")
        );
    }
}
258}