sentinel_proxy/inference/
metrics.rs1use anyhow::{Context, Result};
9use prometheus::{
10 register_counter_vec, register_histogram_vec, register_int_counter_vec, register_int_gauge_vec,
11 CounterVec, HistogramVec, IntCounterVec, IntGaugeVec,
12};
13
14use sentinel_common::budget::{BudgetAlert, BudgetCheckResult, CostResult};
15use sentinel_common::ids::Scope;
16
17pub struct InferenceMetrics {
22 budget_limit: IntGaugeVec,
25 budget_used: IntCounterVec,
27 budget_remaining: IntGaugeVec,
29 budget_exhausted: IntCounterVec,
31 budget_alerts: IntCounterVec,
33
34 cost_total: CounterVec,
37 input_tokens_total: IntCounterVec,
39 output_tokens_total: IntCounterVec,
41 cost_per_request: HistogramVec,
43}
44
45impl InferenceMetrics {
46 pub fn new() -> Result<Self> {
48 let cost_buckets = vec![
50 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0,
51 ];
52
53 let budget_limit = register_int_gauge_vec!(
54 "sentinel_inference_budget_limit",
55 "Token budget limit per tenant",
56 &["namespace", "service", "route", "tenant"]
57 )
58 .context("Failed to register inference_budget_limit metric")?;
59
60 let budget_used = register_int_counter_vec!(
61 "sentinel_inference_budget_used_total",
62 "Total tokens consumed against budget",
63 &["namespace", "service", "route", "tenant"]
64 )
65 .context("Failed to register inference_budget_used metric")?;
66
67 let budget_remaining = register_int_gauge_vec!(
68 "sentinel_inference_budget_remaining",
69 "Tokens remaining in budget (can be negative if over)",
70 &["namespace", "service", "route", "tenant"]
71 )
72 .context("Failed to register inference_budget_remaining metric")?;
73
74 let budget_exhausted = register_int_counter_vec!(
75 "sentinel_inference_budget_exhausted_total",
76 "Number of requests blocked due to exhausted budget",
77 &["namespace", "service", "route", "tenant"]
78 )
79 .context("Failed to register inference_budget_exhausted metric")?;
80
81 let budget_alerts = register_int_counter_vec!(
82 "sentinel_inference_budget_alerts_total",
83 "Number of budget alert thresholds crossed",
84 &["namespace", "service", "route", "tenant", "threshold"]
85 )
86 .context("Failed to register inference_budget_alerts metric")?;
87
88 let cost_total = register_counter_vec!(
89 "sentinel_inference_cost_total",
90 "Total cost of inference requests",
91 &["namespace", "service", "route", "model", "currency"]
92 )
93 .context("Failed to register inference_cost_total metric")?;
94
95 let input_tokens_total = register_int_counter_vec!(
96 "sentinel_inference_input_tokens_total",
97 "Total input tokens processed",
98 &["namespace", "service", "route", "model"]
99 )
100 .context("Failed to register inference_input_tokens metric")?;
101
102 let output_tokens_total = register_int_counter_vec!(
103 "sentinel_inference_output_tokens_total",
104 "Total output tokens generated",
105 &["namespace", "service", "route", "model"]
106 )
107 .context("Failed to register inference_output_tokens metric")?;
108
109 let cost_per_request = register_histogram_vec!(
110 "sentinel_inference_cost_per_request",
111 "Cost per inference request in dollars",
112 &["namespace", "service", "route", "model"],
113 cost_buckets
114 )
115 .context("Failed to register inference_cost_per_request metric")?;
116
117 Ok(Self {
118 budget_limit,
119 budget_used,
120 budget_remaining,
121 budget_exhausted,
122 budget_alerts,
123 cost_total,
124 input_tokens_total,
125 output_tokens_total,
126 cost_per_request,
127 })
128 }
129
130 #[inline]
132 fn scope_labels(scope: &Scope) -> (&str, &str) {
133 match scope {
134 Scope::Global => ("", ""),
135 Scope::Namespace(ns) => (ns.as_str(), ""),
136 Scope::Service { namespace, service } => (namespace.as_str(), service.as_str()),
137 }
138 }
139
140 pub fn record_budget_check(
142 &self,
143 route: &str,
144 tenant: &str,
145 result: &BudgetCheckResult,
146 budget_limit: u64,
147 scope: &Scope,
148 ) {
149 let (namespace, service) = Self::scope_labels(scope);
150
151 self.budget_limit
153 .with_label_values(&[namespace, service, route, tenant])
154 .set(budget_limit as i64);
155
156 if matches!(result, BudgetCheckResult::Exhausted { .. }) {
158 self.budget_exhausted
159 .with_label_values(&[namespace, service, route, tenant])
160 .inc();
161 }
162 }
163
164 pub fn record_budget_usage(
166 &self,
167 route: &str,
168 tenant: &str,
169 tokens: u64,
170 remaining: i64,
171 scope: &Scope,
172 ) {
173 let (namespace, service) = Self::scope_labels(scope);
174
175 self.budget_used
176 .with_label_values(&[namespace, service, route, tenant])
177 .inc_by(tokens);
178
179 self.budget_remaining
180 .with_label_values(&[namespace, service, route, tenant])
181 .set(remaining);
182 }
183
184 pub fn record_budget_alert(&self, route: &str, alert: &BudgetAlert, scope: &Scope) {
186 let (namespace, service) = Self::scope_labels(scope);
187
188 let threshold_str = format!("{:.0}", alert.threshold * 100.0);
190
191 self.budget_alerts
192 .with_label_values(&[namespace, service, route, &alert.tenant, &threshold_str])
193 .inc();
194 }
195
196 pub fn record_cost(&self, route: &str, cost: &CostResult, scope: &Scope) {
198 let (namespace, service) = Self::scope_labels(scope);
199
200 self.cost_total
202 .with_label_values(&[namespace, service, route, &cost.model, &cost.currency])
203 .inc_by(cost.total_cost);
204
205 self.input_tokens_total
207 .with_label_values(&[namespace, service, route, &cost.model])
208 .inc_by(cost.input_tokens);
209
210 self.output_tokens_total
211 .with_label_values(&[namespace, service, route, &cost.model])
212 .inc_by(cost.output_tokens);
213
214 self.cost_per_request
216 .with_label_values(&[namespace, service, route, &cost.model])
217 .observe(cost.total_cost);
218 }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction should succeed. Ignored by default because, per the
    /// ignore reason, it needs a Prometheus registry that is not shared with
    /// other tests.
    #[test]
    #[ignore = "Requires isolated Prometheus registry"]
    fn test_metrics_creation() {
        assert!(InferenceMetrics::new().is_ok());
    }

    /// Each scope variant yields the expected `(namespace, service)` pair,
    /// with missing components reported as empty strings.
    #[test]
    fn test_scope_labels() {
        // Global scope carries neither component.
        assert_eq!(InferenceMetrics::scope_labels(&Scope::Global), ("", ""));

        // Namespace scope carries only the namespace.
        let namespace_only = Scope::Namespace("api".to_owned());
        assert_eq!(
            InferenceMetrics::scope_labels(&namespace_only),
            ("api", "")
        );

        // Service scope carries both.
        let namespace_and_service = Scope::Service {
            namespace: "api".to_owned(),
            service: "payments".to_owned(),
        };
        assert_eq!(
            InferenceMetrics::scope_labels(&namespace_and_service),
            ("api", "payments")
        );
    }
}