ai_lib/
metrics.rs

1use async_trait::async_trait;
2
3/// Simple, injectable metrics trait used by adapters/clients.
4/// Keep the surface minimal: counters, gauges and a timer RAII helper.
5#[async_trait]
6pub trait Metrics: Send + Sync + 'static {
7    /// Increment a named counter by `value`.
8    async fn incr_counter(&self, name: &str, value: u64);
9
10    /// Record a gauge metric.
11    async fn record_gauge(&self, name: &str, value: f64);
12
13    /// Start a timer for a named operation. Returns a boxed Timer which should be stopped
14    /// when the operation completes. Implementations may return None if timers aren't supported.
15    async fn start_timer(&self, name: &str) -> Option<Box<dyn Timer + Send>>;
16
17    /// Record a histogram value for a named metric.
18    async fn record_histogram(&self, name: &str, value: f64);
19
20    /// Record a histogram value with tags/labels.
21    async fn record_histogram_with_tags(&self, name: &str, value: f64, tags: &[(&str, &str)]);
22
23    /// Increment a counter with tags/labels.
24    async fn incr_counter_with_tags(&self, name: &str, value: u64, tags: &[(&str, &str)]);
25
26    /// Record a gauge with tags/labels.
27    async fn record_gauge_with_tags(&self, name: &str, value: f64, tags: &[(&str, &str)]);
28
29    /// Record an error occurrence.
30    async fn record_error(&self, name: &str, error_type: &str);
31
32    /// Record a success/failure boolean metric.
33    async fn record_success(&self, name: &str, success: bool);
34}
35
/// Handle for an in-flight timing, as handed out by `Metrics::start_timer`.
pub trait Timer: Send {
    /// Ends the timing and records the elapsed duration.
    ///
    /// Takes the boxed handle by value, so a given timer can be stopped at most once.
    fn stop(self: Box<Self>);
}
41
/// No-op metrics implementation suitable as a default.
///
/// A zero-sized unit type, so it is free to copy and cheap to embed wherever a metrics
/// sink is required but nothing should be recorded.
#[derive(Debug, Clone, Copy)]
pub struct NoopMetrics;
44
45#[async_trait]
46impl Metrics for NoopMetrics {
47    async fn incr_counter(&self, _name: &str, _value: u64) {}
48    async fn record_gauge(&self, _name: &str, _value: f64) {}
49    async fn start_timer(&self, _name: &str) -> Option<Box<dyn Timer + Send>> {
50        None
51    }
52    async fn record_histogram(&self, _name: &str, _value: f64) {}
53    async fn record_histogram_with_tags(&self, _name: &str, _value: f64, _tags: &[(&str, &str)]) {}
54    async fn incr_counter_with_tags(&self, _name: &str, _value: u64, _tags: &[(&str, &str)]) {}
55    async fn record_gauge_with_tags(&self, _name: &str, _value: f64, _tags: &[(&str, &str)]) {}
56    async fn record_error(&self, _name: &str, _error_type: &str) {}
57    async fn record_success(&self, _name: &str, _success: bool) {}
58}
59
/// A no-op timer, for `Metrics::start_timer` implementations that want to return a
/// concrete value rather than `None`.
///
/// Zero-sized; stopping it records nothing.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoopTimer;
62impl Timer for NoopTimer {
63    fn stop(self: Box<Self>) {}
64}
65
66impl NoopMetrics {
67    pub fn new() -> Self {
68        NoopMetrics
69    }
70}
71
72impl Default for NoopMetrics {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
78/// Convenience methods for common metric patterns
79#[allow(async_fn_in_trait)]
80pub trait MetricsExt: Metrics {
81    /// Record a request with timing and success/failure
82    async fn record_request(
83        &self,
84        name: &str,
85        timer: Option<Box<dyn Timer + Send>>,
86        success: bool,
87    ) {
88        if let Some(t) = timer {
89            t.stop();
90        }
91        self.record_success(name, success).await;
92    }
93
94    /// Record a request with timing, success/failure, and tags
95    async fn record_request_with_tags(
96        &self,
97        name: &str,
98        timer: Option<Box<dyn Timer + Send>>,
99        success: bool,
100        tags: &[(&str, &str)],
101    ) {
102        if let Some(t) = timer {
103            t.stop();
104        }
105        self.record_success(name, success).await;
106        // Record additional metrics with tags
107        self.incr_counter_with_tags(&format!("{}.total", name), 1, tags)
108            .await;
109        if success {
110            self.incr_counter_with_tags(&format!("{}.success", name), 1, tags)
111                .await;
112        } else {
113            self.incr_counter_with_tags(&format!("{}.failure", name), 1, tags)
114                .await;
115        }
116    }
117
118    /// Record an error with context
119    async fn record_error_with_context(&self, name: &str, error_type: &str, context: &str) {
120        self.record_error(name, error_type).await;
121        self.incr_counter_with_tags(name, 1, &[("error_type", error_type), ("context", context)])
122            .await;
123    }
124
125    /// Record a complete request with timing, status, and success metrics
126    async fn record_complete_request(
127        &self,
128        name: &str,
129        timer: Option<Box<dyn Timer + Send>>,
130        status_code: u16,
131        success: bool,
132        tags: &[(&str, &str)],
133    ) {
134        if let Some(t) = timer {
135            t.stop();
136        }
137
138        // Record basic metrics
139        self.record_success(name, success).await;
140        self.record_gauge_with_tags(&format!("{}.status_code", name), status_code as f64, tags)
141            .await;
142
143        // Record counters
144        self.incr_counter_with_tags(&format!("{}.total", name), 1, tags)
145            .await;
146        if success {
147            self.incr_counter_with_tags(&format!("{}.success", name), 1, tags)
148                .await;
149        } else {
150            self.incr_counter_with_tags(&format!("{}.failure", name), 1, tags)
151                .await;
152        }
153    }
154
155    /// Record latency percentiles for a batch of measurements
156    async fn record_batch_latency_percentiles(
157        &self,
158        name: &str,
159        measurements: &[f64],
160        tags: &[(&str, &str)],
161    ) {
162        if measurements.is_empty() {
163            return;
164        }
165
166        let mut sorted = measurements.to_vec();
167        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
168
169        let len = sorted.len();
170        let p50 = sorted[(len * 50 / 100).min(len - 1)];
171        let p95 = sorted[(len * 95 / 100).min(len - 1)];
172        let p99 = sorted[(len * 99 / 100).min(len - 1)];
173
174        self.record_gauge_with_tags(&format!("{}.latency.p50", name), p50, tags)
175            .await;
176        self.record_gauge_with_tags(&format!("{}.latency.p95", name), p95, tags)
177            .await;
178        self.record_gauge_with_tags(&format!("{}.latency.p99", name), p99, tags)
179            .await;
180    }
181}
182
183impl<T: Metrics> MetricsExt for T {}
184
/// Centralized metric key builders, so every adapter spells metric names the same way.
pub mod keys {
    /// Joins a scope and a metric name as `"<scope>.<metric>"`.
    fn scoped(scope: &str, metric: &str) -> String {
        format!("{scope}.{metric}")
    }

    /// Builds a routing key of the form `"routing.<route>.<metric>"`.
    fn routed(route: &str, metric: &str) -> String {
        format!("routing.{route}.{metric}")
    }

    // --- Requests and outcomes ---

    /// `"<provider>.requests"` — request counter.
    pub fn requests(provider: &str) -> String {
        scoped(provider, "requests")
    }

    /// `"<provider>.request_duration_ms"` — request duration timer, in milliseconds.
    pub fn request_duration_ms(provider: &str) -> String {
        scoped(provider, "request_duration_ms")
    }

    /// `"<provider>.success"` — explicit success key (a ratio can be derived from it).
    pub fn success(provider: &str) -> String {
        scoped(provider, "success")
    }

    /// `"<provider>.failure"` — explicit failure key.
    pub fn failure(provider: &str) -> String {
        scoped(provider, "failure")
    }

    // --- Latency percentiles ---

    /// `"<provider>.latency_p50"`.
    pub fn latency_p50(provider: &str) -> String {
        scoped(provider, "latency_p50")
    }

    /// `"<provider>.latency_p95"`.
    pub fn latency_p95(provider: &str) -> String {
        scoped(provider, "latency_p95")
    }

    /// `"<provider>.latency_p99"`.
    pub fn latency_p99(provider: &str) -> String {
        scoped(provider, "latency_p99")
    }

    // --- Rates and distributions ---

    /// `"<provider>.status_codes"` — status code distribution.
    pub fn status_codes(provider: &str) -> String {
        scoped(provider, "status_codes")
    }

    /// `"<provider>.error_rate"`.
    pub fn error_rate(provider: &str) -> String {
        scoped(provider, "error_rate")
    }

    /// `"<provider>.throughput"` — requests per second.
    pub fn throughput(provider: &str) -> String {
        scoped(provider, "throughput")
    }

    // --- Cost and token usage ---

    /// `"<provider>.cost_usd"`.
    pub fn cost_usd(provider: &str) -> String {
        scoped(provider, "cost_usd")
    }

    /// `"<provider>.cost_per_request"`.
    pub fn cost_per_request(provider: &str) -> String {
        scoped(provider, "cost_per_request")
    }

    /// `"<provider>.tokens_input"`.
    pub fn tokens_input(provider: &str) -> String {
        scoped(provider, "tokens_input")
    }

    /// `"<provider>.tokens_output"`.
    pub fn tokens_output(provider: &str) -> String {
        scoped(provider, "tokens_output")
    }

    // --- Routing ---

    /// `"routing.<route>.requests"`.
    pub fn routing_requests(route: &str) -> String {
        routed(route, "requests")
    }

    /// `"routing.<route>.selected"`.
    pub fn routing_selected(route: &str) -> String {
        routed(route, "selected")
    }

    /// `"routing.<route>.health_fail"`.
    pub fn routing_health_fail(route: &str) -> String {
        routed(route, "health_fail")
    }
}
254
/// Minimal cost accounting helper (feature-gated behind `cost_metrics`).
#[cfg(feature = "cost_metrics")]
pub mod cost {
    use crate::metrics::Metrics;

    /// Reads a USD-per-1000-tokens rate from the environment variable `var`.
    ///
    /// Surrounding whitespace is ignored. The rate falls back to `0.0` when the variable is
    /// unset, is not valid Unicode, does not parse as a number, or parses to a non-finite or
    /// negative value (`f64` parsing accepts spellings such as `NaN` and `inf`, which would
    /// otherwise poison every estimate).
    fn rate_per_1k(var: &str) -> f64 {
        std::env::var(var)
            .ok()
            .and_then(|raw| raw.trim().parse::<f64>().ok())
            .filter(|rate| rate.is_finite() && *rate >= 0.0)
            .unwrap_or(0.0)
    }

    /// Estimate the USD cost of a request from its token counts.
    ///
    /// Rates come from the environment variables `COST_INPUT_PER_1K` and
    /// `COST_OUTPUT_PER_1K` (USD per 1000 tokens); a missing or unusable rate counts as `0.0`.
    /// The environment is read on every call.
    pub fn estimate_usd(input_tokens: u32, output_tokens: u32) -> f64 {
        let in_rate = rate_per_1k("COST_INPUT_PER_1K");
        let out_rate = rate_per_1k("COST_OUTPUT_PER_1K");
        (f64::from(input_tokens) / 1000.0) * in_rate + (f64::from(output_tokens) / 1000.0) * out_rate
    }

    /// Report `usd` to the histogram `"cost.usd"`, tagged with `provider` and `model`.
    ///
    /// Only the histogram is written; no counters are emitted here.
    pub async fn record_cost<M: Metrics + ?Sized>(m: &M, provider: &str, model: &str, usd: f64) {
        m.record_histogram_with_tags("cost.usd", usd, &[("provider", provider), ("model", model)])
            .await;
    }
}
279
280// Environment variables for optional features
281//
282// cost_metrics (if enabled):
283// - COST_INPUT_PER_1K: USD per 1000 input tokens
284// - COST_OUTPUT_PER_1K: USD per 1000 output tokens
285//
286// Note: In enterprise deployments (ai-lib PRO), these can be centrally managed
287// and hot-reloaded via external configuration providers.