oxur_repl/metrics/
eval.rs

1//! Evaluation metrics for REPL performance tracking
2//!
3//! Provides [`EvalMetrics`] for tracking evaluation timing and cache performance
4//! with both local tracking (for `(stats)` display with percentiles) and
5//! `metrics` crate facade integration (for external monitoring).
6
7use metrics::{counter, histogram};
8use std::collections::VecDeque;
9use std::time::Duration;
10
11// Re-export ExecutionTier from eval (canonical definition is in eval::context)
12pub use crate::eval::ExecutionTier;
13
/// Per-tier cap on retained timing samples.
///
/// Percentiles are computed from at most this many of the most recently
/// recorded samples for each execution tier.
const MAX_SAMPLES: usize = 1_000;
16
/// Evaluation metrics collector.
///
/// Tracks execution timing samples, cache hit/miss counts and error counts
/// for a single session. Local state is kept so percentiles can be computed
/// on demand, while every recorded event is also emitted through the
/// `metrics` crate facade for external monitoring.
///
/// # Memory
///
/// Each tier retains at most `MAX_SAMPLES` samples. With 1000 samples per
/// tier, three tiers and a 16-byte [`Duration`] (its size on typical 64-bit
/// targets), the sample buffers occupy roughly 48 KB in total.
///
/// # Usage
///
/// ```
/// use oxur_repl::metrics::{EvalMetrics, ExecutionTier};
/// use std::time::Duration;
///
/// let mut metrics = EvalMetrics::new("session-1");
///
/// // Record an evaluation
/// metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(1));
///
/// // Get percentiles for display
/// if let Some(p) = metrics.percentiles(ExecutionTier::Calculator) {
///     println!("p50: {:.2}ms", p.p50);
/// }
///
/// // Get cache stats
/// let cache = metrics.cache_stats();
/// println!("Hit rate: {:.1}%", cache.hit_rate);
/// ```
#[derive(Debug, Clone)]
pub struct EvalMetrics {
    /// Session identifier
    session_id: String,

    /// Tier 1 (Calculator) timing samples, oldest first
    tier1_samples: VecDeque<Duration>,

    /// Tier 2 (CachedLoaded) timing samples, oldest first
    tier2_samples: VecDeque<Duration>,

    /// Tier 3 (JustInTime) timing samples, oldest first
    tier3_samples: VecDeque<Duration>,

    /// Cache hit count
    cache_hits: u64,

    /// Cache miss count
    cache_misses: u64,

    /// Total evaluations
    total_evals: u64,

    /// Parse error count
    parse_errors: u64,

    /// Compile error count
    compile_errors: u64,

    /// Runtime error count
    runtime_errors: u64,
}
77
78impl EvalMetrics {
79    /// Create a new metrics collector for a session.
80    pub fn new(session_id: impl Into<String>) -> Self {
81        Self {
82            session_id: session_id.into(),
83            tier1_samples: VecDeque::with_capacity(MAX_SAMPLES),
84            tier2_samples: VecDeque::with_capacity(MAX_SAMPLES),
85            tier3_samples: VecDeque::with_capacity(MAX_SAMPLES),
86            cache_hits: 0,
87            cache_misses: 0,
88            total_evals: 0,
89            parse_errors: 0,
90            compile_errors: 0,
91            runtime_errors: 0,
92        }
93    }
94
95    /// Record an evaluation result.
96    ///
97    /// Adds a timing sample to the appropriate tier and updates cache metrics.
98    /// Uses a circular buffer pattern - oldest samples are evicted when MAX_SAMPLES is reached.
99    ///
100    /// Also emits metrics via the `metrics` crate facade:
101    /// - `repl.eval.total` (counter, labeled by tier)
102    /// - `repl.eval.duration_ms` (histogram, labeled by tier)
103    /// - `repl.cache.hits` / `repl.cache.misses` (counters)
104    pub fn record(&mut self, tier: ExecutionTier, cached: bool, duration: Duration) {
105        self.total_evals += 1;
106
107        if cached {
108            self.cache_hits += 1;
109        } else {
110            self.cache_misses += 1;
111        }
112
113        // Add sample to appropriate tier, evicting oldest if at capacity
114        let samples = match tier {
115            ExecutionTier::Calculator => &mut self.tier1_samples,
116            ExecutionTier::CachedLoaded => &mut self.tier2_samples,
117            ExecutionTier::JustInTime => &mut self.tier3_samples,
118        };
119
120        if samples.len() >= MAX_SAMPLES {
121            samples.pop_front();
122        }
123        samples.push_back(duration);
124
125        // Emit metrics via facade
126        let tier_label = tier.as_label();
127
128        counter!("repl.eval.total", "tier" => tier_label).increment(1);
129        histogram!("repl.eval.duration_ms", "tier" => tier_label)
130            .record(duration.as_millis() as f64);
131
132        if cached {
133            counter!("repl.cache.hits").increment(1);
134        } else {
135            counter!("repl.cache.misses").increment(1);
136        }
137    }
138
139    /// Calculate percentiles for a given tier.
140    ///
141    /// Returns None if no samples have been recorded for this tier.
142    pub fn percentiles(&self, tier: ExecutionTier) -> Option<Percentiles> {
143        let samples = match tier {
144            ExecutionTier::Calculator => &self.tier1_samples,
145            ExecutionTier::CachedLoaded => &self.tier2_samples,
146            ExecutionTier::JustInTime => &self.tier3_samples,
147        };
148
149        if samples.is_empty() {
150            return None;
151        }
152
153        // Convert to sorted vec of milliseconds
154        let mut sorted: Vec<f64> = samples.iter().map(|d| d.as_secs_f64() * 1000.0).collect();
155        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
156
157        Some(Percentiles {
158            p50: percentile(&sorted, 50.0),
159            p95: percentile(&sorted, 95.0),
160            p99: percentile(&sorted, 99.0),
161            min: sorted[0],
162            max: sorted[sorted.len() - 1],
163            count: sorted.len(),
164        })
165    }
166
167    /// Get cache statistics.
168    pub fn cache_stats(&self) -> CacheStats {
169        CacheStats {
170            hits: self.cache_hits,
171            misses: self.cache_misses,
172            hit_rate: if self.total_evals > 0 {
173                (self.cache_hits as f64 / self.total_evals as f64) * 100.0
174            } else {
175                0.0
176            },
177        }
178    }
179
180    /// Get total evaluation count.
181    pub fn total_evaluations(&self) -> u64 {
182        self.total_evals
183    }
184
185    /// Get session ID.
186    pub fn session_id(&self) -> &str {
187        &self.session_id
188    }
189
190    /// Record a parse error.
191    pub fn record_parse_error(&mut self) {
192        self.parse_errors += 1;
193        counter!("repl.errors.total", "type" => "parse").increment(1);
194    }
195
196    /// Record a compile error.
197    pub fn record_compile_error(&mut self) {
198        self.compile_errors += 1;
199        counter!("repl.errors.total", "type" => "compile").increment(1);
200    }
201
202    /// Record a runtime error.
203    pub fn record_runtime_error(&mut self) {
204        self.runtime_errors += 1;
205        counter!("repl.errors.total", "type" => "runtime").increment(1);
206    }
207
208    /// Get total error count.
209    pub fn total_errors(&self) -> u64 {
210        self.parse_errors + self.compile_errors + self.runtime_errors
211    }
212
213    /// Get error rate as percentage.
214    pub fn error_rate(&self) -> f64 {
215        let total = self.total_evals + self.total_errors();
216        if total > 0 {
217            (self.total_errors() as f64 / total as f64) * 100.0
218        } else {
219            0.0
220        }
221    }
222}
223
/// Percentile statistics for a tier.
///
/// As produced by `EvalMetrics::percentiles`, every timing value is expressed
/// in milliseconds (fractional) and is derived from the samples currently
/// retained for the tier.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Percentiles {
    /// Median (50th percentile), in milliseconds
    pub p50: f64,
    /// 95th percentile, in milliseconds
    pub p95: f64,
    /// 99th percentile, in milliseconds
    pub p99: f64,
    /// Minimum value, in milliseconds
    pub min: f64,
    /// Maximum value, in milliseconds
    pub max: f64,
    /// Number of samples the statistics were computed from
    pub count: usize,
}
240
/// Cache statistics.
///
/// As produced by `EvalMetrics::cache_stats`, `hit_rate` is
/// `hits / total evaluations * 100`, and `0.0` when no evaluation has been
/// recorded.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CacheStats {
    /// Number of cache hits
    pub hits: u64,
    /// Number of cache misses
    pub misses: u64,
    /// Hit rate as percentage (0-100)
    pub hit_rate: f64,
}
251
/// Snapshot of session statistics for protocol transport.
///
/// Contains all session-level evaluation metrics in a serializable form.
/// Built by `EvalMetrics::snapshot`; it owns all of its data, so it is not
/// affected by later recordings.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct SessionStatsSnapshot {
    /// Session identifier
    pub session_id: String,
    /// Total evaluations performed
    pub total_evaluations: u64,
    /// Cache statistics
    pub cache: CacheStats,
    /// Tier 1 (Calculator) percentiles, if any samples exist
    pub tier1_percentiles: Option<Percentiles>,
    /// Tier 2 (CachedLoaded) percentiles, if any samples exist
    pub tier2_percentiles: Option<Percentiles>,
    /// Tier 3 (JustInTime) percentiles, if any samples exist
    pub tier3_percentiles: Option<Percentiles>,
    /// Parse error count
    pub parse_errors: u64,
    /// Compile error count
    pub compile_errors: u64,
    /// Runtime error count
    pub runtime_errors: u64,
    /// Average evaluation time across all tiers (milliseconds).
    ///
    /// Averaged over the samples currently retained in the tier buffers;
    /// `0.0` when there are none.
    pub average_eval_time_ms: f64,
}
278
279impl EvalMetrics {
280    /// Create a snapshot of current metrics for protocol transport.
281    pub fn snapshot(&self) -> SessionStatsSnapshot {
282        // Calculate average eval time across all tiers
283        let all_samples: Vec<f64> = self
284            .tier1_samples
285            .iter()
286            .chain(self.tier2_samples.iter())
287            .chain(self.tier3_samples.iter())
288            .map(|d| d.as_secs_f64() * 1000.0)
289            .collect();
290
291        let average_eval_time_ms = if !all_samples.is_empty() {
292            all_samples.iter().sum::<f64>() / all_samples.len() as f64
293        } else {
294            0.0
295        };
296
297        SessionStatsSnapshot {
298            session_id: self.session_id.clone(),
299            total_evaluations: self.total_evals,
300            cache: self.cache_stats(),
301            tier1_percentiles: self.percentiles(ExecutionTier::Calculator),
302            tier2_percentiles: self.percentiles(ExecutionTier::CachedLoaded),
303            tier3_percentiles: self.percentiles(ExecutionTier::JustInTime),
304            parse_errors: self.parse_errors,
305            compile_errors: self.compile_errors,
306            runtime_errors: self.runtime_errors,
307            average_eval_time_ms,
308        }
309    }
310}
311
/// Calculate a percentile of an ascending-sorted slice using linear interpolation.
///
/// `p` is a percentage; values outside `0.0..=100.0` are clamped to that
/// range, so `p > 100.0` yields the last element instead of indexing past the
/// end of the slice. The fractional rank `p / 100 * (len - 1)` is interpolated
/// between its two neighbouring elements.
///
/// Returns `0.0` for an empty slice and the sole element for a slice of
/// length one. The caller is responsible for passing sorted data.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    if sorted.is_empty() {
        return 0.0;
    }

    let last = sorted.len() - 1;
    let rank = (p.clamp(0.0, 100.0) / 100.0) * last as f64;

    // Float-to-usize casts saturate (NaN becomes 0); `min` keeps both
    // indices in bounds regardless of rounding.
    let lower = (rank.floor() as usize).min(last);
    let upper = (rank.ceil() as usize).min(last);

    if lower == upper {
        sorted[lower]
    } else {
        let fraction = rank - lower as f64;
        sorted[lower] * (1.0 - fraction) + sorted[upper] * fraction
    }
}
332
#[cfg(test)]
mod tests {
    //! Unit tests for the evaluation metrics collector.

    use super::*;

    #[test]
    fn test_eval_metrics_creation() {
        let metrics = EvalMetrics::new("test-session");
        assert_eq!(metrics.total_evaluations(), 0);
        assert_eq!(metrics.session_id(), "test-session");
    }

    #[test]
    fn test_record_tier1() {
        let mut metrics = EvalMetrics::new("test");
        metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(1));

        assert_eq!(metrics.total_evaluations(), 1);
        assert_eq!(metrics.cache_stats().misses, 1);

        let p = metrics.percentiles(ExecutionTier::Calculator).unwrap();
        assert_eq!(p.count, 1);
        assert!((p.p50 - 1.0).abs() < 0.1);
    }

    #[test]
    fn test_percentile_calculation() {
        let mut metrics = EvalMetrics::new("test");

        // Add samples: 1, 2, 3, 4, 5 ms
        for i in 1..=5 {
            metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(i));
        }

        let p = metrics.percentiles(ExecutionTier::Calculator).unwrap();
        assert_eq!(p.count, 5);
        assert!((p.p50 - 3.0).abs() < 0.1); // Median should be 3
        assert!((p.min - 1.0).abs() < 0.1);
        assert!((p.max - 5.0).abs() < 0.1);
    }

    #[test]
    fn test_sample_limit() {
        let mut metrics = EvalMetrics::new("test");

        // Add MAX_SAMPLES + 100 samples: 0 ms, 1 ms, 2 ms, ...
        for i in 0..(MAX_SAMPLES + 100) {
            metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(i as u64));
        }

        let p = metrics.percentiles(ExecutionTier::Calculator).unwrap();
        assert_eq!(p.count, MAX_SAMPLES); // Should cap at MAX_SAMPLES

        // The 100 oldest samples (0..100 ms) must be the ones that were evicted.
        assert!((p.min - 100.0).abs() < 0.1);
        assert!((p.max - (MAX_SAMPLES + 99) as f64).abs() < 0.1);

        // Eviction only bounds the sample buffer, not the evaluation counter.
        assert_eq!(metrics.total_evaluations(), (MAX_SAMPLES + 100) as u64);
    }

    #[test]
    fn test_cache_hit_rate() {
        let mut metrics = EvalMetrics::new("test");

        // 7 hits, 3 misses
        for _ in 0..7 {
            metrics.record(ExecutionTier::CachedLoaded, true, Duration::from_millis(2));
        }
        for _ in 0..3 {
            metrics.record(ExecutionTier::JustInTime, false, Duration::from_millis(50));
        }

        let cache = metrics.cache_stats();
        assert_eq!(cache.hits, 7);
        assert_eq!(cache.misses, 3);
        assert!((cache.hit_rate - 70.0).abs() < 0.1);
    }

    #[test]
    fn test_cache_hit_rate_without_evaluations() {
        let metrics = EvalMetrics::new("test");
        let cache = metrics.cache_stats();
        assert_eq!(cache.hits, 0);
        assert_eq!(cache.misses, 0);
        assert!(cache.hit_rate.abs() < f64::EPSILON);
    }

    #[test]
    fn test_percentile_single_value() {
        let mut metrics = EvalMetrics::new("test");
        metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(5));

        let p = metrics.percentiles(ExecutionTier::Calculator).unwrap();
        assert_eq!(p.count, 1);
        assert!((p.p50 - 5.0).abs() < 0.1);
        assert!((p.p95 - 5.0).abs() < 0.1);
        assert!((p.p99 - 5.0).abs() < 0.1);
    }

    #[test]
    fn test_empty_percentiles() {
        let metrics = EvalMetrics::new("test");
        assert!(metrics.percentiles(ExecutionTier::Calculator).is_none());
    }

    #[test]
    fn test_percentile_function() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert!((percentile(&data, 0.0) - 1.0).abs() < 0.1);
        assert!((percentile(&data, 50.0) - 3.0).abs() < 0.1);
        assert!((percentile(&data, 100.0) - 5.0).abs() < 0.1);
    }

    #[test]
    fn test_percentile_function_interpolates() {
        // Rank 0.5 lies halfway between the two elements.
        let data = vec![1.0, 2.0];
        assert!((percentile(&data, 50.0) - 1.5).abs() < 1e-9);
    }

    #[test]
    fn test_percentile_function_empty() {
        let data: Vec<f64> = Vec::new();
        assert!(percentile(&data, 50.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_error_tracking() {
        let mut metrics = EvalMetrics::new("test");
        assert_eq!(metrics.total_errors(), 0);
        assert!(metrics.error_rate().abs() < f64::EPSILON);

        metrics.record_parse_error();
        metrics.record_compile_error();
        metrics.record_runtime_error();
        assert_eq!(metrics.total_errors(), 3);

        // 3 errors out of (1 evaluation + 3 errors) = 75%
        metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(1));
        assert!((metrics.error_rate() - 75.0).abs() < 0.1);
    }

    #[test]
    fn test_snapshot() {
        let mut metrics = EvalMetrics::new("snap");
        metrics.record(ExecutionTier::Calculator, false, Duration::from_millis(1));
        metrics.record(ExecutionTier::CachedLoaded, true, Duration::from_millis(3));
        metrics.record_parse_error();

        let snapshot = metrics.snapshot();
        assert_eq!(snapshot.session_id, "snap");
        assert_eq!(snapshot.total_evaluations, 2);
        assert_eq!(snapshot.cache.hits, 1);
        assert_eq!(snapshot.cache.misses, 1);
        assert!((snapshot.cache.hit_rate - 50.0).abs() < 0.1);
        assert!(snapshot.tier1_percentiles.is_some());
        assert!(snapshot.tier2_percentiles.is_some());
        assert!(snapshot.tier3_percentiles.is_none());
        assert_eq!(snapshot.parse_errors, 1);
        assert_eq!(snapshot.compile_errors, 0);
        assert_eq!(snapshot.runtime_errors, 0);
        // Mean of 1 ms and 3 ms
        assert!((snapshot.average_eval_time_ms - 2.0).abs() < 0.1);
    }

    #[test]
    fn test_snapshot_without_samples() {
        let snapshot = EvalMetrics::new("empty").snapshot();
        assert_eq!(snapshot.total_evaluations, 0);
        assert!(snapshot.tier1_percentiles.is_none());
        assert!(snapshot.tier2_percentiles.is_none());
        assert!(snapshot.tier3_percentiles.is_none());
        assert!(snapshot.average_eval_time_ms.abs() < f64::EPSILON);
    }

    #[test]
    fn test_execution_tier_labels() {
        assert_eq!(ExecutionTier::Calculator.as_label(), "calculator");
        assert_eq!(ExecutionTier::CachedLoaded.as_label(), "cached");
        assert_eq!(ExecutionTier::JustInTime.as_label(), "jit");
    }

    #[test]
    fn test_execution_tier_display_names() {
        assert_eq!(ExecutionTier::Calculator.display_name(), "Calculator");
        assert_eq!(ExecutionTier::CachedLoaded.display_name(), "Cached");
        assert_eq!(ExecutionTier::JustInTime.display_name(), "JIT");
    }
}