use std::time::{Duration, Instant};
/// Raw counters and latency totals collected by the `record_*` methods.
///
/// NOTE(review): "MoE" presumably stands for mixture-of-experts, with the
/// counters describing an expert cache, router, pager and prefetcher — this
/// is inferred from the names only; confirm against the callers.
///
/// All fields are public and start at zero (`Default`). Derived ratios and
/// averages are exposed through methods such as `hit_rate` and `summary`.
#[derive(Debug, Clone, Default)]
pub struct MoeMetrics {
/// Number of cache hits recorded.
pub cache_hits: u64,
/// Number of cache misses recorded.
pub cache_misses: u64,
/// Number of page-in operations recorded (one per `record_page_in` call).
pub experts_paged_in: u64,
/// Number of page-out operations recorded (one per `record_page_out` call).
pub experts_paged_out: u64,
/// Number of routing decisions recorded (one per `record_routing` call).
pub routing_decisions: u64,
/// Sum of all recorded routing latencies, in microseconds.
pub routing_latency_us: u64,
/// Largest single routing latency recorded, in microseconds.
pub max_routing_latency_us: u64,
/// Sum of all recorded page-in latencies, in microseconds.
/// Page-outs do not contribute: `record_page_out` takes no latency.
pub paging_latency_us: u64,
/// Largest single page-in latency recorded, in microseconds.
pub max_paging_latency_us: u64,
/// Number of prefetch operations recorded.
pub prefetch_operations: u64,
/// Number of prefetch hits recorded.
pub prefetch_hits: u64,
/// Number of affinity evictions recorded.
pub affinity_evictions: u64,
}
impl MoeMetrics {
    /// Creates a metrics collector with every counter at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Converts a latency to whole microseconds, clamping at `u64::MAX`.
    ///
    /// `Duration::as_micros` returns a `u128`; a bare `as u64` cast would
    /// silently keep only the low 64 bits for very large durations and
    /// record a meaningless value.
    fn saturating_micros(latency: Duration) -> u64 {
        // After clamping, the value is guaranteed to fit, so the cast is lossless.
        latency.as_micros().min(u128::from(u64::MAX)) as u64
    }

    /// Records a single cache hit.
    ///
    /// Like every counter in this type, the count saturates at `u64::MAX`
    /// instead of panicking (debug) or wrapping (release) on overflow.
    pub fn record_cache_hit(&mut self) {
        self.cache_hits = self.cache_hits.saturating_add(1);
    }

    /// Records a single cache miss.
    pub fn record_cache_miss(&mut self) {
        self.cache_misses = self.cache_misses.saturating_add(1);
    }

    /// Records `count` cache hits at once.
    #[inline]
    pub fn record_cache_hits(&mut self, count: usize) {
        // `usize` is at most 64 bits wide on supported targets, so this cast is lossless.
        self.cache_hits = self.cache_hits.saturating_add(count as u64);
    }

    /// Records `count` cache misses at once.
    #[inline]
    pub fn record_cache_misses(&mut self, count: usize) {
        // `usize` is at most 64 bits wide on supported targets, so this cast is lossless.
        self.cache_misses = self.cache_misses.saturating_add(count as u64);
    }

    /// Records one page-in operation that took `latency`.
    ///
    /// Updates the page-in count, the running latency total and the
    /// maximum observed page-in latency (all in microseconds).
    pub fn record_page_in(&mut self, latency: Duration) {
        let latency_us = Self::saturating_micros(latency);
        self.experts_paged_in = self.experts_paged_in.saturating_add(1);
        self.paging_latency_us = self.paging_latency_us.saturating_add(latency_us);
        self.max_paging_latency_us = self.max_paging_latency_us.max(latency_us);
    }

    /// Records one page-out operation. No latency is tracked for page-outs.
    pub fn record_page_out(&mut self) {
        self.experts_paged_out = self.experts_paged_out.saturating_add(1);
    }

    /// Records one routing decision that took `latency`.
    ///
    /// Updates the decision count, the running latency total and the
    /// maximum observed routing latency (all in microseconds).
    pub fn record_routing(&mut self, latency: Duration) {
        let latency_us = Self::saturating_micros(latency);
        self.routing_decisions = self.routing_decisions.saturating_add(1);
        self.routing_latency_us = self.routing_latency_us.saturating_add(latency_us);
        self.max_routing_latency_us = self.max_routing_latency_us.max(latency_us);
    }

    /// Records one prefetch operation.
    pub fn record_prefetch(&mut self) {
        self.prefetch_operations = self.prefetch_operations.saturating_add(1);
    }

    /// Records one prefetch hit.
    pub fn record_prefetch_hit(&mut self) {
        self.prefetch_hits = self.prefetch_hits.saturating_add(1);
    }

    /// Records one affinity eviction.
    pub fn record_affinity_eviction(&mut self) {
        self.affinity_evictions = self.affinity_evictions.saturating_add(1);
    }

    /// Returns `cache_hits / (cache_hits + cache_misses)`, or `0.0` when
    /// nothing has been recorded.
    pub fn hit_rate(&self) -> f32 {
        // Sum and divide in f64: the integer sum could overflow once the
        // counters are near `u64::MAX`, and f32 cannot represent counts
        // above 2^24 exactly. Narrow to f32 only at the end.
        let hits = self.cache_hits as f64;
        let lookups = hits + self.cache_misses as f64;
        if lookups == 0.0 {
            return 0.0;
        }
        (hits / lookups) as f32
    }

    /// Returns the mean routing latency in microseconds, or `0.0` when no
    /// routing decision has been recorded.
    pub fn avg_routing_latency_us(&self) -> f64 {
        if self.routing_decisions == 0 {
            return 0.0;
        }
        self.routing_latency_us as f64 / self.routing_decisions as f64
    }

    /// Returns the mean page-in latency in microseconds, or `0.0` when no
    /// page-in has been recorded.
    pub fn avg_paging_latency_us(&self) -> f64 {
        if self.experts_paged_in == 0 {
            return 0.0;
        }
        self.paging_latency_us as f64 / self.experts_paged_in as f64
    }

    /// Returns `prefetch_hits / prefetch_operations`, or `0.0` when no
    /// prefetch operation has been recorded.
    ///
    /// The two counters are recorded independently and the result is not
    /// clamped, so it exceeds `1.0` if more hits than operations were recorded.
    pub fn prefetch_accuracy(&self) -> f32 {
        if self.prefetch_operations == 0 {
            return 0.0;
        }
        // Divide in f64 for the same precision reason as `hit_rate`.
        (self.prefetch_hits as f64 / self.prefetch_operations as f64) as f32
    }

    /// Builds a snapshot of the derived statistics.
    ///
    /// `total_page_operations` is page-ins plus page-outs, saturating at
    /// `u64::MAX`.
    pub fn summary(&self) -> MoeMetricsSummary {
        MoeMetricsSummary {
            hit_rate: self.hit_rate(),
            avg_routing_latency_us: self.avg_routing_latency_us(),
            max_routing_latency_us: self.max_routing_latency_us,
            avg_paging_latency_us: self.avg_paging_latency_us(),
            max_paging_latency_us: self.max_paging_latency_us,
            prefetch_accuracy: self.prefetch_accuracy(),
            total_routing_decisions: self.routing_decisions,
            total_page_operations: self.experts_paged_in.saturating_add(self.experts_paged_out),
        }
    }

    /// Resets every counter and latency field back to zero.
    pub fn reset(&mut self) {
        *self = Self::default();
    }
}
/// Snapshot of derived statistics, as produced by `MoeMetrics::summary`.
#[derive(Debug, Clone)]
pub struct MoeMetricsSummary {
/// Cache hits divided by total cache lookups (hits + misses); `0.0` when
/// there were no lookups.
pub hit_rate: f32,
/// Mean routing latency in microseconds; `0.0` when nothing was routed.
pub avg_routing_latency_us: f64,
/// Largest single routing latency observed, in microseconds.
pub max_routing_latency_us: u64,
/// Mean page-in latency in microseconds; `0.0` when nothing was paged in.
pub avg_paging_latency_us: f64,
/// Largest single page-in latency observed, in microseconds.
pub max_paging_latency_us: u64,
/// Prefetch hits divided by prefetch operations; `0.0` when there were
/// no prefetch operations.
pub prefetch_accuracy: f32,
/// Number of routing decisions recorded.
pub total_routing_decisions: u64,
/// Page-ins plus page-outs.
pub total_page_operations: u64,
}
impl MoeMetricsSummary {
    /// Reports whether this snapshot satisfies both performance targets:
    /// a hit rate of at least 0.70 and a worst-case routing latency of at
    /// most 15 microseconds.
    ///
    /// A NaN hit rate never satisfies the target.
    pub fn meets_targets(&self) -> bool {
        const MIN_HIT_RATE: f32 = 0.70;
        const MAX_ROUTING_LATENCY_US: u64 = 15;

        let hit_rate_ok = self.hit_rate >= MIN_HIT_RATE;
        let routing_ok = self.max_routing_latency_us <= MAX_ROUTING_LATENCY_US;
        hit_rate_ok && routing_ok
    }
}
/// Minimal stopwatch: captures an [`Instant`] on creation and reports the
/// time elapsed since then.
///
/// Derives `Debug` and `Clone` so it matches the other public types in this
/// file and can be embedded in types that derive them.
#[derive(Debug, Clone)]
pub struct MetricsTimer {
    /// Moment at which the timer was started.
    start: Instant,
}
impl MetricsTimer {
    /// Starts a timer at the current instant.
    pub fn start() -> Self {
        let start = Instant::now();
        Self { start }
    }

    /// Returns how much time has passed since the timer was started.
    pub fn elapsed(&self) -> Duration {
        Instant::now().duration_since(self.start)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a summary in which only the two fields inspected by
    /// `meets_targets` vary; the remaining fields use fixed placeholder values.
    fn summary_with(hit_rate: f32, max_routing_latency_us: u64) -> MoeMetricsSummary {
        MoeMetricsSummary {
            hit_rate,
            avg_routing_latency_us: 8.0,
            max_routing_latency_us,
            avg_paging_latency_us: 100.0,
            max_paging_latency_us: 200,
            prefetch_accuracy: 0.6,
            total_routing_decisions: 100,
            total_page_operations: 20,
        }
    }

    #[test]
    fn test_metrics_new() {
        let metrics = MoeMetrics::new();
        assert_eq!(metrics.cache_hits, 0);
        assert_eq!(metrics.cache_misses, 0);
        assert_eq!(metrics.hit_rate(), 0.0);
    }

    /// Every derived ratio/average must be 0.0 (not NaN) before any event.
    #[test]
    fn test_empty_metrics_report_zero() {
        let metrics = MoeMetrics::new();
        assert_eq!(metrics.avg_routing_latency_us(), 0.0);
        assert_eq!(metrics.avg_paging_latency_us(), 0.0);
        assert_eq!(metrics.prefetch_accuracy(), 0.0);
    }

    #[test]
    fn test_hit_rate_calculation() {
        let mut metrics = MoeMetrics::new();
        metrics.record_cache_hit();
        metrics.record_cache_hit();
        metrics.record_cache_hit();
        metrics.record_cache_miss();
        assert!((metrics.hit_rate() - 0.75).abs() < 1e-6);
    }

    #[test]
    fn test_routing_latency() {
        let mut metrics = MoeMetrics::new();
        metrics.record_routing(Duration::from_micros(10));
        metrics.record_routing(Duration::from_micros(20));
        assert_eq!(metrics.routing_decisions, 2);
        assert!((metrics.avg_routing_latency_us() - 15.0).abs() < 1e-6);
        assert_eq!(metrics.max_routing_latency_us, 20);
    }

    #[test]
    fn test_paging_latency() {
        let mut metrics = MoeMetrics::new();
        metrics.record_page_in(Duration::from_micros(100));
        metrics.record_page_in(Duration::from_micros(300));
        metrics.record_page_out();
        assert_eq!(metrics.experts_paged_in, 2);
        assert_eq!(metrics.experts_paged_out, 1);
        assert_eq!(metrics.paging_latency_us, 400);
        assert!((metrics.avg_paging_latency_us() - 200.0).abs() < 1e-6);
        assert_eq!(metrics.max_paging_latency_us, 300);
    }

    #[test]
    fn test_prefetch_accuracy() {
        let mut metrics = MoeMetrics::new();
        metrics.record_prefetch();
        metrics.record_prefetch();
        metrics.record_prefetch();
        metrics.record_prefetch_hit();
        metrics.record_prefetch_hit();
        assert!((metrics.prefetch_accuracy() - 0.6666667).abs() < 1e-6);
    }

    #[test]
    fn test_affinity_eviction_counter() {
        let mut metrics = MoeMetrics::new();
        metrics.record_affinity_eviction();
        metrics.record_affinity_eviction();
        assert_eq!(metrics.affinity_evictions, 2);
    }

    #[test]
    fn test_summary_reflects_recorded_metrics() {
        let mut metrics = MoeMetrics::new();
        metrics.record_cache_hits(3);
        metrics.record_cache_miss();
        metrics.record_routing(Duration::from_micros(10));
        metrics.record_routing(Duration::from_micros(20));
        metrics.record_page_in(Duration::from_micros(100));
        metrics.record_page_out();
        metrics.record_prefetch();
        metrics.record_prefetch_hit();

        let summary = metrics.summary();
        assert!((summary.hit_rate - 0.75).abs() < 1e-6);
        assert!((summary.avg_routing_latency_us - 15.0).abs() < 1e-6);
        assert_eq!(summary.max_routing_latency_us, 20);
        assert!((summary.avg_paging_latency_us - 100.0).abs() < 1e-6);
        assert_eq!(summary.max_paging_latency_us, 100);
        assert!((summary.prefetch_accuracy - 1.0).abs() < 1e-6);
        assert_eq!(summary.total_routing_decisions, 2);
        assert_eq!(summary.total_page_operations, 2);
    }

    #[test]
    fn test_summary_meets_targets() {
        assert!(summary_with(0.75, 12).meets_targets());
        // Both thresholds are inclusive.
        assert!(summary_with(0.70, 15).meets_targets());
    }

    #[test]
    fn test_summary_fails_targets() {
        // Hit rate below the 0.70 target, routing latency within target.
        assert!(!summary_with(0.50, 12).meets_targets());
    }

    #[test]
    fn test_summary_fails_on_slow_routing() {
        // Hit rate within target, worst-case routing latency above 15 us.
        assert!(!summary_with(0.75, 16).meets_targets());
    }

    #[test]
    fn test_metrics_reset() {
        let mut metrics = MoeMetrics::new();
        metrics.record_cache_hit();
        metrics.record_cache_miss();
        metrics.record_routing(Duration::from_micros(10));
        metrics.reset();
        assert_eq!(metrics.cache_hits, 0);
        assert_eq!(metrics.cache_misses, 0);
        assert_eq!(metrics.routing_decisions, 0);
        assert_eq!(metrics.routing_latency_us, 0);
        assert_eq!(metrics.max_routing_latency_us, 0);
    }

    #[test]
    fn test_metrics_timer() {
        let timer = MetricsTimer::start();
        let first = timer.elapsed();
        let second = timer.elapsed();
        // `Instant` is monotonic, so successive readings never go backwards.
        assert!(second >= first);
    }

    #[test]
    fn test_bulk_cache_recording() {
        let mut metrics = MoeMetrics::new();
        metrics.record_cache_hits(5);
        metrics.record_cache_misses(2);
        assert_eq!(metrics.cache_hits, 5);
        assert_eq!(metrics.cache_misses, 2);
        metrics.record_cache_hit();
        metrics.record_cache_miss();
        assert_eq!(metrics.cache_hits, 6);
        assert_eq!(metrics.cache_misses, 3);
        assert!((metrics.hit_rate() - 0.6666667).abs() < 1e-5);
    }
}