npu_rs/
perf_monitor.rs

1use std::sync::atomic::{AtomicU64, Ordering};
2use std::sync::Arc;
3use std::time::Instant;
4use serde::{Serialize, Deserialize};
5
6/// Performance metrics for NPU operations.
7#[derive(Clone, Debug, Default, Serialize, Deserialize)]
8pub struct PerformanceMetrics {
9    pub total_operations: u64,
10    pub total_time_ms: u64,
11    pub peak_power_watts: f32,
12    pub avg_power_watts: f32,
13    pub memory_used_mb: u64,
14    pub memory_peak_mb: u64,
15    pub avg_utilization_percent: f32,
16}
17
18/// Real-time performance monitor for NPU.
19pub struct PerformanceMonitor {
20    start_time: Instant,
21    operation_count: Arc<AtomicU64>,
22    peak_power: Arc<AtomicU64>,
23    total_power: Arc<AtomicU64>,
24    metrics: Arc<parking_lot::Mutex<PerformanceMetrics>>,
25}
26
27impl PerformanceMonitor {
28    /// Create a new performance monitor.
29    pub fn new() -> Self {
30        Self {
31            start_time: Instant::now(),
32            operation_count: Arc::new(AtomicU64::new(0)),
33            peak_power: Arc::new(AtomicU64::new(0)),
34            total_power: Arc::new(AtomicU64::new(0)),
35            metrics: Arc::new(parking_lot::Mutex::new(PerformanceMetrics::default())),
36        }
37    }
38
39    /// Record a completed operation.
40    pub fn record_operation(&self, ops: u64) {
41        self.operation_count.fetch_add(ops, Ordering::SeqCst);
42    }
43
44    /// Record power consumption in watts.
45    pub fn record_power(&self, power_watts: f32) {
46        let power_bits = power_watts.to_bits() as u64;
47        self.total_power.fetch_add(power_bits as u64, Ordering::SeqCst);
48        
49        let current_peak = f32::from_bits(self.peak_power.load(Ordering::SeqCst) as u32);
50        if power_watts > current_peak {
51            self.peak_power.store(power_bits, Ordering::SeqCst);
52        }
53    }
54
55    /// Get current metrics snapshot.
56    pub fn get_metrics(&self) -> PerformanceMetrics {
57        let mut metrics = self.metrics.lock().clone();
58        let elapsed_ms = self.start_time.elapsed().as_millis() as u64;
59        
60        metrics.total_operations = self.operation_count.load(Ordering::SeqCst);
61        metrics.total_time_ms = elapsed_ms;
62        metrics.peak_power_watts = f32::from_bits(self.peak_power.load(Ordering::SeqCst) as u32);
63        
64        metrics
65    }
66
67    /// Get throughput in GOPS (giga operations per second).
68    pub fn get_throughput_gops(&self) -> f64 {
69        let metrics = self.get_metrics();
70        if metrics.total_time_ms == 0 {
71            return 0.0;
72        }
73        (metrics.total_operations as f64) / (metrics.total_time_ms as f64 / 1000.0) / 1e9
74    }
75
76    /// Reset monitoring counters.
77    pub fn reset(&self) {
78        self.operation_count.store(0, Ordering::SeqCst);
79        self.peak_power.store(0, Ordering::SeqCst);
80        self.total_power.store(0, Ordering::SeqCst);
81    }
82}
83
84impl Default for PerformanceMonitor {
85    fn default() -> Self {
86        Self::new()
87    }
88}