Skip to main content

ringkernel_core/hybrid/
stats.rs

1//! Statistics for hybrid processing decisions.
2
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::time::Duration;
5
6/// Statistics for hybrid processing decisions.
7///
8/// Thread-safe via atomic operations.
9#[derive(Debug, Default)]
10pub struct HybridStats {
11    /// Total CPU executions.
12    cpu_executions: AtomicU64,
13    /// Total GPU executions.
14    gpu_executions: AtomicU64,
15    /// Total CPU time (nanoseconds).
16    cpu_time_ns: AtomicU64,
17    /// Total GPU time (nanoseconds).
18    gpu_time_ns: AtomicU64,
19    /// Total elements processed on CPU.
20    cpu_elements: AtomicU64,
21    /// Total elements processed on GPU.
22    gpu_elements: AtomicU64,
23    /// Crossover threshold learned from measurements.
24    learned_threshold: AtomicU64,
25}
26
27impl HybridStats {
28    /// Creates new empty statistics.
29    #[must_use]
30    pub fn new() -> Self {
31        Self::default()
32    }
33
34    /// Records a CPU execution.
35    pub fn record_cpu_execution(&self, duration: Duration, elements: usize) {
36        self.cpu_executions.fetch_add(1, Ordering::Relaxed);
37        self.cpu_time_ns
38            .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed);
39        self.cpu_elements
40            .fetch_add(elements as u64, Ordering::Relaxed);
41    }
42
43    /// Records a GPU execution.
44    pub fn record_gpu_execution(&self, duration: Duration, elements: usize) {
45        self.gpu_executions.fetch_add(1, Ordering::Relaxed);
46        self.gpu_time_ns
47            .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed);
48        self.gpu_elements
49            .fetch_add(elements as u64, Ordering::Relaxed);
50    }
51
52    /// Updates the learned threshold.
53    pub fn set_learned_threshold(&self, threshold: usize) {
54        self.learned_threshold
55            .store(threshold as u64, Ordering::Relaxed);
56    }
57
58    /// Gets the learned threshold.
59    #[must_use]
60    pub fn learned_threshold(&self) -> usize {
61        self.learned_threshold.load(Ordering::Relaxed) as usize
62    }
63
64    /// Gets the total CPU executions.
65    #[must_use]
66    pub fn cpu_executions(&self) -> u64 {
67        self.cpu_executions.load(Ordering::Relaxed)
68    }
69
70    /// Gets the total GPU executions.
71    #[must_use]
72    pub fn gpu_executions(&self) -> u64 {
73        self.gpu_executions.load(Ordering::Relaxed)
74    }
75
76    /// Gets the average CPU time per execution.
77    #[must_use]
78    pub fn avg_cpu_time(&self) -> Duration {
79        let execs = self.cpu_executions.load(Ordering::Relaxed);
80        if execs == 0 {
81            return Duration::ZERO;
82        }
83        let total_ns = self.cpu_time_ns.load(Ordering::Relaxed);
84        Duration::from_nanos(total_ns / execs)
85    }
86
87    /// Gets the average GPU time per execution.
88    #[must_use]
89    pub fn avg_gpu_time(&self) -> Duration {
90        let execs = self.gpu_executions.load(Ordering::Relaxed);
91        if execs == 0 {
92            return Duration::ZERO;
93        }
94        let total_ns = self.gpu_time_ns.load(Ordering::Relaxed);
95        Duration::from_nanos(total_ns / execs)
96    }
97
98    /// Gets the CPU/GPU execution ratio.
99    #[must_use]
100    pub fn cpu_gpu_ratio(&self) -> f32 {
101        let cpu = self.cpu_executions.load(Ordering::Relaxed) as f32;
102        let gpu = self.gpu_executions.load(Ordering::Relaxed) as f32;
103        if gpu == 0.0 {
104            return f32::INFINITY;
105        }
106        cpu / gpu
107    }
108
109    /// Gets the average CPU throughput (elements per second).
110    #[must_use]
111    pub fn cpu_throughput(&self) -> f64 {
112        let total_ns = self.cpu_time_ns.load(Ordering::Relaxed);
113        let total_elements = self.cpu_elements.load(Ordering::Relaxed);
114        if total_ns == 0 {
115            return 0.0;
116        }
117        (total_elements as f64) / (total_ns as f64 / 1_000_000_000.0)
118    }
119
120    /// Gets the average GPU throughput (elements per second).
121    #[must_use]
122    pub fn gpu_throughput(&self) -> f64 {
123        let total_ns = self.gpu_time_ns.load(Ordering::Relaxed);
124        let total_elements = self.gpu_elements.load(Ordering::Relaxed);
125        if total_ns == 0 {
126            return 0.0;
127        }
128        (total_elements as f64) / (total_ns as f64 / 1_000_000_000.0)
129    }
130
131    /// Creates a snapshot of the current statistics.
132    #[must_use]
133    pub fn snapshot(&self) -> HybridStatsSnapshot {
134        HybridStatsSnapshot {
135            cpu_executions: self.cpu_executions.load(Ordering::Relaxed),
136            gpu_executions: self.gpu_executions.load(Ordering::Relaxed),
137            cpu_time_ns: self.cpu_time_ns.load(Ordering::Relaxed),
138            gpu_time_ns: self.gpu_time_ns.load(Ordering::Relaxed),
139            cpu_elements: self.cpu_elements.load(Ordering::Relaxed),
140            gpu_elements: self.gpu_elements.load(Ordering::Relaxed),
141            learned_threshold: self.learned_threshold.load(Ordering::Relaxed) as usize,
142        }
143    }
144
145    /// Resets all statistics to zero.
146    pub fn reset(&self) {
147        self.cpu_executions.store(0, Ordering::Relaxed);
148        self.gpu_executions.store(0, Ordering::Relaxed);
149        self.cpu_time_ns.store(0, Ordering::Relaxed);
150        self.gpu_time_ns.store(0, Ordering::Relaxed);
151        self.cpu_elements.store(0, Ordering::Relaxed);
152        self.gpu_elements.store(0, Ordering::Relaxed);
153    }
154}
155
/// A point-in-time snapshot of hybrid processing statistics.
///
/// Plain data: all fields are public and the derived figures below are
/// computed from them on demand.
#[derive(Debug, Clone)]
pub struct HybridStatsSnapshot {
    /// Total CPU executions.
    pub cpu_executions: u64,
    /// Total GPU executions.
    pub gpu_executions: u64,
    /// Total CPU time (nanoseconds).
    pub cpu_time_ns: u64,
    /// Total GPU time (nanoseconds).
    pub gpu_time_ns: u64,
    /// Total elements processed on CPU.
    pub cpu_elements: u64,
    /// Total elements processed on GPU.
    pub gpu_elements: u64,
    /// Learned threshold.
    pub learned_threshold: usize,
}

impl HybridStatsSnapshot {
    /// Total executions across both backends.
    ///
    /// Saturates at `u64::MAX` rather than overflowing; the fields are public,
    /// so their sum is not guaranteed to fit in a `u64`.
    #[must_use]
    pub fn total_executions(&self) -> u64 {
        self.cpu_executions.saturating_add(self.gpu_executions)
    }

    /// GPU utilization percentage (0.0-100.0).
    ///
    /// This is the share of executions that ran on the GPU. Returns `0.0`
    /// when nothing has been executed on either backend.
    #[must_use]
    pub fn gpu_utilization(&self) -> f64 {
        let on_gpu = self.gpu_executions as f64;
        // Summed in floating point so the denominator cannot overflow.
        let overall = self.cpu_executions as f64 + on_gpu;
        if overall == 0.0 {
            return 0.0;
        }
        (on_gpu / overall) * 100.0
    }

    /// Average CPU time per execution.
    ///
    /// Returns `Duration::ZERO` when there were no CPU executions. The result
    /// is truncated to whole nanoseconds.
    #[must_use]
    pub fn avg_cpu_time(&self) -> Duration {
        // `checked_div` yields `None` only for a zero divisor.
        match self.cpu_time_ns.checked_div(self.cpu_executions) {
            Some(mean_ns) => Duration::from_nanos(mean_ns),
            None => Duration::ZERO,
        }
    }

    /// Average GPU time per execution.
    ///
    /// Returns `Duration::ZERO` when there were no GPU executions. The result
    /// is truncated to whole nanoseconds.
    #[must_use]
    pub fn avg_gpu_time(&self) -> Duration {
        // `checked_div` yields `None` only for a zero divisor.
        match self.gpu_time_ns.checked_div(self.gpu_executions) {
            Some(mean_ns) => Duration::from_nanos(mean_ns),
            None => Duration::ZERO,
        }
    }
}
210
#[cfg(test)]
mod tests {
    //! Unit tests for [`HybridStats`] and [`HybridStatsSnapshot`].
    //!
    //! Durations and element counts are chosen so that every expected value
    //! is exactly representable, keeping the float comparisons tight.

    use super::*;

    /// A fresh instance reports zero for every accessor and takes the
    /// documented "nothing recorded" branch of each derived value.
    #[test]
    fn test_stats_new() {
        let stats = HybridStats::new();
        assert_eq!(stats.cpu_executions(), 0);
        assert_eq!(stats.gpu_executions(), 0);
        assert_eq!(stats.learned_threshold(), 0);
        assert_eq!(stats.avg_cpu_time(), Duration::ZERO);
        assert_eq!(stats.avg_gpu_time(), Duration::ZERO);
    }

    /// With no GPU executions the ratio is infinite and throughput is zero
    /// rather than the result of a division by zero.
    #[test]
    fn test_empty_guards() {
        let stats = HybridStats::new();
        assert!(stats.cpu_gpu_ratio().is_infinite());
        assert!(stats.cpu_throughput().abs() < f64::EPSILON);
        assert!(stats.gpu_throughput().abs() < f64::EPSILON);

        // CPU-only activity still has no GPU denominator.
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        assert!(stats.cpu_gpu_ratio().is_infinite());
    }

    #[test]
    fn test_record_cpu_execution() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);

        assert_eq!(stats.cpu_executions(), 1);
        assert_eq!(stats.gpu_executions(), 0);
        assert_eq!(stats.avg_cpu_time(), Duration::from_millis(100));
    }

    #[test]
    fn test_record_gpu_execution() {
        let stats = HybridStats::new();
        stats.record_gpu_execution(Duration::from_millis(50), 10000);

        assert_eq!(stats.gpu_executions(), 1);
        assert_eq!(stats.cpu_executions(), 0);
        assert_eq!(stats.avg_gpu_time(), Duration::from_millis(50));
    }

    /// Averages divide the accumulated time by the number of executions.
    #[test]
    fn test_average_over_multiple_executions() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_cpu_execution(Duration::from_millis(300), 1000);

        assert_eq!(stats.cpu_executions(), 2);
        assert_eq!(stats.avg_cpu_time(), Duration::from_millis(200));
    }

    /// Throughput is total elements divided by total seconds.
    #[test]
    fn test_throughput() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_secs(1), 1000);
        stats.record_gpu_execution(Duration::from_secs(2), 10000);

        assert!((stats.cpu_throughput() - 1000.0).abs() < 1e-9);
        assert!((stats.gpu_throughput() - 5000.0).abs() < 1e-9);
    }

    #[test]
    fn test_cpu_gpu_ratio() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);

        assert!((stats.cpu_gpu_ratio() - 2.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_snapshot() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);
        stats.set_learned_threshold(5000);

        let snapshot = stats.snapshot();
        assert_eq!(snapshot.cpu_executions, 1);
        assert_eq!(snapshot.gpu_executions, 1);
        assert_eq!(snapshot.cpu_elements, 1000);
        assert_eq!(snapshot.gpu_elements, 10000);
        assert_eq!(snapshot.learned_threshold, 5000);
        assert_eq!(snapshot.total_executions(), 2);
        assert_eq!(snapshot.avg_cpu_time(), Duration::from_millis(100));
        assert_eq!(snapshot.avg_gpu_time(), Duration::from_millis(50));
        assert!((snapshot.gpu_utilization() - 50.0).abs() < f64::EPSILON);
    }

    /// A snapshot of untouched statistics takes every zero-guard.
    #[test]
    fn test_empty_snapshot() {
        let snapshot = HybridStats::new().snapshot();
        assert_eq!(snapshot.total_executions(), 0);
        assert!(snapshot.gpu_utilization().abs() < f64::EPSILON);
        assert_eq!(snapshot.avg_cpu_time(), Duration::ZERO);
        assert_eq!(snapshot.avg_gpu_time(), Duration::ZERO);
    }

    /// `reset` clears all six counters; the learned threshold is not one of
    /// the fields it writes, so it survives.
    #[test]
    fn test_reset() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);
        stats.set_learned_threshold(5000);
        stats.reset();

        assert_eq!(stats.cpu_executions(), 0);
        assert_eq!(stats.gpu_executions(), 0);

        let snapshot = stats.snapshot();
        assert_eq!(snapshot.cpu_time_ns, 0);
        assert_eq!(snapshot.gpu_time_ns, 0);
        assert_eq!(snapshot.cpu_elements, 0);
        assert_eq!(snapshot.gpu_elements, 0);
        assert_eq!(snapshot.learned_threshold, 5000);
    }
}