ringkernel_core/hybrid/
stats.rs1use std::sync::atomic::{AtomicU64, Ordering};
4use std::time::Duration;
5
/// Lock-free execution counters for the CPU and GPU code paths.
///
/// Every field is an [`AtomicU64`], so all recording and reading methods take
/// `&self`. [`Default`] produces a value with every counter at zero.
#[derive(Debug, Default)]
pub struct HybridStats {
    /// Number of recorded CPU executions.
    cpu_executions: AtomicU64,
    /// Number of recorded GPU executions.
    gpu_executions: AtomicU64,
    /// Accumulated duration of all recorded CPU executions, in nanoseconds.
    cpu_time_ns: AtomicU64,
    /// Accumulated duration of all recorded GPU executions, in nanoseconds.
    gpu_time_ns: AtomicU64,
    /// Accumulated element count of all recorded CPU executions.
    cpu_elements: AtomicU64,
    /// Accumulated element count of all recorded GPU executions.
    gpu_elements: AtomicU64,
    /// Value most recently stored through `set_learned_threshold`.
    // NOTE(review): presumably the element count at which GPU execution
    // becomes preferable; confirm against the code that calls the setter.
    learned_threshold: AtomicU64,
}
26
27impl HybridStats {
28 #[must_use]
30 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn record_cpu_execution(&self, duration: Duration, elements: usize) {
36 self.cpu_executions.fetch_add(1, Ordering::Relaxed);
37 self.cpu_time_ns
38 .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed);
39 self.cpu_elements
40 .fetch_add(elements as u64, Ordering::Relaxed);
41 }
42
43 pub fn record_gpu_execution(&self, duration: Duration, elements: usize) {
45 self.gpu_executions.fetch_add(1, Ordering::Relaxed);
46 self.gpu_time_ns
47 .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed);
48 self.gpu_elements
49 .fetch_add(elements as u64, Ordering::Relaxed);
50 }
51
52 pub fn set_learned_threshold(&self, threshold: usize) {
54 self.learned_threshold
55 .store(threshold as u64, Ordering::Relaxed);
56 }
57
58 #[must_use]
60 pub fn learned_threshold(&self) -> usize {
61 self.learned_threshold.load(Ordering::Relaxed) as usize
62 }
63
64 #[must_use]
66 pub fn cpu_executions(&self) -> u64 {
67 self.cpu_executions.load(Ordering::Relaxed)
68 }
69
70 #[must_use]
72 pub fn gpu_executions(&self) -> u64 {
73 self.gpu_executions.load(Ordering::Relaxed)
74 }
75
76 #[must_use]
78 pub fn avg_cpu_time(&self) -> Duration {
79 let execs = self.cpu_executions.load(Ordering::Relaxed);
80 if execs == 0 {
81 return Duration::ZERO;
82 }
83 let total_ns = self.cpu_time_ns.load(Ordering::Relaxed);
84 Duration::from_nanos(total_ns / execs)
85 }
86
87 #[must_use]
89 pub fn avg_gpu_time(&self) -> Duration {
90 let execs = self.gpu_executions.load(Ordering::Relaxed);
91 if execs == 0 {
92 return Duration::ZERO;
93 }
94 let total_ns = self.gpu_time_ns.load(Ordering::Relaxed);
95 Duration::from_nanos(total_ns / execs)
96 }
97
98 #[must_use]
100 pub fn cpu_gpu_ratio(&self) -> f32 {
101 let cpu = self.cpu_executions.load(Ordering::Relaxed) as f32;
102 let gpu = self.gpu_executions.load(Ordering::Relaxed) as f32;
103 if gpu == 0.0 {
104 return f32::INFINITY;
105 }
106 cpu / gpu
107 }
108
109 #[must_use]
111 pub fn cpu_throughput(&self) -> f64 {
112 let total_ns = self.cpu_time_ns.load(Ordering::Relaxed);
113 let total_elements = self.cpu_elements.load(Ordering::Relaxed);
114 if total_ns == 0 {
115 return 0.0;
116 }
117 (total_elements as f64) / (total_ns as f64 / 1_000_000_000.0)
118 }
119
120 #[must_use]
122 pub fn gpu_throughput(&self) -> f64 {
123 let total_ns = self.gpu_time_ns.load(Ordering::Relaxed);
124 let total_elements = self.gpu_elements.load(Ordering::Relaxed);
125 if total_ns == 0 {
126 return 0.0;
127 }
128 (total_elements as f64) / (total_ns as f64 / 1_000_000_000.0)
129 }
130
131 #[must_use]
133 pub fn snapshot(&self) -> HybridStatsSnapshot {
134 HybridStatsSnapshot {
135 cpu_executions: self.cpu_executions.load(Ordering::Relaxed),
136 gpu_executions: self.gpu_executions.load(Ordering::Relaxed),
137 cpu_time_ns: self.cpu_time_ns.load(Ordering::Relaxed),
138 gpu_time_ns: self.gpu_time_ns.load(Ordering::Relaxed),
139 cpu_elements: self.cpu_elements.load(Ordering::Relaxed),
140 gpu_elements: self.gpu_elements.load(Ordering::Relaxed),
141 learned_threshold: self.learned_threshold.load(Ordering::Relaxed) as usize,
142 }
143 }
144
145 pub fn reset(&self) {
147 self.cpu_executions.store(0, Ordering::Relaxed);
148 self.gpu_executions.store(0, Ordering::Relaxed);
149 self.cpu_time_ns.store(0, Ordering::Relaxed);
150 self.gpu_time_ns.store(0, Ordering::Relaxed);
151 self.cpu_elements.store(0, Ordering::Relaxed);
152 self.gpu_elements.store(0, Ordering::Relaxed);
153 }
154}
155
/// Plain-data copy of the counters held by `HybridStats`, as produced by
/// `HybridStats::snapshot`.
///
/// All fields are public, so a snapshot can also be built by hand.
/// [`Default`] yields a snapshot with every field at zero.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HybridStatsSnapshot {
    /// Number of CPU executions.
    pub cpu_executions: u64,
    /// Number of GPU executions.
    pub gpu_executions: u64,
    /// Accumulated CPU execution time, in nanoseconds.
    pub cpu_time_ns: u64,
    /// Accumulated GPU execution time, in nanoseconds.
    pub gpu_time_ns: u64,
    /// Accumulated number of elements processed by CPU executions.
    pub cpu_elements: u64,
    /// Accumulated number of elements processed by GPU executions.
    pub gpu_elements: u64,
    /// Learned threshold value at the time the snapshot was taken.
    pub learned_threshold: usize,
}
174
175impl HybridStatsSnapshot {
176 #[must_use]
178 pub fn total_executions(&self) -> u64 {
179 self.cpu_executions + self.gpu_executions
180 }
181
182 #[must_use]
184 pub fn gpu_utilization(&self) -> f64 {
185 let total = self.total_executions();
186 if total == 0 {
187 return 0.0;
188 }
189 (self.gpu_executions as f64 / total as f64) * 100.0
190 }
191
192 #[must_use]
194 pub fn avg_cpu_time(&self) -> Duration {
195 if self.cpu_executions == 0 {
196 return Duration::ZERO;
197 }
198 Duration::from_nanos(self.cpu_time_ns / self.cpu_executions)
199 }
200
201 #[must_use]
203 pub fn avg_gpu_time(&self) -> Duration {
204 if self.gpu_executions == 0 {
205 return Duration::ZERO;
206 }
207 Duration::from_nanos(self.gpu_time_ns / self.gpu_executions)
208 }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created collector reports zero executions on both paths.
    #[test]
    fn test_stats_new() {
        let stats = HybridStats::new();
        assert_eq!(stats.cpu_executions(), 0);
        assert_eq!(stats.gpu_executions(), 0);
    }

    /// With nothing recorded, the derived metrics fall back to their guard values.
    #[test]
    fn test_empty_stats_derived_metrics() {
        let stats = HybridStats::new();
        assert_eq!(stats.avg_cpu_time(), Duration::ZERO);
        assert_eq!(stats.avg_gpu_time(), Duration::ZERO);
        assert_eq!(stats.cpu_throughput(), 0.0);
        assert_eq!(stats.gpu_throughput(), 0.0);
        assert!(stats.cpu_gpu_ratio().is_infinite());
    }

    #[test]
    fn test_record_cpu_execution() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);

        assert_eq!(stats.cpu_executions(), 1);
        assert_eq!(stats.avg_cpu_time(), Duration::from_millis(100));
    }

    #[test]
    fn test_record_gpu_execution() {
        let stats = HybridStats::new();
        stats.record_gpu_execution(Duration::from_millis(50), 10000);

        assert_eq!(stats.gpu_executions(), 1);
        assert_eq!(stats.avg_gpu_time(), Duration::from_millis(50));
    }

    #[test]
    fn test_cpu_gpu_ratio() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);

        assert!((stats.cpu_gpu_ratio() - 2.0).abs() < f32::EPSILON);
    }

    /// 1000 elements in 100 ms is 10k elements/s; 10000 elements in 50 ms is 200k elements/s.
    #[test]
    fn test_throughput() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);

        assert!((stats.cpu_throughput() - 10_000.0).abs() < 1e-6);
        assert!((stats.gpu_throughput() - 200_000.0).abs() < 1e-6);
    }

    #[test]
    fn test_snapshot() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);
        stats.set_learned_threshold(5000);

        let snapshot = stats.snapshot();
        assert_eq!(snapshot.cpu_executions, 1);
        assert_eq!(snapshot.gpu_executions, 1);
        assert_eq!(snapshot.cpu_elements, 1000);
        assert_eq!(snapshot.gpu_elements, 10000);
        assert_eq!(snapshot.learned_threshold, 5000);
        assert_eq!(snapshot.total_executions(), 2);
        assert_eq!(snapshot.avg_cpu_time(), Duration::from_millis(100));
        assert_eq!(snapshot.avg_gpu_time(), Duration::from_millis(50));
        assert!((snapshot.gpu_utilization() - 50.0).abs() < f64::EPSILON);
    }

    /// `reset` must clear every execution, time and element counter.
    #[test]
    fn test_reset() {
        let stats = HybridStats::new();
        stats.record_cpu_execution(Duration::from_millis(100), 1000);
        stats.record_gpu_execution(Duration::from_millis(50), 10000);
        stats.reset();

        let snapshot = stats.snapshot();
        assert_eq!(snapshot.cpu_executions, 0);
        assert_eq!(snapshot.gpu_executions, 0);
        assert_eq!(snapshot.cpu_time_ns, 0);
        assert_eq!(snapshot.gpu_time_ns, 0);
        assert_eq!(snapshot.cpu_elements, 0);
        assert_eq!(snapshot.gpu_elements, 0);
    }
}