cuda_rust_wasm/profiling/
mod.rs

1//! Performance profiling tools for cuda-rust-wasm
2
3use std::collections::HashMap;
4use std::sync::{Arc, Mutex};
5use std::time::{Duration, Instant};
6use crate::error::CudaRustError;
7
8pub mod kernel_profiler;
9pub mod memory_profiler;
10pub mod runtime_profiler;
11pub mod performance_monitor;
12
13pub use kernel_profiler::KernelProfiler;
14pub use memory_profiler::MemoryProfiler;
15pub use runtime_profiler::RuntimeProfiler;
16pub use performance_monitor::{
17    PerformanceMonitor, MonitorConfig, CounterType, CounterStats, 
18    PerformanceReport, Timer, global_monitor, time_operation, 
19    record_measurement, global_report
20};
21
/// Performance metrics collected during profiling.
///
/// One value aggregates every sample stored under a single `name`: timing
/// statistics maintained by [`ProfileMetrics::record_duration`], plus memory
/// counters and free-form custom metrics that the owner of the record updates
/// directly through the public fields.
#[derive(Debug, Clone)]
pub struct ProfileMetrics {
    /// Label shown in the summary header.
    pub name: String,
    /// Sum of all recorded durations; saturates at [`Duration::MAX`].
    pub total_time: Duration,
    /// `total_time` divided by `count`; [`Duration::ZERO`] before the first sample.
    pub average_time: Duration,
    /// Shortest recorded duration. Holds the [`Duration::MAX`] sentinel until
    /// the first sample is recorded.
    pub min_time: Duration,
    /// Longest recorded duration.
    pub max_time: Duration,
    /// Number of recorded durations.
    pub count: usize,
    /// Allocated byte count (not modified by this type's own methods).
    pub memory_allocated: usize,
    /// Freed byte count (not modified by this type's own methods).
    pub memory_freed: usize,
    /// Peak usage in bytes (not modified by this type's own methods).
    pub peak_memory: usize,
    /// Arbitrary named values, printed with two decimals by `print_summary`.
    pub custom_metrics: HashMap<String, f64>,
}

impl ProfileMetrics {
    /// Creates an empty record labelled `name`.
    ///
    /// All counters start at zero except `min_time`, which starts at
    /// [`Duration::MAX`] so that the first recorded sample always replaces it.
    pub fn new(name: String) -> Self {
        Self {
            name,
            total_time: Duration::ZERO,
            average_time: Duration::ZERO,
            min_time: Duration::MAX,
            max_time: Duration::ZERO,
            count: 0,
            memory_allocated: 0,
            memory_freed: 0,
            peak_memory: 0,
            custom_metrics: HashMap::new(),
        }
    }

    /// Adds one timing sample and refreshes total, count, average, min and max.
    ///
    /// Never panics: the running total and the sample count saturate instead
    /// of overflowing, and the average stays well defined once `count` no
    /// longer fits in a `u32`.
    pub fn record_duration(&mut self, duration: Duration) {
        self.total_time = self.total_time.saturating_add(duration);
        self.count = self.count.saturating_add(1);
        self.average_time = Self::mean(self.total_time, self.count);
        self.min_time = self.min_time.min(duration);
        self.max_time = self.max_time.max(duration);
    }

    /// Returns `total / samples`, or zero when there are no samples.
    fn mean(total: Duration, samples: usize) -> Duration {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        if samples == 0 {
            return Duration::ZERO;
        }
        if samples <= u32::MAX as usize {
            // The guard above makes this cast lossless. Casting blindly would
            // wrap a count of 2^32 to 0 and divide by zero.
            return total / samples as u32;
        }
        // `Duration` only divides by `u32`, so go through nanoseconds for
        // larger counts. The quotient cannot exceed `total`, hence the seconds
        // part still fits in a `u64`.
        let nanos = total.as_nanos() / samples as u128;
        Duration::new((nanos / NANOS_PER_SEC) as u64, (nanos % NANOS_PER_SEC) as u32)
    }

    /// Prints a human-readable summary of this record to stdout.
    ///
    /// The memory section is printed only when at least one of the
    /// allocated/freed counters is non-zero, and the custom-metrics section
    /// only when the map is non-empty.
    pub fn print_summary(&self) {
        // With no samples `min_time` is still the `Duration::MAX` sentinel;
        // report zero rather than a meaningless huge value.
        let min_time = if self.count == 0 {
            Duration::ZERO
        } else {
            self.min_time
        };

        println!("\n=== Profile: {} ===", self.name);
        println!("Executions: {}", self.count);
        println!("Total time: {:?}", self.total_time);
        println!("Average time: {:?}", self.average_time);
        println!("Min time: {:?}", min_time);
        println!("Max time: {:?}", self.max_time);

        if self.memory_allocated > 0 || self.memory_freed > 0 {
            println!("\nMemory stats:");
            println!("  Allocated: {} bytes", self.memory_allocated);
            println!("  Freed: {} bytes", self.memory_freed);
            println!("  Peak usage: {} bytes", self.peak_memory);
        }

        if !self.custom_metrics.is_empty() {
            println!("\nCustom metrics:");
            for (key, value) in &self.custom_metrics {
                println!("  {key}: {value:.2}");
            }
        }
    }
}
89
90/// Global profiler for collecting performance data
91pub struct GlobalProfiler {
92    profiles: Arc<Mutex<HashMap<String, ProfileMetrics>>>,
93    enabled: Arc<Mutex<bool>>,
94}
95
96impl Default for GlobalProfiler {
97    fn default() -> Self {
98        Self::new()
99    }
100}
101
102impl GlobalProfiler {
103    pub fn new() -> Self {
104        Self {
105            profiles: Arc::new(Mutex::new(HashMap::new())),
106            enabled: Arc::new(Mutex::new(false)),
107        }
108    }
109
110    pub fn enable(&self) {
111        *self.enabled.lock().unwrap() = true;
112    }
113
114    pub fn disable(&self) {
115        *self.enabled.lock().unwrap() = false;
116    }
117
118    pub fn is_enabled(&self) -> bool {
119        *self.enabled.lock().unwrap()
120    }
121
122    pub fn record_event(&self, name: &str, duration: Duration) {
123        if !self.is_enabled() {
124            return;
125        }
126
127        let mut profiles = self.profiles.lock().unwrap();
128        profiles
129            .entry(name.to_string())
130            .or_insert_with(|| ProfileMetrics::new(name.to_string()))
131            .record_duration(duration);
132    }
133
134    pub fn record_memory_event(&self, name: &str, allocated: usize, freed: usize) {
135        if !self.is_enabled() {
136            return;
137        }
138
139        let mut profiles = self.profiles.lock().unwrap();
140        let profile = profiles
141            .entry(name.to_string())
142            .or_insert_with(|| ProfileMetrics::new(name.to_string()));
143        
144        profile.memory_allocated += allocated;
145        profile.memory_freed += freed;
146        
147        let current_usage = profile.memory_allocated - profile.memory_freed;
148        if current_usage > profile.peak_memory {
149            profile.peak_memory = current_usage;
150        }
151    }
152
153    pub fn record_custom_metric(&self, name: &str, metric_name: &str, value: f64) {
154        if !self.is_enabled() {
155            return;
156        }
157
158        let mut profiles = self.profiles.lock().unwrap();
159        profiles
160            .entry(name.to_string())
161            .or_insert_with(|| ProfileMetrics::new(name.to_string()))
162            .custom_metrics
163            .insert(metric_name.to_string(), value);
164    }
165
166    pub fn get_profile(&self, name: &str) -> Option<ProfileMetrics> {
167        self.profiles.lock().unwrap().get(name).cloned()
168    }
169
170    pub fn get_all_profiles(&self) -> Vec<ProfileMetrics> {
171        self.profiles.lock().unwrap().values().cloned().collect()
172    }
173
174    pub fn print_all_summaries(&self) {
175        let profiles = self.profiles.lock().unwrap();
176        
177        println!("\n========== PROFILING SUMMARY ==========");
178        for profile in profiles.values() {
179            profile.print_summary();
180        }
181        println!("======================================\n");
182    }
183
184    pub fn clear(&self) {
185        self.profiles.lock().unwrap().clear();
186    }
187
188    pub fn export_csv(&self, path: &str) -> Result<(), CudaRustError> {
189        use std::fs::File;
190        use std::io::Write;
191
192        let profiles = self.profiles.lock().unwrap();
193        let mut file = File::create(path)
194            .map_err(|e| CudaRustError::RuntimeError(format!("Failed to create file: {e}")))?;
195
196        // Write CSV header
197        writeln!(file, "Name,Count,Total_us,Average_us,Min_us,Max_us,Memory_Allocated,Memory_Freed,Peak_Memory")
198            .map_err(|e| CudaRustError::RuntimeError(format!("Failed to write header: {e}")))?;
199
200        // Write data
201        for profile in profiles.values() {
202            writeln!(
203                file,
204                "{},{},{},{},{},{},{},{},{}",
205                profile.name,
206                profile.count,
207                profile.total_time.as_micros(),
208                profile.average_time.as_micros(),
209                profile.min_time.as_micros(),
210                profile.max_time.as_micros(),
211                profile.memory_allocated,
212                profile.memory_freed,
213                profile.peak_memory
214            ).map_err(|e| CudaRustError::RuntimeError(format!("Failed to write data: {e}")))?;
215        }
216
217        Ok(())
218    }
219}
220
221/// Scoped timer for automatic duration measurement
222pub struct ScopedTimer<'a> {
223    profiler: &'a GlobalProfiler,
224    name: String,
225    start: Instant,
226}
227
228impl<'a> ScopedTimer<'a> {
229    pub fn new(profiler: &'a GlobalProfiler, name: String) -> Self {
230        Self {
231            profiler,
232            name,
233            start: Instant::now(),
234        }
235    }
236}
237
238impl<'a> Drop for ScopedTimer<'a> {
239    fn drop(&mut self) {
240        let duration = self.start.elapsed();
241        self.profiler.record_event(&self.name, duration);
242    }
243}
244
/// Macro for easy profiling of the enclosing scope.
///
/// Expands to a `let` binding that holds a `ScopedTimer`, so the time from the
/// invocation to the end of the enclosing block is reported to the profiler
/// when that block exits.
///
/// * `$profiler` is passed unchanged to `ScopedTimer::new`, which takes a
///   `&GlobalProfiler`.
/// * `$name` is converted with `.to_string()`.
///
/// A trailing comma after `$name` is accepted.
#[macro_export]
macro_rules! profile_scope {
    ($profiler:expr, $name:expr $(,)?) => {
        let _timer = $crate::profiling::ScopedTimer::new($profiler, $name.to_string());
    };
}
252
/// Performance counter for tracking specific metrics.
///
/// Holds a single named `f64` behind a mutex, so every method takes `&self`.
/// A poisoned mutex is recovered instead of panicking: an `f64` cannot be
/// left half-updated by a panic in another thread.
pub struct PerformanceCounter {
    name: String,
    value: Arc<Mutex<f64>>,
}

impl PerformanceCounter {
    /// Creates a counter called `name` with a value of `0.0`.
    pub fn new(name: String) -> Self {
        Self {
            name,
            value: Arc::new(Mutex::new(0.0)),
        }
    }

    /// Returns the name given at construction.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Adds `amount` (which may be negative) to the current value.
    pub fn increment(&self, amount: f64) {
        *self.lock_value() += amount;
    }

    /// Replaces the current value with `value`.
    pub fn set(&self, value: f64) {
        *self.lock_value() = value;
    }

    /// Returns the current value.
    pub fn get(&self) -> f64 {
        *self.lock_value()
    }

    /// Sets the value back to `0.0`.
    pub fn reset(&self) {
        *self.lock_value() = 0.0;
    }

    /// Locks the value, recovering the guard if the lock is poisoned.
    fn lock_value(&self) -> std::sync::MutexGuard<'_, f64> {
        self.value
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }
}
283
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// Builds a profiler that is already recording.
    fn enabled_profiler() -> GlobalProfiler {
        let profiler = GlobalProfiler::new();
        profiler.enable();
        profiler
    }

    /// Three timing samples under one name produce the expected aggregates.
    #[test]
    fn test_global_profiler() {
        let profiler = enabled_profiler();

        // Record some events
        for millis in [10, 20, 15] {
            profiler.record_event("test_op", Duration::from_millis(millis));
        }

        let stats = profiler.get_profile("test_op").unwrap();
        assert_eq!(stats.count, 3);
        assert_eq!(stats.total_time, Duration::from_millis(45));
        assert_eq!(stats.average_time, Duration::from_millis(15));
        assert_eq!(stats.min_time, Duration::from_millis(10));
        assert_eq!(stats.max_time, Duration::from_millis(20));
    }

    /// A timer dropped after a 10 ms sleep records one sample of at least 10 ms.
    #[test]
    fn test_scoped_timer() {
        let profiler = enabled_profiler();
        let pause = Duration::from_millis(10);

        let timer = ScopedTimer::new(&profiler, "scoped_test".to_string());
        thread::sleep(pause);
        drop(timer);

        let stats = profiler.get_profile("scoped_test").unwrap();
        assert_eq!(stats.count, 1);
        assert!(stats.total_time >= pause);
    }

    /// Memory totals accumulate and the peak reflects the highest outstanding amount.
    #[test]
    fn test_memory_profiling() {
        let profiler = enabled_profiler();

        for (allocated, freed) in [(1000, 0), (500, 200), (0, 800)] {
            profiler.record_memory_event("memory_test", allocated, freed);
        }

        let stats = profiler.get_profile("memory_test").unwrap();
        assert_eq!(stats.memory_allocated, 1500);
        assert_eq!(stats.memory_freed, 1000);
        assert_eq!(stats.peak_memory, 1300); // Peak was after second allocation
    }

    /// Increment, set and reset each leave the counter at the expected value.
    #[test]
    fn test_performance_counter() {
        let counter = PerformanceCounter::new("test_counter".to_string());

        for step in [10.0, 5.0] {
            counter.increment(step);
        }
        assert_eq!(counter.get(), 15.0);

        counter.set(100.0);
        assert_eq!(counter.get(), 100.0);

        counter.reset();
        assert_eq!(counter.get(), 0.0);
    }
}