// cuda_rust_wasm/profiling/memory_profiler.rs
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use crate::error::CudaRustError;

/// A single recorded allocation: its address, size, category, and the moment
/// it was tracked.
#[derive(Debug, Clone)]
pub struct AllocationEvent {
    // Time the profiler recorded the event (taken via `Instant::now()` at
    // track time, not when the underlying driver performed the allocation).
    pub timestamp: Instant,
    // Allocation size in bytes.
    pub size: usize,
    // Address of the allocation; used as the unique key for matching a later
    // deallocation.
    pub address: usize,
    // Which memory category this allocation belongs to.
    pub allocation_type: AllocationType,
    // Optional caller-supplied label shown in reports and timeline exports.
    pub tag: Option<String>,
}
17
/// Memory categories tracked independently by the profiler.
/// `Copy + Eq + Hash` so the variants can key the usage-counter maps.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AllocationType {
    DeviceMemory,
    UnifiedMemory,
    PinnedMemory,
    SharedMemory,
}
25
/// Thread-safe recorder of allocation/deallocation events with per-type
/// current and peak usage counters. All shared state sits behind
/// `Arc<Mutex<…>>` so tracking calls can come from multiple threads.
pub struct MemoryProfiler {
    // Live allocations, keyed by address; entries are removed on deallocation.
    allocations: Arc<Mutex<HashMap<usize, AllocationEvent>>>,
    // Append-only log of every allocation ever tracked (never pruned except
    // by `clear`).
    allocation_history: Arc<Mutex<Vec<AllocationEvent>>>,
    // Bytes currently in use, per allocation type.
    current_usage: Arc<Mutex<HashMap<AllocationType, usize>>>,
    // High-water mark of bytes in use, per allocation type.
    peak_usage: Arc<Mutex<HashMap<AllocationType, usize>>>,
    // When false, `track_allocation`/`track_deallocation` are no-ops.
    enabled: bool,
}
34
impl Default for MemoryProfiler {
    // Same as `MemoryProfiler::new()`: a disabled profiler with zeroed
    // counters for every allocation type.
    fn default() -> Self {
        Self::new()
    }
}
40
41impl MemoryProfiler {
42 pub fn new() -> Self {
43 let mut current_usage = HashMap::new();
44 let mut peak_usage = HashMap::new();
45
46 for alloc_type in &[
47 AllocationType::DeviceMemory,
48 AllocationType::UnifiedMemory,
49 AllocationType::PinnedMemory,
50 AllocationType::SharedMemory,
51 ] {
52 current_usage.insert(*alloc_type, 0);
53 peak_usage.insert(*alloc_type, 0);
54 }
55
56 Self {
57 allocations: Arc::new(Mutex::new(HashMap::new())),
58 allocation_history: Arc::new(Mutex::new(Vec::new())),
59 current_usage: Arc::new(Mutex::new(current_usage)),
60 peak_usage: Arc::new(Mutex::new(peak_usage)),
61 enabled: false,
62 }
63 }
64
65 pub fn enable(&mut self) {
66 self.enabled = true;
67 }
68
69 pub fn disable(&mut self) {
70 self.enabled = false;
71 }
72
73 pub fn track_allocation(
74 &self,
75 address: usize,
76 size: usize,
77 alloc_type: AllocationType,
78 tag: Option<String>,
79 ) {
80 if !self.enabled {
81 return;
82 }
83
84 let event = AllocationEvent {
85 timestamp: Instant::now(),
86 size,
87 address,
88 allocation_type: alloc_type,
89 tag,
90 };
91
92 {
94 let mut allocations = self.allocations.lock().unwrap();
95 allocations.insert(address, event.clone());
96 }
97
98 {
100 let mut history = self.allocation_history.lock().unwrap();
101 history.push(event);
102 }
103
104 {
106 let mut current = self.current_usage.lock().unwrap();
107 let mut peak = self.peak_usage.lock().unwrap();
108
109 *current.get_mut(&alloc_type).unwrap() += size;
110
111 let current_total = *current.get(&alloc_type).unwrap();
112 let peak_total = peak.get_mut(&alloc_type).unwrap();
113
114 if current_total > *peak_total {
115 *peak_total = current_total;
116 }
117 }
118 }
119
120 pub fn track_deallocation(&self, address: usize) {
121 if !self.enabled {
122 return;
123 }
124
125 let mut allocations = self.allocations.lock().unwrap();
126
127 if let Some(event) = allocations.remove(&address) {
128 let mut current = self.current_usage.lock().unwrap();
129 *current.get_mut(&event.allocation_type).unwrap() -= event.size;
130 }
131 }
132
133 pub fn get_current_usage(&self) -> HashMap<AllocationType, usize> {
134 self.current_usage.lock().unwrap().clone()
135 }
136
137 pub fn get_peak_usage(&self) -> HashMap<AllocationType, usize> {
138 self.peak_usage.lock().unwrap().clone()
139 }
140
141 pub fn get_total_current_usage(&self) -> usize {
142 self.current_usage.lock().unwrap().values().sum()
143 }
144
145 pub fn get_total_peak_usage(&self) -> usize {
146 self.peak_usage.lock().unwrap().values().sum()
147 }
148
149 pub fn get_active_allocations(&self) -> Vec<AllocationEvent> {
150 self.allocations.lock().unwrap().values().cloned().collect()
151 }
152
153 pub fn get_allocation_history(&self) -> Vec<AllocationEvent> {
154 self.allocation_history.lock().unwrap().clone()
155 }
156
157 pub fn find_leaks(&self) -> Vec<AllocationEvent> {
158 self.allocations.lock().unwrap().values().cloned().collect()
159 }
160
161 pub fn print_summary(&self) {
162 println!("\n========== MEMORY PROFILING SUMMARY ==========");
163
164 let current = self.get_current_usage();
165 let peak = self.get_peak_usage();
166
167 println!("\nCurrent Memory Usage:");
168 for (alloc_type, size) in ¤t {
169 println!(" {:?}: {} MB", alloc_type, size / (1024 * 1024));
170 }
171 println!(" Total: {} MB", self.get_total_current_usage() / (1024 * 1024));
172
173 println!("\nPeak Memory Usage:");
174 for (alloc_type, size) in &peak {
175 println!(" {:?}: {} MB", alloc_type, size / (1024 * 1024));
176 }
177 println!(" Total: {} MB", self.get_total_peak_usage() / (1024 * 1024));
178
179 let active_allocations = self.get_active_allocations();
180 println!("\nActive Allocations: {}", active_allocations.len());
181
182 let mut sorted_allocs = active_allocations.clone();
184 sorted_allocs.sort_by(|a, b| b.size.cmp(&a.size));
185
186 if !sorted_allocs.is_empty() {
187 println!("\nLargest Active Allocations:");
188 for (i, alloc) in sorted_allocs.iter().take(10).enumerate() {
189 println!(" {}. {} MB - {:?} {}",
190 i + 1,
191 alloc.size / (1024 * 1024),
192 alloc.allocation_type,
193 alloc.tag.as_ref().unwrap_or(&"<untagged>".to_string())
194 );
195 }
196 }
197
198 println!("==============================================\n");
199 }
200
201 pub fn analyze_fragmentation(&self) -> FragmentationAnalysis {
202 let allocations = self.allocations.lock().unwrap();
203
204 if allocations.is_empty() {
205 return FragmentationAnalysis {
206 total_allocations: 0,
207 total_size: 0,
208 average_size: 0,
209 fragmentation_score: 0.0,
210 size_distribution: HashMap::new(),
211 };
212 }
213
214 let total_allocations = allocations.len();
215 let total_size: usize = allocations.values().map(|a| a.size).sum();
216 let average_size = total_size / total_allocations;
217
218 let mut size_distribution: HashMap<String, usize> = HashMap::new();
220
221 for alloc in allocations.values() {
222 let size_category = match alloc.size {
223 0..=1024 => "0-1KB",
224 1025..=65536 => "1KB-64KB",
225 65537..=1048576 => "64KB-1MB",
226 1048577..=16777216 => "1MB-16MB",
227 _ => ">16MB",
228 };
229
230 *size_distribution.entry(size_category.to_string()).or_insert(0) += 1;
231 }
232
233 let variance: f64 = allocations.values()
235 .map(|a| {
236 let diff = a.size as f64 - average_size as f64;
237 diff * diff
238 })
239 .sum::<f64>() / total_allocations as f64;
240
241 let std_dev = variance.sqrt();
242 let fragmentation_score = (std_dev / average_size as f64).min(1.0) * 100.0;
243
244 FragmentationAnalysis {
245 total_allocations,
246 total_size,
247 average_size,
248 fragmentation_score,
249 size_distribution,
250 }
251 }
252
253 pub fn export_timeline(&self, path: &str) -> Result<(), CudaRustError> {
254 use std::fs::File;
255 use std::io::Write;
256
257 let history = self.allocation_history.lock().unwrap();
258 let mut file = File::create(path)
259 .map_err(|e| CudaRustError::RuntimeError(format!("Failed to create file: {e}")))?;
260
261 writeln!(file, "timestamp_us,event_type,size,allocation_type,tag")
262 .map_err(|e| CudaRustError::RuntimeError(format!("Failed to write header: {e}")))?;
263
264 let start_time = history.first().map(|e| e.timestamp).unwrap_or_else(Instant::now);
265
266 for event in history.iter() {
267 let timestamp_us = event.timestamp.duration_since(start_time).as_micros();
268 writeln!(
269 file,
270 "{},allocation,{},{:?},{}",
271 timestamp_us,
272 event.size,
273 event.allocation_type,
274 event.tag.as_ref().unwrap_or(&"".to_string())
275 ).map_err(|e| CudaRustError::RuntimeError(format!("Failed to write data: {e}")))?;
276 }
277
278 Ok(())
279 }
280
281 pub fn clear(&self) {
282 self.allocations.lock().unwrap().clear();
283 self.allocation_history.lock().unwrap().clear();
284
285 let mut current = self.current_usage.lock().unwrap();
286 for value in current.values_mut() {
287 *value = 0;
288 }
289 }
290}
291
/// Summary statistics describing the size mix of the live allocations,
/// as produced by `MemoryProfiler::analyze_fragmentation`.
#[derive(Debug, Clone)]
pub struct FragmentationAnalysis {
    // Number of live allocations analyzed.
    pub total_allocations: usize,
    // Sum of all live allocation sizes, in bytes.
    pub total_size: usize,
    // Mean allocation size, in bytes.
    pub average_size: usize,
    // 0–100 score; higher means more varied allocation sizes.
    pub fragmentation_score: f64,
    // Count of allocations per human-readable size bucket.
    pub size_distribution: HashMap<String, usize>,
}

impl FragmentationAnalysis {
    /// Writes a human-readable report of the analysis to stdout.
    pub fn print_analysis(&self) {
        println!("\n=== Memory Fragmentation Analysis ===");
        println!("Total allocations: {}", self.total_allocations);
        println!("Total size: {} MB", self.total_size / (1024 * 1024));
        println!("Average allocation size: {} KB", self.average_size / 1024);
        println!("Fragmentation score: {:.1}%", self.fragmentation_score);

        println!("\nSize distribution:");

        // Order the buckets by name so the report is deterministic run-to-run.
        let mut rows: Vec<(&String, &usize)> = self.size_distribution.iter().collect();
        rows.sort_by(|(left, _), (right, _)| left.cmp(right));

        for (category, count) in rows {
            println!(" {category}: {count} allocations");
        }
    }
}
318
/// Watches memory consumption against a fixed budget and fires an optional
/// callback whenever usage reaches the configured threshold.
pub struct MemoryPressureMonitor {
    // Percentage of the budget at which events start being reported.
    threshold_percent: f32,
    // Total memory budget, in bytes.
    total_memory: usize,
    // Optional handler invoked from `check_pressure` on threshold crossings.
    callback: Option<Box<dyn Fn(MemoryPressureEvent) + Send + Sync>>,
}

/// Snapshot of memory usage at the moment a pressure check ran.
#[derive(Debug, Clone)]
pub struct MemoryPressureEvent {
    pub current_usage: usize,
    pub total_memory: usize,
    pub usage_percent: f32,
    pub pressure_level: PressureLevel,
}

/// Coarse severity buckets for memory pressure.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PressureLevel {
    Low,
    Medium,
    High,
    Critical,
}

impl MemoryPressureMonitor {
    /// Creates a monitor over `total_memory` bytes that reports once usage
    /// reaches `threshold_percent` percent of that budget.
    pub fn new(total_memory: usize, threshold_percent: f32) -> Self {
        Self {
            threshold_percent,
            total_memory,
            callback: None,
        }
    }

    /// Registers a handler that `check_pressure` invokes (with a clone of the
    /// event) whenever the threshold is reached.
    pub fn set_callback<F>(&mut self, callback: F)
    where
        F: Fn(MemoryPressureEvent) + Send + Sync + 'static,
    {
        self.callback = Some(Box::new(callback));
    }

    /// Classifies `current_usage` against the budget.
    ///
    /// Returns `Some(event)` — after invoking any registered callback — when
    /// usage is at or above the threshold, `None` otherwise.
    pub fn check_pressure(&self, current_usage: usize) -> Option<MemoryPressureEvent> {
        let usage_percent = current_usage as f32 / self.total_memory as f32 * 100.0;

        // Bucket boundaries: <50 Low, <75 Medium, <90 High, else Critical.
        let pressure_level = if usage_percent < 50.0 {
            PressureLevel::Low
        } else if usage_percent < 75.0 {
            PressureLevel::Medium
        } else if usage_percent < 90.0 {
            PressureLevel::High
        } else {
            PressureLevel::Critical
        };

        // Deliberately `>=` (not an inverted early return): keeps behavior
        // identical even if usage_percent is NaN (zero-byte budget).
        if usage_percent >= self.threshold_percent {
            let event = MemoryPressureEvent {
                current_usage,
                total_memory: self.total_memory,
                usage_percent,
                pressure_level,
            };

            if let Some(handler) = &self.callback {
                handler(event.clone());
            }

            Some(event)
        } else {
            None
        }
    }
}
385
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_memory_profiler() {
        let mut profiler = MemoryProfiler::new();
        profiler.enable();

        // 1 KB + 2 KB device memory plus 4 KB unified memory.
        profiler.track_allocation(0x1000, 1024, AllocationType::DeviceMemory, Some("test1".to_string()));
        profiler.track_allocation(0x2000, 2048, AllocationType::DeviceMemory, Some("test2".to_string()));
        profiler.track_allocation(0x3000, 4096, AllocationType::UnifiedMemory, None);

        let current = profiler.get_current_usage();
        assert_eq!(current[&AllocationType::DeviceMemory], 3072);
        assert_eq!(current[&AllocationType::UnifiedMemory], 4096);

        // Freeing the first block drops device usage back to 2 KB...
        profiler.track_deallocation(0x1000);
        let current = profiler.get_current_usage();
        assert_eq!(current[&AllocationType::DeviceMemory], 2048);

        // ...while the peak keeps the 3 KB high-water mark.
        let peak = profiler.get_peak_usage();
        assert_eq!(peak[&AllocationType::DeviceMemory], 3072);
    }

    #[test]
    fn test_fragmentation_analysis() {
        let mut profiler = MemoryProfiler::new();
        profiler.enable();

        // Alternate 1 KB and 1 MB allocations to force a wide size spread.
        for address in 0..100 {
            let size = if address % 2 == 0 { 1024 } else { 1024 * 1024 };
            profiler.track_allocation(address, size, AllocationType::DeviceMemory, None);
        }

        let analysis = profiler.analyze_fragmentation();
        assert_eq!(analysis.total_allocations, 100);
        assert!(analysis.fragmentation_score > 0.0);
    }

    #[test]
    fn test_memory_pressure_monitor() {
        // 1 GiB budget, report at 80%.
        let monitor = MemoryPressureMonitor::new(1024 * 1024 * 1024, 80.0);

        // ~49% usage: below threshold, nothing reported.
        let low_pressure = monitor.check_pressure(500 * 1024 * 1024);
        assert!(low_pressure.is_none());

        // ~88% usage: above threshold and inside the High bucket.
        let high_pressure = monitor.check_pressure(900 * 1024 * 1024);
        assert!(high_pressure.is_some());

        let event = high_pressure.unwrap();
        assert_eq!(event.pressure_level, PressureLevel::High);
    }
}