Skip to main content

tenflowers_core/memory/
tracking.rs

//! Performance monitoring and allocation analytics
//!
//! This module provides comprehensive tracking of memory operations,
//! kernel performance, and system-wide memory usage statistics.
5
6use std::collections::HashMap;
7use std::sync::{Arc, Mutex};
8use std::time::{Duration, Instant};
9
/// Kernel occupancy statistics for GPU performance analysis.
///
/// One value describes one recorded kernel invocation: the occupancy report
/// counts the stored values per kernel name and labels that count
/// "Invocations".
///
/// The occupancy report prints `achieved_occupancy`, `efficiency_ratio` and
/// `memory_bandwidth_utilization` with a `%` suffix and compares their averages
/// against thresholds of 50.0, 70.0 and 60.0, so those fields are presumably
/// expressed on a 0-100 scale (confirm with the code that produces them).
#[derive(Debug, Clone, PartialEq)]
pub struct KernelOccupancyStats {
    /// Kernel identifier; the monitor groups recorded samples under this name.
    pub kernel_name: String,
    /// Workgroup size used for the dispatch (units are not established in this
    /// file; presumably invocations per workgroup).
    pub workgroup_size: u32,
    /// Number of workgroups dispatched for this invocation.
    pub workgroups_dispatched: u32,
    /// Theoretical occupancy for the launch configuration (presumably 0-100,
    /// like `achieved_occupancy`; not used by any report in this file).
    pub theoretical_occupancy: f32,
    /// Measured occupancy; averaged per kernel and flagged when below 50.0.
    pub achieved_occupancy: f32,
    /// Efficiency figure; averaged per kernel and flagged when below 70.0.
    pub efficiency_ratio: f32,
    /// Memory bandwidth utilization; averaged per kernel and flagged when
    /// below 60.0.
    pub memory_bandwidth_utilization: f32,
    /// Arithmetic intensity; reported as a plain number without a unit.
    pub arithmetic_intensity: f32,
}
22
23/// Performance monitoring for operation timing and memory usage tracking
24#[derive(Debug)]
25pub struct PerformanceMonitor {
26    inner: Arc<Mutex<PerformanceMonitorInner>>,
27}
28
29#[derive(Debug)]
30struct PerformanceMonitorInner {
31    operation_timings: HashMap<String, Vec<Duration>>,
32    memory_usage: HashMap<String, usize>,
33    total_allocations: usize,
34    total_deallocations: usize,
35    peak_memory: usize,
36    current_memory: usize,
37    kernel_occupancy: HashMap<String, Vec<KernelOccupancyStats>>,
38}
39
40impl Default for PerformanceMonitor {
41    fn default() -> Self {
42        Self::new()
43    }
44}
45
46impl PerformanceMonitor {
47    /// Create a new performance monitor
48    pub fn new() -> Self {
49        Self {
50            inner: Arc::new(Mutex::new(PerformanceMonitorInner {
51                operation_timings: HashMap::new(),
52                memory_usage: HashMap::new(),
53                total_allocations: 0,
54                total_deallocations: 0,
55                peak_memory: 0,
56                current_memory: 0,
57                kernel_occupancy: HashMap::new(),
58            })),
59        }
60    }
61
62    /// Record the execution time of an operation
63    pub fn record_operation_time(&self, operation: &str, duration: Duration) {
64        if let Ok(mut inner) = self.inner.lock() {
65            inner
66                .operation_timings
67                .entry(operation.to_string())
68                .or_default()
69                .push(duration);
70        }
71    }
72
73    /// Record memory allocation
74    pub fn record_allocation(&self, operation: &str, size: usize) {
75        if let Ok(mut inner) = self.inner.lock() {
76            inner.memory_usage.insert(operation.to_string(), size);
77            inner.total_allocations += 1;
78            inner.current_memory += size;
79            if inner.current_memory > inner.peak_memory {
80                inner.peak_memory = inner.current_memory;
81            }
82        }
83    }
84
85    /// Record memory deallocation
86    pub fn record_deallocation(&self, size: usize) {
87        if let Ok(mut inner) = self.inner.lock() {
88            inner.total_deallocations += 1;
89            inner.current_memory = inner.current_memory.saturating_sub(size);
90        }
91    }
92
93    /// Get average execution time for an operation
94    pub fn get_average_time(&self, operation: &str) -> Option<Duration> {
95        if let Ok(inner) = self.inner.lock() {
96            if let Some(times) = inner.operation_timings.get(operation) {
97                if !times.is_empty() {
98                    let total: Duration = times.iter().sum();
99                    return Some(total / times.len() as u32);
100                }
101            }
102        }
103        None
104    }
105
106    /// Get all recorded operation times
107    pub fn get_all_operation_times(&self) -> HashMap<String, Vec<Duration>> {
108        if let Ok(inner) = self.inner.lock() {
109            inner.operation_timings.clone()
110        } else {
111            HashMap::new()
112        }
113    }
114
115    /// Get current memory usage
116    pub fn get_current_memory(&self) -> usize {
117        if let Ok(inner) = self.inner.lock() {
118            inner.current_memory
119        } else {
120            0
121        }
122    }
123
124    /// Get peak memory usage
125    pub fn get_peak_memory(&self) -> usize {
126        if let Ok(inner) = self.inner.lock() {
127            inner.peak_memory
128        } else {
129            0
130        }
131    }
132
133    /// Get memory allocation statistics
134    pub fn get_allocation_stats(&self) -> (usize, usize) {
135        if let Ok(inner) = self.inner.lock() {
136            (inner.total_allocations, inner.total_deallocations)
137        } else {
138            (0, 0)
139        }
140    }
141
142    /// Generate a performance report
143    pub fn generate_report(&self) -> String {
144        if let Ok(inner) = self.inner.lock() {
145            let mut report = String::new();
146            report.push_str("=== Performance Monitor Report ===\n\n");
147
148            report.push_str("Memory Statistics:\n");
149            report.push_str(&format!(
150                "  Current Memory: {} bytes\n",
151                inner.current_memory
152            ));
153            report.push_str(&format!("  Peak Memory: {} bytes\n", inner.peak_memory));
154            report.push_str(&format!(
155                "  Total Allocations: {}\n",
156                inner.total_allocations
157            ));
158            report.push_str(&format!(
159                "  Total Deallocations: {}\n",
160                inner.total_deallocations
161            ));
162            report.push('\n');
163
164            report.push_str("Operation Timings:\n");
165            for (operation, times) in &inner.operation_timings {
166                if !times.is_empty() {
167                    let total: Duration = times.iter().sum();
168                    let avg = total / times.len() as u32;
169                    let min = times.iter().min().copied().unwrap_or_default();
170                    let max = times.iter().max().copied().unwrap_or_default();
171
172                    report.push_str(&format!("  {operation}:\n"));
173                    report.push_str(&format!("    Count: {}\n", times.len()));
174                    report.push_str(&format!("    Average: {avg:?}\n"));
175                    report.push_str(&format!("    Min: {min:?}\n"));
176                    report.push_str(&format!("    Max: {max:?}\n"));
177                    report.push_str(&format!("    Total: {total:?}\n"));
178                }
179            }
180
181            report
182        } else {
183            "Failed to generate report".to_string()
184        }
185    }
186
187    /// Record kernel occupancy statistics
188    pub fn record_kernel_occupancy(&self, stats: KernelOccupancyStats) {
189        if let Ok(mut inner) = self.inner.lock() {
190            inner
191                .kernel_occupancy
192                .entry(stats.kernel_name.clone())
193                .or_default()
194                .push(stats);
195        }
196    }
197
198    /// Get kernel occupancy statistics for a specific kernel
199    pub fn get_kernel_occupancy(&self, kernel_name: &str) -> Vec<KernelOccupancyStats> {
200        if let Ok(inner) = self.inner.lock() {
201            inner
202                .kernel_occupancy
203                .get(kernel_name)
204                .cloned()
205                .unwrap_or_default()
206        } else {
207            Vec::new()
208        }
209    }
210
211    /// Get all kernel occupancy statistics
212    pub fn get_all_kernel_occupancy(&self) -> HashMap<String, Vec<KernelOccupancyStats>> {
213        if let Ok(inner) = self.inner.lock() {
214            inner.kernel_occupancy.clone()
215        } else {
216            HashMap::new()
217        }
218    }
219
220    /// Calculate average occupancy for a kernel
221    pub fn get_average_kernel_occupancy(&self, kernel_name: &str) -> Option<f32> {
222        if let Ok(inner) = self.inner.lock() {
223            if let Some(stats) = inner.kernel_occupancy.get(kernel_name) {
224                if !stats.is_empty() {
225                    let total: f32 = stats.iter().map(|s| s.achieved_occupancy).sum();
226                    return Some(total / stats.len() as f32);
227                }
228            }
229        }
230        None
231    }
232
233    /// Generate kernel occupancy analysis report
234    pub fn generate_occupancy_report(&self) -> String {
235        if let Ok(inner) = self.inner.lock() {
236            let mut report = String::new();
237            report.push_str("=== Kernel Occupancy Analysis ===\n\n");
238
239            for (kernel_name, stats_vec) in &inner.kernel_occupancy {
240                if !stats_vec.is_empty() {
241                    let avg_occupancy: f32 =
242                        stats_vec.iter().map(|s| s.achieved_occupancy).sum::<f32>()
243                            / stats_vec.len() as f32;
244                    let avg_efficiency: f32 =
245                        stats_vec.iter().map(|s| s.efficiency_ratio).sum::<f32>()
246                            / stats_vec.len() as f32;
247                    let avg_bandwidth: f32 = stats_vec
248                        .iter()
249                        .map(|s| s.memory_bandwidth_utilization)
250                        .sum::<f32>()
251                        / stats_vec.len() as f32;
252                    let avg_intensity: f32 = stats_vec
253                        .iter()
254                        .map(|s| s.arithmetic_intensity)
255                        .sum::<f32>()
256                        / stats_vec.len() as f32;
257
258                    report.push_str(&format!("Kernel: {kernel_name}\n"));
259                    report.push_str(&format!("  Invocations: {}\n", stats_vec.len()));
260                    report.push_str(&format!("  Average Occupancy: {avg_occupancy:.2}%\n"));
261                    report.push_str(&format!("  Average Efficiency: {avg_efficiency:.2}%\n"));
262                    report.push_str(&format!(
263                        "  Average Bandwidth Utilization: {avg_bandwidth:.2}%\n"
264                    ));
265                    report.push_str(&format!(
266                        "  Average Arithmetic Intensity: {avg_intensity:.2}\n"
267                    ));
268
269                    // Performance recommendations
270                    if avg_occupancy < 50.0 {
271                        report.push_str(
272                            "  ⚠️  Low occupancy detected. Consider increasing workgroup size.\n",
273                        );
274                    }
275                    if avg_efficiency < 70.0 {
276                        report.push_str(
277                            "  ⚠️  Low efficiency. Check for thread divergence or memory issues.\n",
278                        );
279                    }
280                    if avg_bandwidth < 60.0 {
281                        report.push_str("  ⚠️  Low memory bandwidth utilization. Consider memory access optimization.\n");
282                    }
283
284                    report.push('\n');
285                }
286            }
287
288            report
289        } else {
290            "Failed to generate occupancy report".to_string()
291        }
292    }
293
294    /// Clear all recorded statistics
295    pub fn clear(&self) {
296        if let Ok(mut inner) = self.inner.lock() {
297            inner.operation_timings.clear();
298            inner.memory_usage.clear();
299            inner.total_allocations = 0;
300            inner.total_deallocations = 0;
301            inner.peak_memory = 0;
302            inner.current_memory = 0;
303            inner.kernel_occupancy.clear();
304        }
305    }
306}
307
308/// A timer for measuring operation execution time
309pub struct OperationTimer {
310    operation: String,
311    start: Instant,
312    monitor: Arc<PerformanceMonitor>,
313}
314
315impl OperationTimer {
316    /// Create a new operation timer
317    pub fn new(operation: String, monitor: Arc<PerformanceMonitor>) -> Self {
318        Self {
319            operation,
320            start: Instant::now(),
321            monitor,
322        }
323    }
324}
325
326impl Drop for OperationTimer {
327    fn drop(&mut self) {
328        let duration = self.start.elapsed();
329        self.monitor
330            .record_operation_time(&self.operation, duration);
331    }
332}
333
334/// Global performance monitor instance
335static GLOBAL_MONITOR: std::sync::OnceLock<Arc<PerformanceMonitor>> = std::sync::OnceLock::new();
336
337/// Get the global performance monitor
338pub fn global_monitor() -> &'static PerformanceMonitor {
339    GLOBAL_MONITOR.get_or_init(|| Arc::new(PerformanceMonitor::new()))
340}
341
342/// Get the global performance monitor as Arc
343pub fn global_monitor_arc() -> Arc<PerformanceMonitor> {
344    GLOBAL_MONITOR
345        .get_or_init(|| Arc::new(PerformanceMonitor::new()))
346        .clone()
347}
348
/// Times a block of code against the global performance monitor.
///
/// `time_operation!(name, { ... })` fetches the global monitor, starts an
/// `OperationTimer` labelled `name.to_string()`, evaluates the block and yields
/// the block's value. The timer is dropped, and the duration recorded, when
/// the expansion's scope ends.
#[macro_export]
macro_rules! time_operation {
    ($name:expr, $code:block) => {{
        let shared_monitor = $crate::memory::tracking::global_monitor_arc();
        let _scope_timer =
            $crate::memory::tracking::OperationTimer::new($name.to_string(), shared_monitor);
        $code
    }};
}
358
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    /// Averages, running/peak memory and allocation counters on a fresh monitor.
    #[test]
    fn test_performance_monitor() {
        let pm = PerformanceMonitor::new();

        // Two samples of 100 ms and 200 ms average to 150 ms.
        for millis in [100u64, 200].iter() {
            pm.record_operation_time("test_op", Duration::from_millis(*millis));
        }
        let mean = pm
            .get_average_time("test_op")
            .expect("test: get_average_time should succeed");
        assert_eq!(mean, Duration::from_millis(150));

        // First allocation sets both the running total and the peak.
        pm.record_allocation("tensor_alloc", 1024);
        assert_eq!(pm.get_current_memory(), 1024);
        assert_eq!(pm.get_peak_memory(), 1024);

        // Second allocation raises both.
        pm.record_allocation("another_alloc", 512);
        assert_eq!(pm.get_current_memory(), 1536);
        assert_eq!(pm.get_peak_memory(), 1536);

        // Deallocation lowers the running total only; the peak remains.
        pm.record_deallocation(512);
        assert_eq!(pm.get_current_memory(), 1024);
        assert_eq!(pm.get_peak_memory(), 1536);

        let (alloc_count, dealloc_count) = pm.get_allocation_stats();
        assert_eq!(alloc_count, 2);
        assert_eq!(dealloc_count, 1);
    }

    /// Dropping an `OperationTimer` records the elapsed time on its monitor.
    #[test]
    fn test_operation_timer() {
        let shared = Arc::new(PerformanceMonitor::new());

        {
            let _guard = OperationTimer::new("sleep_test".to_string(), shared.clone());
            thread::sleep(Duration::from_millis(10));
        }

        let recorded = shared
            .get_average_time("sleep_test")
            .expect("test: get_average_time should succeed");
        // Allow some variance
        assert!(recorded >= Duration::from_millis(9));
    }

    /// The text report carries the header, the memory line and the operation.
    #[test]
    fn test_report_generation() {
        let pm = PerformanceMonitor::new();
        pm.record_operation_time("op1", Duration::from_millis(100));
        pm.record_allocation("alloc1", 1024);

        let text = pm.generate_report();
        for needle in [
            "Performance Monitor Report",
            "Current Memory: 1024 bytes",
            "op1:",
        ]
        .iter()
        {
            assert!(text.contains(needle));
        }
    }

    /// Both calls to `global_monitor` return the same shared instance.
    #[test]
    fn test_global_monitor() {
        let first = global_monitor();
        let second = global_monitor();

        // Should be the same instance
        assert!(std::ptr::eq(first, second));

        // Relative check: the global monitor may already hold data.
        let before = first.get_current_memory();

        first.record_allocation("global_test", 512);
        let after = second.get_current_memory();

        // Check that memory increased by exactly 512
        assert_eq!(after - before, 512);
    }

    /// One recorded sample yields its own value as the average and shows up
    /// in the occupancy report.
    #[test]
    fn test_kernel_occupancy() {
        let pm = PerformanceMonitor::new();

        pm.record_kernel_occupancy(KernelOccupancyStats {
            kernel_name: "test_kernel".to_string(),
            workgroup_size: 256,
            workgroups_dispatched: 100,
            theoretical_occupancy: 100.0,
            achieved_occupancy: 85.0,
            efficiency_ratio: 90.0,
            memory_bandwidth_utilization: 75.0,
            arithmetic_intensity: 2.5,
        });

        let mean_occupancy = pm
            .get_average_kernel_occupancy("test_kernel")
            .expect("test: get_average_kernel_occupancy should succeed");
        assert_eq!(mean_occupancy, 85.0);

        let text = pm.generate_occupancy_report();
        assert!(text.contains("Kernel Occupancy Analysis"));
        assert!(text.contains("test_kernel"));
    }

    /// `clear` wipes both timing samples and memory counters.
    #[test]
    fn test_clear_statistics() {
        let pm = PerformanceMonitor::new();

        pm.record_operation_time("op", Duration::from_millis(100));
        pm.record_allocation("alloc", 1024);

        assert_eq!(pm.get_current_memory(), 1024);
        assert!(pm.get_average_time("op").is_some());

        pm.clear();

        assert_eq!(pm.get_current_memory(), 0);
        assert!(pm.get_average_time("op").is_none());
    }
}