// mathhook_core/core/performance/profiler.rs

1//! Runtime Performance Profiler
2//!
3//! This module implements adaptive performance profiling that learns from actual
4//! runtime behavior to optimize SIMD and parallelism thresholds dynamically.
5
6use std::collections::HashMap;
7use std::sync::{Arc, OnceLock, RwLock};
8use std::time::{Duration, Instant};
9
/// Performance measurement for a specific operation type and size.
///
/// One value of this type is a single timing sample. The profiler in this
/// file classifies samples by substring of `operation_type`: labels containing
/// `"simd"`, `"parallel"` or `"sequential"` feed the threshold estimation.
#[derive(Debug, Clone)]
pub struct PerformanceMeasurement {
    /// Operation type (e.g., "simd_add", "parallel_multiply", "sequential_add")
    pub operation_type: String,
    /// Number of elements processed
    pub operation_size: usize,
    /// Time taken for the operation
    pub duration: Duration,
    /// Timestamp when measurement was taken.
    /// NOTE(review): nothing visible in this file reads this field back.
    pub timestamp: Instant,
}
22
/// Threshold configuration that the profiler adjusts from runtime measurements.
///
/// A fresh value (see [`Default`]) starts from fixed conservative thresholds
/// with zero confidence and zero samples.
#[derive(Debug, Clone)]
pub struct AdaptiveThresholds {
    /// Current SIMD threshold (dynamically adjusted)
    pub simd_threshold: usize,
    /// Current parallelism threshold (dynamically adjusted)
    pub parallel_threshold: usize,
    /// Confidence level in the current thresholds (0.0 - 1.0)
    pub confidence: f64,
    /// Number of measurements used to determine the thresholds
    pub sample_count: usize,
}

impl Default for AdaptiveThresholds {
    /// Returns the starting configuration: SIMD threshold 50, parallel
    /// threshold 1000, no confidence and no samples.
    fn default() -> Self {
        // Nothing has been measured yet, so the learned-state fields are zeroed.
        let sample_count = 0;
        let confidence = 0.0;

        Self {
            // Conservative starting points
            simd_threshold: 50,
            parallel_threshold: 1000,
            confidence,
            sample_count,
        }
    }
}
46
47/// Runtime performance profiler that adapts thresholds based on actual performance
48pub struct RuntimeProfiler {
49    /// Historical performance measurements
50    measurements: Arc<RwLock<Vec<PerformanceMeasurement>>>,
51    /// Current adaptive thresholds
52    thresholds: Arc<RwLock<AdaptiveThresholds>>,
53    /// Maximum number of measurements to keep in memory
54    max_measurements: usize,
55    /// Minimum samples needed before adapting thresholds
56    min_samples_for_adaptation: usize,
57}
58
59impl Default for RuntimeProfiler {
60    fn default() -> Self {
61        Self::new()
62    }
63}
64
65impl RuntimeProfiler {
66    /// Create a new runtime profiler
67    pub fn new() -> Self {
68        Self {
69            measurements: Arc::new(RwLock::new(Vec::new())),
70            thresholds: Arc::new(RwLock::new(AdaptiveThresholds::default())),
71            max_measurements: 10000,        // Keep last 10k measurements
72            min_samples_for_adaptation: 50, // Need 50+ samples to adapt
73        }
74    }
75
76    /// Record a performance measurement
77    pub fn record_measurement(&self, measurement: PerformanceMeasurement) {
78        if let Ok(mut measurements) = self.measurements.write() {
79            measurements.push(measurement);
80
81            // Keep only the most recent measurements
82            if measurements.len() > self.max_measurements {
83                let len = measurements.len();
84                measurements.drain(0..len - self.max_measurements);
85            }
86
87            // Trigger threshold adaptation if we have enough samples
88            if measurements.len() >= self.min_samples_for_adaptation {
89                self.adapt_thresholds(&measurements);
90            }
91        }
92    }
93
94    /// Get current adaptive thresholds
95    pub fn get_thresholds(&self) -> AdaptiveThresholds {
96        self.thresholds
97            .read()
98            .unwrap_or_else(|poisoned| poisoned.into_inner())
99            .clone()
100    }
101
102    /// Adapt thresholds based on performance measurements
103    fn adapt_thresholds(&self, measurements: &[PerformanceMeasurement]) {
104        let simd_threshold = self.find_optimal_simd_threshold(measurements);
105        let parallel_threshold = self.find_optimal_parallel_threshold(measurements);
106
107        if let Ok(mut thresholds) = self.thresholds.write() {
108            let old_simd = thresholds.simd_threshold;
109            let old_parallel = thresholds.parallel_threshold;
110
111            // Use exponential moving average for smooth adaptation
112            let alpha = 0.1; // Learning rate
113            thresholds.simd_threshold =
114                ((1.0 - alpha) * old_simd as f64 + alpha * simd_threshold as f64) as usize;
115            thresholds.parallel_threshold =
116                ((1.0 - alpha) * old_parallel as f64 + alpha * parallel_threshold as f64) as usize;
117
118            // Update confidence based on sample size and consistency
119            thresholds.sample_count = measurements.len();
120            thresholds.confidence = self.calculate_confidence(measurements);
121
122            // Log threshold changes for debugging
123            if old_simd != thresholds.simd_threshold
124                || old_parallel != thresholds.parallel_threshold
125            {
126                println!("Adaptive thresholds updated: SIMD {} -> {}, Parallel {} -> {} (confidence: {:.2})",
127                    old_simd, thresholds.simd_threshold,
128                    old_parallel, thresholds.parallel_threshold,
129                    thresholds.confidence
130                );
131            }
132        }
133    }
134
135    /// Find optimal SIMD threshold by analyzing performance crossover point
136    fn find_optimal_simd_threshold(&self, measurements: &[PerformanceMeasurement]) -> usize {
137        let mut simd_measurements: Vec<_> = measurements
138            .iter()
139            .filter(|m| m.operation_type.contains("simd"))
140            .collect();
141        let mut sequential_measurements: Vec<_> = measurements
142            .iter()
143            .filter(|m| m.operation_type.contains("sequential"))
144            .collect();
145
146        if simd_measurements.is_empty() || sequential_measurements.is_empty() {
147            return 50; // Default fallback
148        }
149
150        // Sort by operation size
151        simd_measurements.sort_by_key(|m| m.operation_size);
152        sequential_measurements.sort_by_key(|m| m.operation_size);
153
154        // Find crossover point where SIMD becomes faster than sequential
155        for size in (10..=1000).step_by(10) {
156            let simd_perf = self.estimate_performance_at_size(&simd_measurements, size);
157            let seq_perf = self.estimate_performance_at_size(&sequential_measurements, size);
158
159            if let (Some(simd_time), Some(seq_time)) = (simd_perf, seq_perf) {
160                if simd_time < seq_time {
161                    return size;
162                }
163            }
164        }
165
166        50 // Conservative fallback
167    }
168
169    /// Find optimal parallelism threshold by analyzing performance crossover point
170    fn find_optimal_parallel_threshold(&self, measurements: &[PerformanceMeasurement]) -> usize {
171        let mut parallel_measurements: Vec<_> = measurements
172            .iter()
173            .filter(|m| m.operation_type.contains("parallel"))
174            .collect();
175        let mut sequential_measurements: Vec<_> = measurements
176            .iter()
177            .filter(|m| m.operation_type.contains("sequential"))
178            .collect();
179
180        if parallel_measurements.is_empty() || sequential_measurements.is_empty() {
181            return 1000; // Default fallback
182        }
183
184        // Sort by operation size
185        parallel_measurements.sort_by_key(|m| m.operation_size);
186        sequential_measurements.sort_by_key(|m| m.operation_size);
187
188        // Find crossover point where parallel becomes faster than sequential
189        for size in (100..=5000).step_by(100) {
190            let parallel_perf = self.estimate_performance_at_size(&parallel_measurements, size);
191            let seq_perf = self.estimate_performance_at_size(&sequential_measurements, size);
192
193            if let (Some(parallel_time), Some(seq_time)) = (parallel_perf, seq_perf) {
194                if parallel_time < seq_time {
195                    return size;
196                }
197            }
198        }
199
200        1000 // Conservative fallback
201    }
202
203    /// Estimate performance at a specific operation size using interpolation
204    fn estimate_performance_at_size(
205        &self,
206        measurements: &[&PerformanceMeasurement],
207        target_size: usize,
208    ) -> Option<Duration> {
209        if measurements.is_empty() {
210            return None;
211        }
212
213        // Find measurements closest to target size
214        let mut closest_smaller = None;
215        let mut closest_larger = None;
216
217        for measurement in measurements {
218            if measurement.operation_size <= target_size {
219                closest_smaller = Some(measurement);
220            } else if closest_larger.is_none() {
221                closest_larger = Some(measurement);
222                break;
223            }
224        }
225
226        match (closest_smaller, closest_larger) {
227            (Some(smaller), Some(larger)) => {
228                // Linear interpolation
229                let size_diff = larger.operation_size - smaller.operation_size;
230                let time_diff =
231                    larger.duration.as_nanos() as f64 - smaller.duration.as_nanos() as f64;
232                let target_offset = target_size - smaller.operation_size;
233
234                let interpolated_nanos = smaller.duration.as_nanos() as f64
235                    + (time_diff * target_offset as f64) / size_diff as f64;
236
237                Some(Duration::from_nanos(interpolated_nanos as u64))
238            }
239            (Some(measurement), None) | (None, Some(measurement)) => {
240                // Use the closest measurement
241                Some(measurement.duration)
242            }
243            (None, None) => None,
244        }
245    }
246
247    /// Calculate confidence in current thresholds based on measurement consistency
248    fn calculate_confidence(&self, measurements: &[PerformanceMeasurement]) -> f64 {
249        if measurements.len() < 10 {
250            return 0.0;
251        }
252
253        // Calculate variance in performance measurements
254        let recent_measurements: Vec<_> = measurements
255            .iter()
256            .rev()
257            .take(100) // Use last 100 measurements
258            .collect();
259
260        if recent_measurements.is_empty() {
261            return 0.0;
262        }
263
264        // Group by operation type and calculate consistency
265        let mut type_groups: HashMap<String, Vec<Duration>> = HashMap::new();
266        for measurement in recent_measurements {
267            type_groups
268                .entry(measurement.operation_type.clone())
269                .or_default()
270                .push(measurement.duration);
271        }
272
273        let mut total_consistency = 0.0;
274        let mut group_count = 0;
275
276        for (_, durations) in type_groups {
277            if durations.len() < 3 {
278                continue;
279            }
280
281            let mean_duration =
282                durations.iter().sum::<Duration>().as_nanos() as f64 / durations.len() as f64;
283            let variance = durations
284                .iter()
285                .map(|d| {
286                    let diff = d.as_nanos() as f64 - mean_duration;
287                    diff * diff
288                })
289                .sum::<f64>()
290                / durations.len() as f64;
291
292            let coefficient_of_variation = if mean_duration > 0.0 {
293                variance.sqrt() / mean_duration
294            } else {
295                1.0
296            };
297
298            // Lower coefficient of variation = higher consistency = higher confidence
299            let consistency = 1.0 / (1.0 + coefficient_of_variation);
300            total_consistency += consistency;
301            group_count += 1;
302        }
303
304        if group_count > 0 {
305            (total_consistency / group_count as f64).min(1.0)
306        } else {
307            0.0
308        }
309    }
310
311    /// Get performance statistics for monitoring
312    pub fn get_statistics(&self) -> ProfilerStatistics {
313        let measurements = self
314            .measurements
315            .read()
316            .expect("BUG: Profiler measurements lock poisoned - indicates panic during profiler read in another thread");
317        let thresholds = self.get_thresholds();
318
319        let total_measurements = measurements.len();
320        let recent_measurements = measurements.iter().rev().take(100).count();
321
322        // Calculate average performance by operation type
323        let mut type_stats: HashMap<String, (Duration, usize)> = HashMap::new();
324        for measurement in measurements.iter().rev().take(1000) {
325            let (total_duration, count) = type_stats
326                .entry(measurement.operation_type.clone())
327                .or_insert((Duration::ZERO, 0));
328            *total_duration += measurement.duration;
329            *count += 1;
330        }
331
332        let average_performance: HashMap<String, Duration> = type_stats
333            .into_iter()
334            .map(|(op_type, (total_duration, count))| {
335                let avg_duration = if count > 0 {
336                    Duration::from_nanos((total_duration.as_nanos() / count as u128) as u64)
337                } else {
338                    Duration::ZERO
339                };
340                (op_type, avg_duration)
341            })
342            .collect();
343
344        ProfilerStatistics {
345            total_measurements,
346            recent_measurements,
347            current_thresholds: thresholds,
348            average_performance,
349        }
350    }
351}
352
/// Statistics from the runtime profiler.
///
/// A point-in-time snapshot produced by `RuntimeProfiler::get_statistics`.
#[derive(Debug, Clone)]
pub struct ProfilerStatistics {
    /// Number of measurements currently held in the profiler's history
    pub total_measurements: usize,
    /// Number of recent measurements: the history length, capped at 100
    pub recent_measurements: usize,
    /// Current adaptive thresholds
    pub current_thresholds: AdaptiveThresholds,
    /// Average duration by operation type, computed over the 1000 most
    /// recent measurements
    pub average_performance: HashMap<String, Duration>,
}
365
/// Global runtime profiler instance.
///
/// Starts empty; it is filled in by the first call to [`get_global_profiler`].
static GLOBAL_PROFILER: OnceLock<RuntimeProfiler> = OnceLock::new();

/// Get the global runtime profiler instance.
///
/// The first call constructs the profiler with `RuntimeProfiler::new`; every
/// call returns a reference to that same instance.
pub fn get_global_profiler() -> &'static RuntimeProfiler {
    GLOBAL_PROFILER.get_or_init(RuntimeProfiler::new)
}
373
374/// Record a performance measurement in the global profiler
375pub fn record_performance(operation_type: &str, operation_size: usize, duration: Duration) {
376    let measurement = PerformanceMeasurement {
377        operation_type: operation_type.to_owned(),
378        operation_size,
379        duration,
380        timestamp: Instant::now(),
381    };
382
383    get_global_profiler().record_measurement(measurement);
384}
385
/// Get current adaptive thresholds from the global profiler.
///
/// Returns the value of `get_thresholds()` called on the instance from
/// [`get_global_profiler`].
pub fn get_adaptive_thresholds() -> AdaptiveThresholds {
    get_global_profiler().get_thresholds()
}
390
/// Get profiler statistics for monitoring.
///
/// Returns the value of `get_statistics()` called on the instance from
/// [`get_global_profiler`].
pub fn get_profiler_statistics() -> ProfilerStatistics {
    get_global_profiler().get_statistics()
}
395
/// Unit tests for the runtime profiler and its global convenience functions.
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created profiler reports the default thresholds.
    #[test]
    fn test_runtime_profiler_creation() {
        let profiler = RuntimeProfiler::new();
        let thresholds = profiler.get_thresholds();

        assert_eq!(thresholds.simd_threshold, 50);
        assert_eq!(thresholds.parallel_threshold, 1000);
        assert_eq!(thresholds.confidence, 0.0);
        assert_eq!(thresholds.sample_count, 0);
    }

    /// Recording a single sample must not trigger threshold adaptation.
    #[test]
    fn test_measurement_recording() {
        let profiler = RuntimeProfiler::new();

        let measurement = PerformanceMeasurement {
            operation_type: "test_operation".to_string(),
            operation_size: 100,
            duration: Duration::from_millis(10),
            timestamp: Instant::now(),
        };

        profiler.record_measurement(measurement);

        // Should not adapt thresholds with just one measurement
        let thresholds = profiler.get_thresholds();
        assert_eq!(thresholds.sample_count, 0); // Not enough samples yet
    }

    /// The global helpers record into and read from the shared profiler.
    #[test]
    fn test_global_profiler() {
        record_performance("test_simd", 100, Duration::from_micros(50));
        record_performance("test_sequential", 100, Duration::from_micros(100));

        // `>=` rather than `==`: the global profiler is a process-wide static,
        // so presumably other tests in the same binary may have recorded too.
        let stats = get_profiler_statistics();
        assert!(stats.total_measurements >= 2);

        let thresholds = get_adaptive_thresholds();
        assert!(thresholds.simd_threshold > 0);
        assert!(thresholds.parallel_threshold > 0);
    }
}
// End of mathhook_core/core/performance/profiler.rs