// scirs2_interpolate/memory_monitor.rs

//! Memory leak detection and monitoring for continuous use
//!
//! This module provides utilities for tracking memory usage patterns, detecting
//! potential leaks, and monitoring memory-related performance issues during
//! long-running interpolation operations.
//!
//! # Overview
//!
//! The memory monitoring system tracks:
//! - Memory allocations and deallocations per interpolator
//! - Cache memory usage and growth patterns
//! - Peak memory usage across operations
//! - Memory leaks through reference counting
//! - Memory pressure and allocation patterns
//!
//! # Usage
//!
//! ```rust
//! use scirs2_interpolate::memory_monitor::{MemoryMonitor, start_monitoring};
//!
//! // Start global memory monitoring
//! start_monitoring();
//!
//! // Create a monitored interpolator
//! let mut monitor = MemoryMonitor::new("rbf_interpolator");
//!
//! // Track memory during operations
//! monitor.track_allocation(1024, "distance_matrix");
//! // ... perform interpolation operations ...
//! monitor.track_deallocation(1024, "distance_matrix");
//!
//! // Check for memory leaks
//! let report = monitor.generate_report();
//! if report.has_potential_leaks() {
//!     println!("Warning: Potential memory leaks detected");
//! }
//! ```
39use std::collections::{HashMap, VecDeque};
40use std::sync::{Arc, Mutex, OnceLock};
41use std::time::{Duration, Instant};
42
/// Global memory monitoring registry, lazily initialized by [`start_monitoring`]
/// and shared (via `Arc<Mutex<..>>`) by all monitor registration/report helpers.
static GLOBAL_MONITOR: OnceLock<Arc<Mutex<GlobalMemoryMonitor>>> = OnceLock::new();
45
/// Global memory monitoring system
///
/// Process-wide registry holding every registered [`MemoryMonitor`] plus
/// aggregate statistics. Lives inside the `GLOBAL_MONITOR` cell and is only
/// ever accessed behind its mutex.
#[derive(Debug)]
struct GlobalMemoryMonitor {
    /// Active memory monitors by name
    monitors: HashMap<String, Arc<Mutex<MemoryMonitor>>>,

    /// Global memory statistics aggregated across all monitors
    global_stats: GlobalMemoryStats,

    /// Whether monitoring is enabled; registration is refused when false
    enabled: bool,

    /// Maximum number of monitors to track (registrations beyond this are dropped)
    max_monitors: usize,
}
61
/// Global memory statistics across all interpolators
#[derive(Debug, Clone)]
pub struct GlobalMemoryStats {
    /// Total memory allocated across all interpolators.
    /// Note: this is a *net* figure — `update_global_stats` subtracts on
    /// deallocation — so it reflects currently outstanding bytes.
    pub total_allocated_bytes: usize,

    /// Peak memory usage across all interpolators (high-water mark of the net total)
    pub peak_total_bytes: usize,

    /// Number of active interpolators being monitored
    pub active_interpolators: usize,

    /// Total number of allocations tracked
    pub total_allocations: u64,

    /// Total number of deallocations tracked
    pub total_deallocations: u64,

    /// Start time of monitoring (set when the stats struct is created)
    pub monitoring_start: Instant,
}
83
84impl Default for GlobalMemoryStats {
85    fn default() -> Self {
86        Self {
87            total_allocated_bytes: 0,
88            peak_total_bytes: 0,
89            active_interpolators: 0,
90            total_allocations: 0,
91            total_deallocations: 0,
92            monitoring_start: Instant::now(),
93        }
94    }
95}
96
/// Individual memory monitor for a specific interpolator
///
/// Tracks per-category byte counts, a bounded allocation-event history, peak
/// usage and leak-detection bookkeeping. Constructed via [`MemoryMonitor::new`],
/// which also registers a copy with the global monitoring system.
#[derive(Debug)]
pub struct MemoryMonitor {
    /// Name/identifier for this monitor
    name: String,

    /// Current memory allocations by category (bytes currently held per category)
    allocations: HashMap<String, usize>,

    /// Memory allocation history (bounded to 10,000 events to limit growth)
    allocation_history: VecDeque<AllocationEvent>,

    /// Peak memory usage for this interpolator
    peak_memory_bytes: usize,

    /// Current total memory usage
    current_memory_bytes: usize,

    /// Statistics for leak detection
    leak_stats: LeakDetectionStats,

    /// Performance metrics
    perf_metrics: MemoryPerformanceMetrics,

    /// Whether this monitor is active; when false, track_* calls are no-ops
    active: bool,

    /// Creation timestamp
    created_at: Instant,
}
127
/// Memory allocation/deallocation event, as stored in a monitor's history queue
#[derive(Debug, Clone)]
struct AllocationEvent {
    /// Type of event (allocation or deallocation)
    event_type: EventType,

    /// Size in bytes
    sizebytes: usize,

    /// Category of memory (e.g., "distance_matrix", "cache", "coefficients")
    #[allow(dead_code)]
    category: String,

    /// Timestamp of event
    #[allow(dead_code)]
    timestamp: Instant,
}
145
/// Type of memory event recorded in an [`AllocationEvent`]
#[derive(Debug, Clone, Copy, PartialEq)]
enum EventType {
    /// Memory was allocated
    Allocation,
    /// Memory was freed
    Deallocation,
}
152
/// Statistics for leak detection
#[derive(Debug, Clone)]
struct LeakDetectionStats {
    /// Total number of allocations
    total_allocations: u64,

    /// Total number of deallocations
    total_deallocations: u64,

    /// Number of unmatched allocations (potential leaks)
    #[allow(dead_code)]
    unmatched_allocations: u64,

    /// Memory that has been allocated but not freed for a long time,
    /// keyed by "{category}_{allocation_counter}" → (size, allocation time)
    long_lived_allocations: HashMap<String, (usize, Instant)>,

    /// Threshold after which a still-live allocation counts as a potential leak
    leak_detection_threshold: Duration,
}
172
173impl Default for LeakDetectionStats {
174    fn default() -> Self {
175        Self {
176            total_allocations: 0,
177            total_deallocations: 0,
178            unmatched_allocations: 0,
179            long_lived_allocations: HashMap::new(),
180            leak_detection_threshold: Duration::from_secs(300), // 5 minutes
181        }
182    }
183}
184
/// Memory performance metrics derived from a monitor's event history
#[derive(Debug, Clone)]
struct MemoryPerformanceMetrics {
    /// Average allocation size in bytes over the recorded history
    avg_allocation_size: f64,

    /// Average time between allocations
    #[allow(dead_code)]
    avg_allocation_interval: Duration,

    /// Memory fragmentation estimate (0.0 to 1.0)
    #[allow(dead_code)]
    fragmentation_estimate: f64,

    /// Cache hit ratio for memory reuse
    /// (currently a 0.7 placeholder set by `update_performance_metrics`)
    cache_hit_ratio: f64,

    /// Last update timestamp
    last_update: Instant,
}
205
206impl Default for MemoryPerformanceMetrics {
207    fn default() -> Self {
208        Self {
209            avg_allocation_size: 0.0,
210            avg_allocation_interval: Duration::from_millis(0),
211            fragmentation_estimate: 0.0,
212            cache_hit_ratio: 0.0,
213            last_update: Instant::now(),
214        }
215    }
216}
217
/// Memory monitoring report produced by [`MemoryMonitor::generate_report`]
#[derive(Debug, Clone)]
pub struct MemoryReport {
    /// Monitor name
    pub monitorname: String,

    /// Current memory usage by category
    pub current_allocations: HashMap<String, usize>,

    /// Peak memory usage
    pub peak_memory_bytes: usize,

    /// Total memory allocated over lifetime.
    /// Computed from the bounded event history, so it undercounts once old
    /// events have been evicted from the queue.
    pub total_allocated_bytes: usize,

    /// Memory leak indicators
    pub leak_indicators: LeakIndicators,

    /// Performance metrics
    pub performance_summary: PerformanceSummary,

    /// Recommendations for memory optimization
    pub recommendations: Vec<String>,

    /// Report generation timestamp
    pub generated_at: Instant,
}
245
/// Memory leak indicators derived by `MemoryMonitor::analyze_leaks`
#[derive(Debug, Clone)]
pub struct LeakIndicators {
    /// Potential memory leaks detected
    pub has_potential_leaks: bool,

    /// Number of unmatched allocations (allocations minus deallocations)
    pub unmatched_allocations: u64,

    /// Bytes held longer than the leak-detection threshold
    pub long_lived_memory_bytes: usize,

    /// Categories with suspicious allocation patterns (currently: > 1 MiB held)
    pub suspicious_categories: Vec<String>,

    /// Leak severity (0.0 = no leaks, 1.0 = severe leaks)
    pub leak_severity: f64,
}
264
/// Performance summary for memory usage
///
/// All scores are in [0.0, 1.0]; higher is better, and the grade is derived
/// from the average of the three scores (see `analyze_performance`).
#[derive(Debug, Clone)]
pub struct PerformanceSummary {
    /// Memory efficiency: 1.0 − current/peak, so higher means current usage
    /// has dropped well below its peak
    pub memory_efficiency_score: f64,

    /// Allocation pattern efficiency: fraction of allocations matched by a free
    pub allocation_pattern_score: f64,

    /// Cache utilization score
    pub cache_utilization_score: f64,

    /// Overall memory performance grade
    pub overall_grade: PerformanceGrade,
}
280
/// Performance grade classification, best (`Excellent`) to worst (`Critical`).
/// Assigned from the averaged score thresholds in `analyze_performance`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum PerformanceGrade {
    Excellent,
    Good,
    Fair,
    Poor,
    Critical,
}
290
291impl MemoryMonitor {
    /// Create a new memory monitor
    ///
    /// The monitor is also registered with the global monitoring system (when
    /// [`start_monitoring`] has been called) so it appears in global reports.
    ///
    /// NOTE(review): `register_monitor` receives a `clone()` of this monitor,
    /// so the registered copy is a point-in-time snapshot — tracking calls on
    /// the returned value never reach the registered instance (and the clone
    /// is internally renamed with a "_clone" suffix). Confirm whether the
    /// registry was meant to hold a live shared handle instead.
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        let monitor = Self {
            name: name.clone(),
            allocations: HashMap::new(),
            allocation_history: VecDeque::new(),
            peak_memory_bytes: 0,
            current_memory_bytes: 0,
            leak_stats: LeakDetectionStats::default(),
            perf_metrics: MemoryPerformanceMetrics::default(),
            active: true,
            created_at: Instant::now(),
        };

        // Register with global monitor
        register_monitor(&name, monitor.clone());
        monitor
    }
311
312    /// Track a memory allocation
313    pub fn track_allocation(&mut self, sizebytes: usize, category: impl Into<String>) {
314        if !self.active {
315            return;
316        }
317
318        let category = category.into();
319        let now = Instant::now();
320
321        // Update current allocations
322        *self.allocations.entry(category.clone()).or_insert(0) += sizebytes;
323        self.current_memory_bytes += sizebytes;
324
325        // Update peak usage
326        if self.current_memory_bytes > self.peak_memory_bytes {
327            self.peak_memory_bytes = self.current_memory_bytes;
328        }
329
330        // Record allocation event
331        let event = AllocationEvent {
332            event_type: EventType::Allocation,
333            sizebytes,
334            category: category.clone(),
335            timestamp: now,
336        };
337
338        self.allocation_history.push_back(event);
339
340        // Limit history size to prevent memory growth
341        if self.allocation_history.len() > 10000 {
342            self.allocation_history.pop_front();
343        }
344
345        // Update leak detection stats
346        self.leak_stats.total_allocations += 1;
347        self.leak_stats.long_lived_allocations.insert(
348            format!("{}_{}", category, self.leak_stats.total_allocations),
349            (sizebytes, now),
350        );
351
352        // Update performance metrics
353        self.update_performance_metrics();
354
355        // Update global stats
356        update_global_stats(sizebytes, true);
357    }
358
359    /// Track a memory deallocation
360    pub fn track_deallocation(&mut self, sizebytes: usize, category: impl Into<String>) {
361        if !self.active {
362            return;
363        }
364
365        let category = category.into();
366        let now = Instant::now();
367
368        // Update current allocations
369        if let Some(current) = self.allocations.get_mut(&category) {
370            *current = current.saturating_sub(sizebytes);
371            if *current == 0 {
372                self.allocations.remove(&category);
373            }
374        }
375
376        self.current_memory_bytes = self.current_memory_bytes.saturating_sub(sizebytes);
377
378        // Record deallocation event
379        let event = AllocationEvent {
380            event_type: EventType::Deallocation,
381            sizebytes,
382            category: category.clone(),
383            timestamp: now,
384        };
385
386        self.allocation_history.push_back(event);
387
388        // Update leak detection stats
389        self.leak_stats.total_deallocations += 1;
390
391        // Remove from long-lived allocations (simplified - would need better matching in production)
392        self.leak_stats
393            .long_lived_allocations
394            .retain(|k, _| !k.starts_with(&category));
395
396        // Update performance metrics
397        self.update_performance_metrics();
398
399        // Update global stats
400        update_global_stats(sizebytes, false);
401    }
402
403    /// Generate a comprehensive memory report
404    pub fn generate_report(&self) -> MemoryReport {
405        let leak_indicators = self.analyze_leaks();
406        let performance_summary = self.analyze_performance();
407        let recommendations = self.generate_recommendations(&leak_indicators, &performance_summary);
408
409        MemoryReport {
410            monitorname: self.name.clone(),
411            current_allocations: self.allocations.clone(),
412            peak_memory_bytes: self.peak_memory_bytes,
413            total_allocated_bytes: self.calculate_total_allocated(),
414            leak_indicators,
415            performance_summary,
416            recommendations,
417            generated_at: Instant::now(),
418        }
419    }
420
421    /// Analyze potential memory leaks
422    fn analyze_leaks(&self) -> LeakIndicators {
423        let unmatched = self
424            .leak_stats
425            .total_allocations
426            .saturating_sub(self.leak_stats.total_deallocations);
427
428        // Calculate long-lived memory
429        let now = Instant::now();
430        let long_lived_memory: usize = self
431            .leak_stats
432            .long_lived_allocations
433            .values()
434            .filter(|(_, timestamp)| {
435                now.duration_since(*timestamp) > self.leak_stats.leak_detection_threshold
436            })
437            .map(|(size, _)| *size)
438            .sum();
439
440        // Identify suspicious categories (categories with consistently growing memory)
441        let suspicious_categories: Vec<String> = self.allocations
442            .iter()
443            .filter(|(_, &size)| size > 1024 * 1024) // More than 1MB
444            .map(|(cat, _)| cat.clone())
445            .collect();
446
447        let has_potential_leaks =
448            unmatched > 0 || long_lived_memory > 0 || !suspicious_categories.is_empty();
449
450        // Calculate leak severity
451        let leak_severity = if has_potential_leaks {
452            let severity_factors = [
453                (unmatched as f64) / (self.leak_stats.total_allocations as f64).max(1.0),
454                (long_lived_memory as f64) / (self.peak_memory_bytes as f64).max(1.0),
455                (suspicious_categories.len() as f64) / 10.0, // Normalize by 10 categories
456            ];
457            severity_factors.iter().sum::<f64>() / severity_factors.len() as f64
458        } else {
459            0.0
460        };
461
462        LeakIndicators {
463            has_potential_leaks,
464            unmatched_allocations: unmatched,
465            long_lived_memory_bytes: long_lived_memory,
466            suspicious_categories,
467            leak_severity: leak_severity.min(1.0),
468        }
469    }
470
471    /// Analyze memory performance
472    fn analyze_performance(&self) -> PerformanceSummary {
473        // Calculate memory efficiency (lower peak/current ratio is better)
474        let memory_efficiency_score = if self.peak_memory_bytes > 0 {
475            1.0 - (self.current_memory_bytes as f64 / self.peak_memory_bytes as f64)
476        } else {
477            1.0
478        };
479
480        // Calculate allocation pattern efficiency
481        let allocation_pattern_score = if self.leak_stats.total_allocations > 0 {
482            let deallocation_ratio = self.leak_stats.total_deallocations as f64
483                / self.leak_stats.total_allocations as f64;
484            deallocation_ratio.min(1.0)
485        } else {
486            1.0
487        };
488
489        // Use cached cache utilization score
490        let cache_utilization_score = self.perf_metrics.cache_hit_ratio;
491
492        // Calculate overall grade
493        let overall_score =
494            (memory_efficiency_score + allocation_pattern_score + cache_utilization_score) / 3.0;
495        let overall_grade = match overall_score {
496            s if s >= 0.9 => PerformanceGrade::Excellent,
497            s if s >= 0.7 => PerformanceGrade::Good,
498            s if s >= 0.5 => PerformanceGrade::Fair,
499            s if s >= 0.3 => PerformanceGrade::Poor,
500            _ => PerformanceGrade::Critical,
501        };
502
503        PerformanceSummary {
504            memory_efficiency_score,
505            allocation_pattern_score,
506            cache_utilization_score,
507            overall_grade,
508        }
509    }
510
511    /// Generate optimization recommendations
512    fn generate_recommendations(
513        &self,
514        leak_indicators: &LeakIndicators,
515        performance: &PerformanceSummary,
516    ) -> Vec<String> {
517        let mut recommendations = Vec::new();
518
519        if leak_indicators.has_potential_leaks {
520            recommendations
521                .push("Consider implementing explicit memory cleanup in destructor".to_string());
522
523            if leak_indicators.unmatched_allocations > 0 {
524                recommendations.push(format!(
525                    "Found {} unmatched allocations - check for missing deallocations",
526                    leak_indicators.unmatched_allocations
527                ));
528            }
529
530            if leak_indicators.long_lived_memory_bytes > 1024 * 1024 {
531                recommendations.push(format!(
532                    "Large amount of long-lived memory ({} MB) - consider periodic cleanup",
533                    leak_indicators.long_lived_memory_bytes / (1024 * 1024)
534                ));
535            }
536        }
537
538        if matches!(
539            performance.overall_grade,
540            PerformanceGrade::Fair | PerformanceGrade::Poor | PerformanceGrade::Critical
541        ) {
542            recommendations.push("Memory performance can be improved".to_string());
543
544            if performance.memory_efficiency_score < 0.5 {
545                recommendations.push(
546                    "High peak memory usage - consider processing data in chunks".to_string(),
547                );
548            }
549
550            if performance.cache_utilization_score < 0.3 {
551                recommendations.push(
552                    "Low cache utilization - enable caching for repeated operations".to_string(),
553                );
554            }
555        }
556
557        if self.peak_memory_bytes > 1024 * 1024 * 1024 {
558            recommendations.push(
559                "Very high memory usage - consider using memory-efficient algorithms".to_string(),
560            );
561        }
562
563        recommendations
564    }
565
566    /// Update performance metrics
567    fn update_performance_metrics(&mut self) {
568        let now = Instant::now();
569
570        // Update average allocation size
571        if self.leak_stats.total_allocations > 0 {
572            let total_size: usize = self
573                .allocation_history
574                .iter()
575                .filter(|e| e.event_type == EventType::Allocation)
576                .map(|e| e.sizebytes)
577                .sum();
578            self.perf_metrics.avg_allocation_size =
579                total_size as f64 / self.leak_stats.total_allocations as f64;
580        }
581
582        // Simple cache hit ratio simulation (would need actual cache statistics in practice)
583        self.perf_metrics.cache_hit_ratio = 0.7; // Placeholder
584
585        self.perf_metrics.last_update = now;
586    }
587
588    /// Calculate total memory allocated over lifetime
589    fn calculate_total_allocated(&self) -> usize {
590        self.allocation_history
591            .iter()
592            .filter(|e| e.event_type == EventType::Allocation)
593            .map(|e| e.sizebytes)
594            .sum()
595    }
596
    /// Disable this monitor; subsequent `track_*` calls become no-ops.
    pub fn disable(&mut self) {
        self.active = false;
    }

    /// Check if monitor is active (i.e. still recording tracking calls)
    pub fn is_active(&self) -> bool {
        self.active
    }
606}
607
impl Clone for MemoryMonitor {
    /// Clone the monitor's full state under a derived name.
    ///
    /// NOTE(review): the clone is renamed to "{name}_clone", yet
    /// `MemoryMonitor::new` registers such a clone in the global registry under
    /// the *original* name — reports fetched through the registry therefore
    /// carry the "_clone" suffix in `monitorname`. Confirm whether the rename
    /// is intentional.
    fn clone(&self) -> Self {
        Self {
            name: format!("{}_clone", self.name),
            allocations: self.allocations.clone(),
            allocation_history: self.allocation_history.clone(),
            peak_memory_bytes: self.peak_memory_bytes,
            current_memory_bytes: self.current_memory_bytes,
            leak_stats: self.leak_stats.clone(),
            perf_metrics: self.perf_metrics.clone(),
            active: self.active,
            created_at: self.created_at,
        }
    }
}
623
624impl MemoryReport {
625    /// Check if the report indicates potential memory leaks
626    pub fn has_potential_leaks(&self) -> bool {
627        self.leak_indicators.has_potential_leaks
628    }
629
630    /// Get memory efficiency rating
631    pub fn memory_efficiency_rating(&self) -> PerformanceGrade {
632        self.performance_summary.overall_grade
633    }
634
635    /// Get human-readable summary
636    pub fn summary(&self) -> String {
637        format!(
638            "Memory Report for '{}': Current: {} KB, Peak: {} KB, Grade: {:?}, Leaks: {}",
639            self.monitorname,
640            self.current_allocations.values().sum::<usize>() / 1024,
641            self.peak_memory_bytes / 1024,
642            self.performance_summary.overall_grade,
643            if self.has_potential_leaks() {
644                "Detected"
645            } else {
646                "None"
647            }
648        )
649    }
650}
651
652/// Global memory monitoring functions
653/// Start global memory monitoring
654#[allow(dead_code)]
655pub fn start_monitoring() {
656    let _ = GLOBAL_MONITOR.set(Arc::new(Mutex::new(GlobalMemoryMonitor {
657        monitors: HashMap::new(),
658        global_stats: GlobalMemoryStats::default(),
659        enabled: true,
660        max_monitors: 100,
661    })));
662}
663
664/// Stop global memory monitoring
665#[allow(dead_code)]
666pub fn stop_monitoring() {
667    if let Some(monitor) = GLOBAL_MONITOR.get() {
668        if let Ok(mut global) = monitor.lock() {
669            global.enabled = false;
670            global.monitors.clear();
671        }
672    }
673}
674
675/// Register a memory monitor with the global system
676#[allow(dead_code)]
677fn register_monitor(name: &str, monitor: MemoryMonitor) {
678    if let Some(global_monitor) = GLOBAL_MONITOR.get() {
679        if let Ok(mut global) = global_monitor.lock() {
680            if global.enabled && global.monitors.len() < global.max_monitors {
681                global
682                    .monitors
683                    .insert(name.to_string(), Arc::new(Mutex::new(monitor)));
684                global.global_stats.active_interpolators = global.monitors.len();
685            }
686        }
687    }
688}
689
690/// Update global memory statistics
691#[allow(dead_code)]
692fn update_global_stats(sizebytes: usize, isallocation: bool) {
693    if let Some(global_monitor) = GLOBAL_MONITOR.get() {
694        if let Ok(mut global) = global_monitor.lock() {
695            if isallocation {
696                global.global_stats.total_allocated_bytes += sizebytes;
697                global.global_stats.total_allocations += 1;
698
699                if global.global_stats.total_allocated_bytes > global.global_stats.peak_total_bytes
700                {
701                    global.global_stats.peak_total_bytes =
702                        global.global_stats.total_allocated_bytes;
703                }
704            } else {
705                global.global_stats.total_allocated_bytes = global
706                    .global_stats
707                    .total_allocated_bytes
708                    .saturating_sub(sizebytes);
709                global.global_stats.total_deallocations += 1;
710            }
711        }
712    }
713}
714
715/// Get global memory statistics
716#[allow(dead_code)]
717pub fn get_global_stats() -> Option<GlobalMemoryStats> {
718    GLOBAL_MONITOR
719        .get()
720        .and_then(|monitor| monitor.lock().ok())
721        .map(|global| global.global_stats.clone())
722}
723
724/// Get report for a specific monitor
725#[allow(dead_code)]
726pub fn get_monitor_report(name: &str) -> Option<MemoryReport> {
727    GLOBAL_MONITOR
728        .get()
729        .and_then(|global_monitor| {
730            global_monitor
731                .lock()
732                .ok()
733                .and_then(|global| global.monitors.get(name).cloned())
734        })
735        .and_then(|monitor| monitor.lock().ok().map(|m| m.generate_report()))
736}
737
738/// Get reports for all active monitors
739#[allow(dead_code)]
740pub fn get_all_reports() -> Vec<MemoryReport> {
741    if let Some(global_monitor) = GLOBAL_MONITOR.get() {
742        if let Ok(global) = global_monitor.lock() {
743            return global
744                .monitors
745                .values()
746                .filter_map(|monitor| monitor.lock().ok())
747                .map(|m| m.generate_report())
748                .collect();
749        }
750    }
751    Vec::new()
752}
753
/// Enhanced stress testing memory profiler
///
/// Wraps a standard [`MemoryMonitor`] and layers stress-specific tracking on
/// top: spike detection, periodic snapshots and growth-rate estimation.
#[derive(Debug)]
pub struct StressMemoryProfiler {
    /// Base monitor for standard tracking
    base_monitor: MemoryMonitor,

    /// Stress test specific metrics
    stress_metrics: StressMemoryMetrics,

    /// Memory usage history during stress tests (bounded by `max_snapshots`)
    stress_history: VecDeque<MemorySnapshot>,

    /// System memory pressure indicators
    pressure_indicators: MemoryPressureIndicators,

    /// Configuration for stress profiling
    stress_config: StressProfilingConfig,
}
772
/// Stress-specific memory metrics
#[derive(Debug, Clone)]
pub struct StressMemoryMetrics {
    /// Maximum memory growth rate during stress (bytes/second)
    pub max_growth_rate: f64,

    /// Memory allocation spikes during stress (size ≥ configured threshold)
    pub allocation_spikes: Vec<AllocationSpike>,

    /// Memory fragmentation under stress
    pub stress_fragmentation: f64,

    /// Concurrent access memory overhead
    pub concurrent_overhead: f64,

    /// Large dataset memory efficiency
    pub large_dataset_efficiency: f64,

    /// Memory recovery time after stress
    pub recovery_time_seconds: f64,
}
794
/// Memory allocation spike during stress testing
#[derive(Debug, Clone)]
pub struct AllocationSpike {
    /// Time when spike occurred
    pub timestamp: Instant,

    /// Size of the spike in bytes
    pub spike_size: usize,

    /// Duration of the spike (currently always zero; not yet measured)
    pub duration: Duration,

    /// Stress condition that caused the spike
    pub stresscondition: String,
}
810
/// Memory snapshot during stress testing
#[derive(Debug, Clone)]
pub struct MemorySnapshot {
    /// Timestamp of snapshot
    pub timestamp: Instant,

    /// Total memory usage at this point (the base monitor's current bytes)
    pub total_memory: usize,

    /// Memory usage by category
    pub category_breakdown: HashMap<String, usize>,

    /// System memory pressure level (0.0 to 1.0)
    pub system_pressure: f64,

    /// Active stress conditions
    pub active_stressconditions: Vec<String>,
}
829
/// System memory pressure indicators
#[derive(Debug, Clone)]
pub struct MemoryPressureIndicators {
    /// System memory utilization percentage
    pub system_memory_utilization: f64,

    /// Available memory in bytes
    pub available_memory: usize,

    /// Memory allocation failure rate
    pub allocation_failure_rate: f64,

    /// Garbage collection frequency (if applicable)
    pub gc_frequency: f64,

    /// Swap usage percentage
    pub swap_utilization: f64,
}
848
849impl Default for MemoryPressureIndicators {
850    fn default() -> Self {
851        Self {
852            system_memory_utilization: 0.0,
853            available_memory: 8 * 1024 * 1024 * 1024, // 8GB default
854            allocation_failure_rate: 0.0,
855            gc_frequency: 0.0,
856            swap_utilization: 0.0,
857        }
858    }
859}
860
/// Configuration for stress profiling
#[derive(Debug, Clone)]
pub struct StressProfilingConfig {
    /// Sampling interval for memory snapshots during stress
    pub snapshot_interval: Duration,

    /// Maximum number of snapshots to retain in the history queue
    pub max_snapshots: usize,

    /// Threshold for detecting allocation spikes (bytes)
    pub spike_threshold: usize,

    /// Enable system memory pressure monitoring
    pub monitor_system_pressure: bool,

    /// Enable detailed category tracking under stress
    pub detailed_category_tracking: bool,
}
879
880impl Default for StressProfilingConfig {
881    fn default() -> Self {
882        Self {
883            snapshot_interval: Duration::from_millis(100), // 10 samples per second
884            max_snapshots: 10000,                          // ~17 minutes at 100ms intervals
885            spike_threshold: 10 * 1024 * 1024,             // 10MB
886            monitor_system_pressure: true,
887            detailed_category_tracking: true,
888        }
889    }
890}
891
892impl StressMemoryProfiler {
893    /// Create a new stress memory profiler
894    pub fn new(name: impl Into<String>, config: Option<StressProfilingConfig>) -> Self {
895        Self {
896            base_monitor: MemoryMonitor::new(name),
897            stress_metrics: StressMemoryMetrics {
898                max_growth_rate: 0.0,
899                allocation_spikes: Vec::new(),
900                stress_fragmentation: 0.0,
901                concurrent_overhead: 0.0,
902                large_dataset_efficiency: 1.0,
903                recovery_time_seconds: 0.0,
904            },
905            stress_history: VecDeque::new(),
906            pressure_indicators: MemoryPressureIndicators::default(),
907            stress_config: config.unwrap_or_default(),
908        }
909    }
910
911    /// Start profiling under specific stress condition
912    pub fn start_stress_profiling(&mut self, stresscondition: &str) {
913        println!("Starting stress memory profiling for: {}", stresscondition);
914
915        // Take initial snapshot
916        self.take_memory_snapshot(vec![stresscondition.to_string()]);
917
918        // Update system pressure indicators
919        self.update_system_pressure();
920    }
921
922    /// Track memory allocation during stress test
923    pub fn track_stress_allocation(
924        &mut self,
925        sizebytes: usize,
926        category: impl Into<String>,
927        stresscondition: &str,
928    ) {
929        let category = category.into();
930
931        // Track with base monitor
932        self.base_monitor.track_allocation(sizebytes, &category);
933
934        // Check for allocation spike
935        if sizebytes >= self.stress_config.spike_threshold {
936            self.stress_metrics.allocation_spikes.push(AllocationSpike {
937                timestamp: Instant::now(),
938                spike_size: sizebytes,
939                duration: Duration::from_millis(0), // Would measure actual duration
940                stresscondition: stresscondition.to_string(),
941            });
942        }
943
944        // Take periodic snapshots
945        if self.should_take_snapshot() {
946            self.take_memory_snapshot(vec![stresscondition.to_string()]);
947        }
948
949        // Update growth rate
950        self.update_growth_rate();
951    }
952
    /// Track memory deallocation during stress test
    ///
    /// Forwards the deallocation to the base monitor and refreshes the
    /// growth-rate estimate so memory releases are reflected in the
    /// stress metrics.
    pub fn track_stress_deallocation(&mut self, sizebytes: usize, category: impl Into<String>) {
        self.base_monitor.track_deallocation(sizebytes, category);

        // Update stress metrics
        self.update_growth_rate();
    }
960
961    /// Take a memory snapshot for stress analysis
962    fn take_memory_snapshot(&mut self, active_stressconditions: Vec<String>) {
963        let snapshot = MemorySnapshot {
964            timestamp: Instant::now(),
965            total_memory: self.base_monitor.current_memory_bytes,
966            category_breakdown: self.base_monitor.allocations.clone(),
967            system_pressure: self.calculate_system_pressure(),
968            active_stressconditions,
969        };
970
971        self.stress_history.push_back(snapshot);
972
973        // Limit history size
974        if self.stress_history.len() > self.stress_config.max_snapshots {
975            self.stress_history.pop_front();
976        }
977    }
978
979    /// Check if should take snapshot based on timing
980    fn should_take_snapshot(&self) -> bool {
981        if let Some(last_snapshot) = self.stress_history.back() {
982            last_snapshot.timestamp.elapsed() >= self.stress_config.snapshot_interval
983        } else {
984            true // Always take first snapshot
985        }
986    }
987
988    /// Update memory growth rate during stress
989    fn update_growth_rate(&mut self) {
990        if self.stress_history.len() >= 2 {
991            let recent_snapshots: Vec<_> = self.stress_history.iter().rev().take(10).collect();
992
993            if recent_snapshots.len() >= 2 {
994                let latest = recent_snapshots[0];
995                let previous = recent_snapshots[recent_snapshots.len() - 1];
996
997                let memory_delta = latest.total_memory as i64 - previous.total_memory as i64;
998                let time_delta = latest
999                    .timestamp
1000                    .duration_since(previous.timestamp)
1001                    .as_secs_f64();
1002
1003                if time_delta > 0.0 {
1004                    let growth_rate = memory_delta as f64 / time_delta;
1005                    self.stress_metrics.max_growth_rate =
1006                        self.stress_metrics.max_growth_rate.max(growth_rate);
1007                }
1008            }
1009        }
1010    }
1011
1012    /// Update system memory pressure indicators
1013    fn update_system_pressure(&mut self) {
1014        // In a real implementation, this would query the operating system
1015        // For now, simulate pressure based on our current usage
1016
1017        let total_system_memory: u64 = 16 * 1024 * 1024 * 1024; // 16GB assumed
1018        let our_usage = self.base_monitor.current_memory_bytes;
1019
1020        self.pressure_indicators.system_memory_utilization =
1021            (our_usage as f64 / total_system_memory as f64 * 100.0).min(100.0);
1022
1023        self.pressure_indicators.available_memory =
1024            (total_system_memory as usize).saturating_sub(our_usage);
1025
1026        // Simulate other metrics
1027        self.pressure_indicators.allocation_failure_rate =
1028            if self.pressure_indicators.system_memory_utilization > 90.0 {
1029                0.1
1030            } else {
1031                0.0
1032            };
1033    }
1034
1035    /// Calculate current system pressure level
1036    fn calculate_system_pressure(&self) -> f64 {
1037        let pressure_factors = [
1038            self.pressure_indicators.system_memory_utilization / 100.0,
1039            self.pressure_indicators.allocation_failure_rate,
1040            self.pressure_indicators.swap_utilization / 100.0,
1041        ];
1042
1043        pressure_factors.iter().sum::<f64>() / pressure_factors.len() as f64
1044    }
1045
1046    /// Analyze memory efficiency under large dataset stress
1047    pub fn analyze_large_dataset_efficiency(
1048        &mut self,
1049        dataset_size: usize,
1050        expected_memory: usize,
1051    ) {
1052        let actual_memory = self.base_monitor.current_memory_bytes;
1053
1054        self.stress_metrics.large_dataset_efficiency =
1055            expected_memory as f64 / actual_memory.max(1) as f64;
1056
1057        println!(
1058            "Large dataset efficiency for {} elements: {:.2} (expected: {}MB, actual: {}MB)",
1059            dataset_size,
1060            self.stress_metrics.large_dataset_efficiency,
1061            expected_memory / (1024 * 1024),
1062            actual_memory / (1024 * 1024)
1063        );
1064    }
1065
1066    /// Analyze concurrent access memory overhead
1067    pub fn analyze_concurrent_overhead(
1068        &mut self,
1069        baseline_memory: usize,
1070        concurrent_threads: usize,
1071    ) {
1072        let current_memory = self.base_monitor.current_memory_bytes;
1073        let overhead = current_memory.saturating_sub(baseline_memory);
1074
1075        self.stress_metrics.concurrent_overhead = overhead as f64 / concurrent_threads as f64;
1076
1077        println!(
1078            "Concurrent access overhead: {:.1}KB per thread ({} threads)",
1079            self.stress_metrics.concurrent_overhead / 1024.0,
1080            concurrent_threads
1081        );
1082    }
1083
1084    /// Measure memory recovery time after stress
1085    pub fn measure_recovery_time(&mut self, stress_endtime: Instant) {
1086        let _recovery_start_memory = self.base_monitor.current_memory_bytes;
1087
1088        // Monitor memory for recovery (simplified - would need async monitoring in practice)
1089        let recovery_time = Instant::now().duration_since(stress_endtime);
1090        self.stress_metrics.recovery_time_seconds = recovery_time.as_secs_f64();
1091
1092        println!(
1093            "Memory recovery _time: {:.2}s",
1094            self.stress_metrics.recovery_time_seconds
1095        );
1096    }
1097
1098    /// Generate comprehensive stress memory report
1099    pub fn generate_stress_report(&self) -> StressMemoryReport {
1100        let base_report = self.base_monitor.generate_report();
1101
1102        let memory_pressure_analysis = self.analyze_memory_pressure();
1103        let allocation_pattern_analysis = self.analyze_allocation_patterns();
1104        let stress_performance_analysis = self.analyze_stress_performance();
1105
1106        StressMemoryReport {
1107            base_report,
1108            stress_metrics: self.stress_metrics.clone(),
1109            memory_pressure_analysis,
1110            allocation_pattern_analysis,
1111            stress_performance_analysis,
1112            system_pressure: self.pressure_indicators.clone(),
1113            snapshot_count: self.stress_history.len(),
1114            stress_recommendations: self.generate_stress_recommendations(),
1115        }
1116    }
1117
1118    /// Analyze memory pressure patterns
1119    fn analyze_memory_pressure(&self) -> MemoryPressureAnalysis {
1120        let max_pressure = self
1121            .stress_history
1122            .iter()
1123            .map(|s| s.system_pressure)
1124            .fold(0.0, f64::max);
1125
1126        let avg_pressure = if !self.stress_history.is_empty() {
1127            self.stress_history
1128                .iter()
1129                .map(|s| s.system_pressure)
1130                .sum::<f64>()
1131                / self.stress_history.len() as f64
1132        } else {
1133            0.0
1134        };
1135
1136        let pressure_spikes = self
1137            .stress_history
1138            .iter()
1139            .filter(|s| s.system_pressure > 0.8)
1140            .count();
1141
1142        MemoryPressureAnalysis {
1143            max_pressure,
1144            avg_pressure,
1145            pressure_spikes,
1146            critical_periods: pressure_spikes, // Simplified
1147        }
1148    }
1149
1150    /// Analyze allocation patterns under stress
1151    fn analyze_allocation_patterns(&self) -> AllocationPatternAnalysis {
1152        let spike_count = self.stress_metrics.allocation_spikes.len();
1153        let total_spike_memory: usize = self
1154            .stress_metrics
1155            .allocation_spikes
1156            .iter()
1157            .map(|s| s.spike_size)
1158            .sum();
1159
1160        let pattern_regularity = if spike_count > 1 {
1161            // Calculate variance in spike timing
1162            let intervals: Vec<_> = self
1163                .stress_metrics
1164                .allocation_spikes
1165                .windows(2)
1166                .map(|pair| {
1167                    pair[1]
1168                        .timestamp
1169                        .duration_since(pair[0].timestamp)
1170                        .as_secs_f64()
1171                })
1172                .collect();
1173
1174            if !intervals.is_empty() {
1175                let mean_interval = intervals.iter().sum::<f64>() / intervals.len() as f64;
1176                let variance = intervals
1177                    .iter()
1178                    .map(|&x| (x - mean_interval).powi(2))
1179                    .sum::<f64>()
1180                    / intervals.len() as f64;
1181                1.0 / (1.0 + variance) // Higher variance = lower regularity
1182            } else {
1183                1.0
1184            }
1185        } else {
1186            1.0
1187        };
1188
1189        AllocationPatternAnalysis {
1190            spike_count,
1191            total_spike_memory,
1192            pattern_regularity,
1193            fragmentation_level: self.stress_metrics.stress_fragmentation,
1194        }
1195    }
1196
1197    /// Analyze stress performance
1198    fn analyze_stress_performance(&self) -> StressPerformanceAnalysis {
1199        StressPerformanceAnalysis {
1200            max_growth_rate: self.stress_metrics.max_growth_rate,
1201            concurrent_overhead: self.stress_metrics.concurrent_overhead,
1202            large_dataset_efficiency: self.stress_metrics.large_dataset_efficiency,
1203            recovery_time: self.stress_metrics.recovery_time_seconds,
1204            overall_stress_grade: self.calculate_stress_grade(),
1205        }
1206    }
1207
1208    /// Calculate overall stress performance grade
1209    fn calculate_stress_grade(&self) -> StressPerformanceGrade {
1210        let factors = [
1211            if self.stress_metrics.max_growth_rate < 1024.0 * 1024.0 {
1212                1.0
1213            } else {
1214                0.0
1215            }, // < 1MB/s growth
1216            if self.stress_metrics.concurrent_overhead < 1024.0 * 1024.0 {
1217                1.0
1218            } else {
1219                0.0
1220            }, // < 1MB overhead per thread
1221            self.stress_metrics.large_dataset_efficiency.min(1.0), // Efficiency ratio
1222            if self.stress_metrics.recovery_time_seconds < 10.0 {
1223                1.0
1224            } else {
1225                0.0
1226            }, // < 10s recovery
1227        ];
1228
1229        let score = factors.iter().sum::<f64>() / factors.len() as f64;
1230
1231        match score {
1232            s if s >= 0.9 => StressPerformanceGrade::Excellent,
1233            s if s >= 0.7 => StressPerformanceGrade::Good,
1234            s if s >= 0.5 => StressPerformanceGrade::Fair,
1235            s if s >= 0.3 => StressPerformanceGrade::Poor,
1236            _ => StressPerformanceGrade::Critical,
1237        }
1238    }
1239
1240    /// Generate stress-specific recommendations
1241    fn generate_stress_recommendations(&self) -> Vec<String> {
1242        let mut recommendations = Vec::new();
1243
1244        if self.stress_metrics.max_growth_rate > 10.0 * 1024.0 * 1024.0 {
1245            // > 10MB/s
1246            recommendations
1247                .push("High memory growth rate detected - consider batch processing".to_string());
1248        }
1249
1250        if self.stress_metrics.allocation_spikes.len() > 10 {
1251            recommendations
1252                .push("Frequent allocation spikes - implement memory pre-allocation".to_string());
1253        }
1254
1255        if self.stress_metrics.concurrent_overhead > 5.0 * 1024.0 * 1024.0 {
1256            // > 5MB per thread
1257            recommendations
1258                .push("High concurrent overhead - review thread-local memory usage".to_string());
1259        }
1260
1261        if self.stress_metrics.large_dataset_efficiency < 0.7 {
1262            recommendations
1263                .push("Poor large dataset efficiency - optimize memory layout".to_string());
1264        }
1265
1266        if self.stress_metrics.recovery_time_seconds > 30.0 {
1267            recommendations.push("Slow memory recovery - implement explicit cleanup".to_string());
1268        }
1269
1270        recommendations
1271    }
1272}
1273
/// Comprehensive stress memory report
///
/// Produced by `StressMemoryProfiler::generate_stress_report`; bundles the
/// base monitor's report with stress-specific analyses.
#[derive(Debug, Clone)]
pub struct StressMemoryReport {
    /// Base memory report from the underlying `MemoryMonitor`
    pub base_report: MemoryReport,

    /// Stress-specific metrics (spikes, growth rate, overhead, recovery)
    pub stress_metrics: StressMemoryMetrics,

    /// Memory pressure analysis over the snapshot history
    pub memory_pressure_analysis: MemoryPressureAnalysis,

    /// Allocation pattern analysis (spike counts and interval regularity)
    pub allocation_pattern_analysis: AllocationPatternAnalysis,

    /// Stress performance analysis, including the overall grade
    pub stress_performance_analysis: StressPerformanceAnalysis,

    /// System pressure indicators at report generation time
    pub system_pressure: MemoryPressureIndicators,

    /// Number of snapshots taken during the profiling session
    pub snapshot_count: usize,

    /// Stress-specific, human-readable recommendations
    pub stress_recommendations: Vec<String>,
}
1301
/// Memory pressure analysis results
///
/// Aggregated from the snapshot history by `analyze_memory_pressure`.
#[derive(Debug, Clone)]
pub struct MemoryPressureAnalysis {
    /// Maximum pressure level reached (0.0 to 1.0)
    pub max_pressure: f64,

    /// Average pressure level across all snapshots (0.0 when no history)
    pub avg_pressure: f64,

    /// Number of snapshots whose pressure exceeded 0.8
    pub pressure_spikes: usize,

    /// Number of critical pressure periods (currently equal to the spike
    /// count — simplified)
    pub critical_periods: usize,
}
1317
/// Allocation pattern analysis results
///
/// Produced by `analyze_allocation_patterns` from the recorded spikes.
#[derive(Debug, Clone)]
pub struct AllocationPatternAnalysis {
    /// Number of allocation spikes recorded
    pub spike_count: usize,

    /// Total bytes across all spikes
    pub total_spike_memory: usize,

    /// Pattern regularity (0.0 to 1.0); higher interval variance between
    /// spikes yields a lower value
    pub pattern_regularity: f64,

    /// Memory fragmentation level (copied from the stress metrics)
    pub fragmentation_level: f64,
}
1333
/// Stress performance analysis
///
/// Produced by `analyze_stress_performance`; mirrors the key stress
/// metrics and attaches an overall grade.
#[derive(Debug, Clone)]
pub struct StressPerformanceAnalysis {
    /// Maximum memory growth rate (bytes/second)
    pub max_growth_rate: f64,

    /// Concurrent access overhead per thread (bytes)
    pub concurrent_overhead: f64,

    /// Large dataset memory efficiency (expected/actual ratio)
    pub large_dataset_efficiency: f64,

    /// Recovery time after stress (seconds)
    pub recovery_time: f64,

    /// Overall stress performance grade
    pub overall_stress_grade: StressPerformanceGrade,
}
1352
/// Stress performance grades
///
/// Assigned by `calculate_stress_grade` from a composite 0.0–1.0 score.
/// Variants are declared best-to-worst, so the derived `Ord` ranks
/// `Excellent` as the least value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum StressPerformanceGrade {
    /// Composite score >= 0.9
    Excellent,
    /// Composite score >= 0.7
    Good,
    /// Composite score >= 0.5
    Fair,
    /// Composite score >= 0.3
    Poor,
    /// Composite score below 0.3
    Critical,
}
1362
1363/// Create a stress memory profiler for testing
1364#[allow(dead_code)]
1365pub fn create_stress_profiler(name: impl Into<String>) -> StressMemoryProfiler {
1366    StressMemoryProfiler::new(name, None)
1367}
1368
1369/// Create a stress memory profiler with custom configuration
1370#[allow(dead_code)]
1371pub fn create_stress_profiler_with_config(
1372    name: impl Into<String>,
1373    config: StressProfilingConfig,
1374) -> StressMemoryProfiler {
1375    StressMemoryProfiler::new(name, Some(config))
1376}
1377
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_memory_monitor_basic() {
        let mut monitor = MemoryMonitor::new("test");

        // Track some allocations
        monitor.track_allocation(1024, "matrix");
        monitor.track_allocation(512, "cache");

        assert_eq!(monitor.current_memory_bytes, 1536);
        assert_eq!(monitor.peak_memory_bytes, 1536);

        // Track deallocations - deallocate all to avoid false leak detection
        monitor.track_deallocation(512, "cache");
        assert_eq!(monitor.current_memory_bytes, 1024);

        monitor.track_deallocation(1024, "matrix");
        assert_eq!(monitor.current_memory_bytes, 0);

        let report = monitor.generate_report();
        // After deallocating all memory, should have no leaks
        assert!(!report.has_potential_leaks());
    }

    #[test]
    fn test_leak_detection() {
        let mut monitor = MemoryMonitor::new("leak_test");

        // Allocate without deallocating (potential leak)
        monitor.track_allocation(2048, "leaked_memory");

        let report = monitor.generate_report();
        assert!(report.leak_indicators.unmatched_allocations > 0);
    }

    #[test]
    fn test_global_monitoring() {
        start_monitoring();

        let _monitor1 = MemoryMonitor::new("global_test_1");
        let _monitor2 = MemoryMonitor::new("global_test_2");

        // Tests run in parallel and other tests also create monitors that
        // may land in the shared global registry, so assert a lower bound
        // rather than an exact count to avoid flakiness.
        let stats = get_global_stats().unwrap();
        assert!(stats.active_interpolators >= 2);

        stop_monitoring();
    }

    #[test]
    fn test_stress_profiler_basic() {
        let mut profiler = create_stress_profiler("stress_test");

        profiler.start_stress_profiling("large_dataset");
        profiler.track_stress_allocation(10 * 1024 * 1024, "large_matrix", "large_dataset");
        profiler.track_stress_allocation(5 * 1024 * 1024, "cache", "large_dataset");

        let report = profiler.generate_stress_report();
        assert!(!report.stress_metrics.allocation_spikes.is_empty());
        assert!(report.snapshot_count > 0);
    }

    #[test]
    fn test_stress_allocation_spike_detection() {
        let mut profiler = create_stress_profiler("spike_test");

        // Trigger allocation spike (default threshold is 10MB)
        profiler.track_stress_allocation(15 * 1024 * 1024, "spike", "stress_test");

        assert_eq!(profiler.stress_metrics.allocation_spikes.len(), 1);
        assert_eq!(
            profiler.stress_metrics.allocation_spikes[0].spike_size,
            15 * 1024 * 1024
        );
    }
}