Skip to main content

trustformers_debug/
memory_profiler.rs

1//! Advanced memory profiling for TrustformeRS models.
2//!
3//! This module provides comprehensive memory profiling capabilities including:
4//! - Heap allocation tracking
5//! - Memory leak detection
6//! - Peak memory analysis
7//! - Allocation patterns
8//! - GC pressure analysis
9//! - Memory fragmentation monitoring
10//!
11//! # Example
12//!
//! ```no_run
//! use trustformers_debug::{MemoryProfiler, MemoryProfilingConfig};
//!
//! # async fn example() -> anyhow::Result<()> {
//! let config = MemoryProfilingConfig::default();
//! let mut profiler = MemoryProfiler::new(config);
//!
//! profiler.start().await?;
//! // ... run model training/inference ...
//! let report = profiler.stop().await?;
//!
//! println!("Peak memory usage: {} MB", report.peak_memory_mb);
//! println!("Memory leaks detected: {}", report.potential_leaks.len());
//! # Ok(())
//! # }
//! ```
26
27use anyhow::Result;
28use serde::{Deserialize, Serialize};
29use std::collections::{HashMap, VecDeque};
30use std::sync::{Arc, Mutex};
31use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
32use tokio::time::interval;
33use uuid::Uuid;
34
/// Configuration for memory profiling
///
/// Tunables for [`MemoryProfiler`]. See the [`Default`] impl for the stock
/// values (100 ms sampling, 1 MB large-allocation threshold, 5 min leak age).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryProfilingConfig {
    /// Enable heap allocation tracking (also gates the background sampler)
    pub enable_heap_tracking: bool,
    /// Enable leak detection
    pub enable_leak_detection: bool,
    /// Enable allocation pattern analysis
    pub enable_pattern_analysis: bool,
    /// Enable memory fragmentation monitoring
    pub enable_fragmentation_monitoring: bool,
    /// Enable GC pressure analysis
    pub enable_gc_pressure_analysis: bool,
    /// Sampling interval for memory measurements (milliseconds)
    pub sampling_interval_ms: u64,
    /// Maximum number of allocation records to keep
    pub max_allocation_records: usize,
    /// Threshold for considering an allocation "large" (bytes)
    pub large_allocation_threshold: usize,
    /// Window size for detecting allocation patterns (seconds)
    pub pattern_analysis_window_secs: u64,
    /// Threshold for leak detection: live allocations older than this many
    /// seconds are reported as potential leaks
    pub leak_detection_threshold_secs: u64,
}
59
60impl Default for MemoryProfilingConfig {
61    fn default() -> Self {
62        Self {
63            enable_heap_tracking: true,
64            enable_leak_detection: true,
65            enable_pattern_analysis: true,
66            enable_fragmentation_monitoring: true,
67            enable_gc_pressure_analysis: true,
68            sampling_interval_ms: 100, // 100ms sampling
69            max_allocation_records: 100000,
70            large_allocation_threshold: 1024 * 1024, // 1MB
71            pattern_analysis_window_secs: 60,        // 1 minute window
72            leak_detection_threshold_secs: 300,      // 5 minutes
73        }
74    }
75}
76
/// Allocation record for tracking individual allocations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationRecord {
    /// Id returned by `record_allocation`; later passed to `record_deallocation`.
    pub id: Uuid,
    /// Allocation size in bytes.
    pub size: usize,
    /// Wall-clock time the allocation was recorded.
    pub timestamp: SystemTime,
    /// Call stack at allocation time (currently a placeholder trace).
    pub stack_trace: Vec<String>,
    /// Category of the allocation (tensor, buffer, cache, ...).
    pub allocation_type: AllocationType,
    /// True once a matching deallocation has been recorded.
    pub freed: bool,
    /// When the deallocation was recorded, if it has been.
    pub freed_at: Option<SystemTime>,
    pub tags: Vec<String>, // For categorizing allocations
}
89
/// Type of allocation
///
/// Used as a `HashMap` key throughout, hence the `Eq + Hash` derives.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AllocationType {
    Tensor,
    Buffer,
    Weights,
    Gradients,
    Activations,
    Cache,
    Temporary,
    /// Free-form category for anything not covered above.
    Other(String),
}
102
/// Memory usage snapshot at a point in time
///
/// Accounting model (see `get_memory_snapshot`): `total_heap_bytes` sums
/// every tracked record, freed or live; `used_heap_bytes` sums only live
/// records; `free_heap_bytes` is their difference.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemorySnapshot {
    pub timestamp: SystemTime,
    /// Bytes across all tracked records (including already-freed ones).
    pub total_heap_bytes: usize,
    /// Bytes across live (unfreed) records.
    pub used_heap_bytes: usize,
    /// `total_heap_bytes - used_heap_bytes`.
    pub free_heap_bytes: usize,
    pub peak_heap_bytes: usize,
    /// Number of live allocations.
    pub allocation_count: usize,
    /// Number of freed records still tracked.
    pub free_count: usize,
    /// `free_heap_bytes / total_heap_bytes` (0.0 when nothing is tracked).
    pub fragmentation_ratio: f64,
    pub gc_pressure_score: f64,
    /// Live bytes grouped by allocation category.
    pub allocations_by_type: HashMap<AllocationType, usize>,
    pub allocations_by_size: HashMap<String, usize>, // Size buckets
}
118
/// Memory leak information
///
/// Produced by `detect_leaks` for live allocations older than the configured
/// leak-age threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryLeak {
    /// Id of the suspect allocation in the tracking table.
    pub allocation_id: Uuid,
    /// Size of the suspect allocation in bytes.
    pub size: usize,
    /// Age of the allocation at detection time.
    pub age_seconds: f64,
    pub allocation_type: AllocationType,
    pub stack_trace: Vec<String>,
    pub tags: Vec<String>,
    pub severity: LeakSeverity,
}
130
131/// Severity of memory leak
132#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
133pub enum LeakSeverity {
134    Low,      // Small allocations, short-lived
135    Medium,   // Moderate size or moderately old
136    High,     // Large allocations or very old
137    Critical, // Very large or extremely old
138}
139
/// Allocation pattern detected by analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationPattern {
    pub pattern_type: PatternType,
    /// Human-readable summary of what was observed.
    pub description: String,
    pub confidence: f64,   // 0.0 to 1.0
    pub impact_score: f64, // 0.0 to 1.0 (higher = more concerning)
    /// Suggested remediations for this pattern.
    pub recommendations: Vec<String>,
    /// Up to a few representative allocations illustrating the pattern.
    pub examples: Vec<AllocationRecord>,
}
150
/// Type of allocation pattern
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum PatternType {
    MemoryLeak,           // Consistent growth without deallocation
    ChurningAllocations,  // Rapid alloc/free cycles
    FragmentationCausing, // Allocations that cause fragmentation
    LargeAllocations,     // Unexpectedly large allocations
    UnbalancedTypes,      // Disproportionate allocation types
    PeakUsageSpikes,      // Sudden memory usage spikes
}
161
/// Memory fragmentation analysis
///
/// Produced by `analyze_fragmentation` from a single memory snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FragmentationAnalysis {
    /// Free/total ratio taken from the snapshot.
    pub fragmentation_ratio: f64,
    /// Largest contiguous free block (currently approximated by total free).
    pub largest_free_block: usize,
    pub total_free_memory: usize,
    pub free_block_count: usize,
    pub average_free_block_size: f64,
    pub fragmentation_severity: FragmentationSeverity,
    pub recommendations: Vec<String>,
}
173
/// Fragmentation severity levels
///
/// Thresholds applied by `analyze_fragmentation` to the fragmentation ratio.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FragmentationSeverity {
    Low,    // < 10% fragmentation
    Medium, // 10-30% fragmentation
    High,   // 30-60% fragmentation
    Severe, // > 60% fragmentation
}
182
/// Garbage collection pressure analysis
///
/// Produced by `analyze_gc_pressure` from the pressure score and the
/// allocation/deallocation rates derived from the timeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GCPressureAnalysis {
    pub pressure_score: f64,    // 0.0 to 1.0
    pub allocation_rate: f64,   // allocations per second
    pub deallocation_rate: f64, // deallocations per second
    pub churn_rate: f64,        // alloc/dealloc cycles per second
    pub pressure_level: GCPressureLevel,
    /// Observations that pushed the pressure up (may be empty).
    pub contributing_factors: Vec<String>,
    pub recommendations: Vec<String>,
}
194
/// GC pressure levels
///
/// Bands over the pressure score: <0.25 Low, <0.5 Medium, <0.75 High,
/// otherwise Critical (see `analyze_gc_pressure`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum GCPressureLevel {
    Low,
    Medium,
    High,
    Critical,
}
203
/// Comprehensive memory profiling report
///
/// Produced by [`MemoryProfiler::stop`]; bundles session metadata with the
/// timeline plus the leak, pattern, fragmentation, and GC-pressure analyses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryProfilingReport {
    pub session_id: Uuid,
    pub start_time: SystemTime,
    pub end_time: SystemTime,
    pub duration_secs: f64,
    /// Configuration the session ran with.
    pub config: MemoryProfilingConfig,

    // Summary statistics
    /// Highest sampled heap usage over the session, in MB.
    pub peak_memory_mb: f64,
    /// Mean sampled heap usage over the session, in MB.
    pub average_memory_mb: f64,
    pub total_allocations: usize,
    pub total_deallocations: usize,
    /// `total_allocations - total_deallocations`.
    pub net_allocations: i64,

    // Memory timeline
    pub memory_timeline: Vec<MemorySnapshot>,

    // Leak detection
    pub potential_leaks: Vec<MemoryLeak>,
    /// Leak counts keyed by allocation category.
    pub leak_summary: HashMap<AllocationType, usize>,

    // Pattern analysis
    pub detected_patterns: Vec<AllocationPattern>,

    // Fragmentation analysis
    pub fragmentation_analysis: FragmentationAnalysis,

    // GC pressure analysis
    pub gc_pressure_analysis: GCPressureAnalysis,

    // Allocation statistics
    pub allocations_by_type: HashMap<AllocationType, AllocationTypeStats>,
    /// Record counts keyed by human-readable size bucket ("0-1KB", ...).
    pub allocations_by_size_bucket: HashMap<String, usize>,

    // Performance metrics
    /// Estimated time spent on profiling itself.
    pub profiling_overhead_ms: f64,
    pub sampling_accuracy: f64,
}
244
/// Statistics for each allocation type
///
/// Maintained incrementally by `update_type_stats` on every recorded
/// allocation and deallocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationTypeStats {
    pub total_allocations: usize,
    pub total_deallocations: usize,
    /// Currently live allocations of this type.
    pub current_count: usize,
    pub total_bytes_allocated: usize,
    pub total_bytes_deallocated: usize,
    /// Currently live bytes of this type.
    pub current_bytes: usize,
    /// High-water mark of `current_count`.
    pub peak_count: usize,
    /// High-water mark of `current_bytes`.
    pub peak_bytes: usize,
    /// `total_bytes_allocated / total_allocations` (0.0 before any allocation).
    pub average_allocation_size: f64,
    pub largest_allocation: usize,
}
259
/// Memory profiler implementation
///
/// Shared state is wrapped in `Arc<Mutex<...>>` so the background sampling
/// task spawned by `start_sampling` can access it alongside caller threads.
#[derive(Debug)]
pub struct MemoryProfiler {
    config: MemoryProfilingConfig,
    // Random id identifying this profiling session in logs and reports.
    session_id: Uuid,
    // Set by `start()`; `None` until the profiler has been started.
    start_time: Option<Instant>,
    // All tracked allocations, keyed by the id handed out on record.
    allocations: Arc<Mutex<HashMap<Uuid, AllocationRecord>>>,
    // Periodic snapshots appended by the sampling task.
    memory_timeline: Arc<Mutex<VecDeque<MemorySnapshot>>>,
    // Per-type running statistics.
    type_stats: Arc<Mutex<HashMap<AllocationType, AllocationTypeStats>>>,
    // Flag the sampling task polls to know when to shut down.
    running: Arc<Mutex<bool>>,
    // Used to estimate profiling overhead at stop time.
    profiling_start_time: Option<Instant>,
}
272
273impl MemoryProfiler {
274    /// Create a new memory profiler
275    pub fn new(config: MemoryProfilingConfig) -> Self {
276        Self {
277            config,
278            session_id: Uuid::new_v4(),
279            start_time: None,
280            allocations: Arc::new(Mutex::new(HashMap::new())),
281            memory_timeline: Arc::new(Mutex::new(VecDeque::new())),
282            type_stats: Arc::new(Mutex::new(HashMap::new())),
283            running: Arc::new(Mutex::new(false)),
284            profiling_start_time: None,
285        }
286    }
287
288    /// Start memory profiling
289    pub async fn start(&mut self) -> Result<()> {
290        let mut running = self.running.lock().expect("lock should not be poisoned");
291        if *running {
292            return Err(anyhow::anyhow!("Memory profiler is already running"));
293        }
294
295        *running = true;
296        self.start_time = Some(Instant::now());
297        self.profiling_start_time = Some(Instant::now());
298
299        // Start periodic sampling
300        if self.config.enable_heap_tracking {
301            self.start_sampling().await?;
302        }
303
304        tracing::info!("Memory profiler started for session {}", self.session_id);
305        Ok(())
306    }
307
308    /// Stop memory profiling and generate report
309    pub async fn stop(&mut self) -> Result<MemoryProfilingReport> {
310        let mut running = self.running.lock().expect("lock should not be poisoned");
311        if !*running {
312            return Err(anyhow::anyhow!("Memory profiler is not running"));
313        }
314
315        *running = false;
316        let end_time = SystemTime::now();
317        let start_time =
318            self.start_time.expect("start_time should be set when profiler is running");
319        let duration =
320            end_time.duration_since(UNIX_EPOCH)?.as_secs_f64() - start_time.elapsed().as_secs_f64();
321
322        // Calculate profiling overhead
323        let profiling_overhead = if let Some(prof_start) = self.profiling_start_time {
324            prof_start.elapsed().as_millis() as f64 * 0.01 // Estimated 1% overhead
325        } else {
326            0.0
327        };
328
329        let report = self.generate_report(end_time, duration, profiling_overhead).await?;
330
331        tracing::info!("Memory profiler stopped for session {}", self.session_id);
332        Ok(report)
333    }
334
335    /// Record an allocation
336    pub fn record_allocation(
337        &self,
338        size: usize,
339        allocation_type: AllocationType,
340        tags: Vec<String>,
341    ) -> Result<Uuid> {
342        let running = self.running.lock().expect("lock should not be poisoned");
343        if !*running {
344            return Err(anyhow::anyhow!("Memory profiler is not running"));
345        }
346
347        let allocation_id = Uuid::new_v4();
348        let record = AllocationRecord {
349            id: allocation_id,
350            size,
351            timestamp: SystemTime::now(),
352            stack_trace: self.capture_stack_trace(),
353            allocation_type: allocation_type.clone(),
354            freed: false,
355            freed_at: None,
356            tags,
357        };
358
359        // Store allocation record
360        let mut allocations = self.allocations.lock().expect("lock should not be poisoned");
361        allocations.insert(allocation_id, record);
362
363        // Update type statistics
364        self.update_type_stats(&allocation_type, size, true);
365
366        Ok(allocation_id)
367    }
368
369    /// Record a deallocation
370    pub fn record_deallocation(&self, allocation_id: Uuid) -> Result<()> {
371        let running = self.running.lock().expect("lock should not be poisoned");
372        if !*running {
373            return Ok(()); // Silently ignore if not running
374        }
375
376        let mut allocations = self.allocations.lock().expect("lock should not be poisoned");
377        if let Some(record) = allocations.get_mut(&allocation_id) {
378            record.freed = true;
379            record.freed_at = Some(SystemTime::now());
380
381            // Update type statistics
382            self.update_type_stats(&record.allocation_type, record.size, false);
383        }
384
385        Ok(())
386    }
387
388    /// Tag an existing allocation
389    pub fn tag_allocation(&self, allocation_id: Uuid, tag: String) -> Result<()> {
390        let mut allocations = self.allocations.lock().expect("lock should not be poisoned");
391        if let Some(record) = allocations.get_mut(&allocation_id) {
392            record.tags.push(tag);
393        }
394        Ok(())
395    }
396
397    /// Get current memory usage snapshot
398    pub fn get_memory_snapshot(&self) -> Result<MemorySnapshot> {
399        let allocations = self.allocations.lock().expect("lock should not be poisoned");
400        let _type_stats = self.type_stats.lock().expect("lock should not be poisoned");
401
402        let mut total_heap = 0;
403        let mut used_heap = 0;
404        let mut allocation_count = 0;
405        let mut free_count = 0;
406        let mut allocations_by_type = HashMap::new();
407        let mut allocations_by_size = HashMap::new();
408
409        for record in allocations.values() {
410            total_heap += record.size;
411
412            if !record.freed {
413                used_heap += record.size;
414                allocation_count += 1;
415
416                *allocations_by_type.entry(record.allocation_type.clone()).or_insert(0) +=
417                    record.size;
418
419                let size_bucket = self.get_size_bucket(record.size);
420                *allocations_by_size.entry(size_bucket).or_insert(0) += 1;
421            } else {
422                free_count += 1;
423            }
424        }
425
426        let free_heap = total_heap - used_heap;
427        let fragmentation_ratio =
428            if total_heap > 0 { free_heap as f64 / total_heap as f64 } else { 0.0 };
429
430        let gc_pressure_score = self.calculate_gc_pressure_score();
431
432        Ok(MemorySnapshot {
433            timestamp: SystemTime::now(),
434            total_heap_bytes: total_heap,
435            used_heap_bytes: used_heap,
436            free_heap_bytes: free_heap,
437            peak_heap_bytes: used_heap, // Simplified for now
438            allocation_count,
439            free_count,
440            fragmentation_ratio,
441            gc_pressure_score,
442            allocations_by_type,
443            allocations_by_size,
444        })
445    }
446
447    /// Detect memory leaks
448    pub fn detect_leaks(&self) -> Result<Vec<MemoryLeak>> {
449        let allocations = self.allocations.lock().expect("lock should not be poisoned");
450        let now = SystemTime::now();
451        let threshold = Duration::from_secs(self.config.leak_detection_threshold_secs);
452        let mut leaks = Vec::new();
453
454        for record in allocations.values() {
455            if !record.freed {
456                let age = now.duration_since(record.timestamp)?;
457                if age > threshold {
458                    let age_seconds = age.as_secs_f64();
459                    let severity = self.classify_leak_severity(record.size, age_seconds);
460
461                    leaks.push(MemoryLeak {
462                        allocation_id: record.id,
463                        size: record.size,
464                        age_seconds,
465                        allocation_type: record.allocation_type.clone(),
466                        stack_trace: record.stack_trace.clone(),
467                        tags: record.tags.clone(),
468                        severity,
469                    });
470                }
471            }
472        }
473
474        // Sort by severity and size
475        leaks.sort_by(|a, b| b.severity.cmp(&a.severity).then(b.size.cmp(&a.size)));
476
477        Ok(leaks)
478    }
479
480    /// Analyze allocation patterns
481    pub fn analyze_patterns(&self) -> Result<Vec<AllocationPattern>> {
482        let mut patterns = Vec::new();
483
484        // Detect memory leak patterns
485        if let Ok(leak_pattern) = self.detect_leak_pattern() {
486            patterns.push(leak_pattern);
487        }
488
489        // Detect churning allocation patterns
490        if let Ok(churn_pattern) = self.detect_churn_pattern() {
491            patterns.push(churn_pattern);
492        }
493
494        // Detect large allocation patterns
495        if let Ok(large_alloc_pattern) = self.detect_large_allocation_pattern() {
496            patterns.push(large_alloc_pattern);
497        }
498
499        // Detect fragmentation-causing patterns
500        if let Ok(frag_pattern) = self.detect_fragmentation_pattern() {
501            patterns.push(frag_pattern);
502        }
503
504        Ok(patterns)
505    }
506
507    /// Analyze memory fragmentation
508    pub fn analyze_fragmentation(&self) -> Result<FragmentationAnalysis> {
509        let snapshot = self.get_memory_snapshot()?;
510
511        let fragmentation_ratio = snapshot.fragmentation_ratio;
512        let severity = match fragmentation_ratio {
513            r if r < 0.1 => FragmentationSeverity::Low,
514            r if r < 0.3 => FragmentationSeverity::Medium,
515            r if r < 0.6 => FragmentationSeverity::High,
516            _ => FragmentationSeverity::Severe,
517        };
518
519        let recommendations = match severity {
520            FragmentationSeverity::Low => {
521                vec!["Memory fragmentation is low. Continue current practices.".to_string()]
522            },
523            FragmentationSeverity::Medium => vec![
524                "Consider pooling allocations of similar sizes.".to_string(),
525                "Monitor for increasing fragmentation trends.".to_string(),
526            ],
527            FragmentationSeverity::High => vec![
528                "Implement memory pooling for frequent allocations.".to_string(),
529                "Consider compaction strategies for long-running processes.".to_string(),
530                "Review allocation patterns for optimization opportunities.".to_string(),
531            ],
532            FragmentationSeverity::Severe => vec![
533                "Critical fragmentation detected. Immediate action required.".to_string(),
534                "Implement custom allocators with compaction.".to_string(),
535                "Consider restarting the process to reset memory layout.".to_string(),
536                "Review and optimize allocation strategies.".to_string(),
537            ],
538        };
539
540        Ok(FragmentationAnalysis {
541            fragmentation_ratio,
542            largest_free_block: snapshot.free_heap_bytes, // Simplified
543            total_free_memory: snapshot.free_heap_bytes,
544            free_block_count: snapshot.free_count,
545            average_free_block_size: if snapshot.free_count > 0 {
546                snapshot.free_heap_bytes as f64 / snapshot.free_count as f64
547            } else {
548                0.0
549            },
550            fragmentation_severity: severity,
551            recommendations,
552        })
553    }
554
555    /// Analyze GC pressure
556    pub fn analyze_gc_pressure(&self) -> Result<GCPressureAnalysis> {
557        let timeline = self.memory_timeline.lock().expect("lock should not be poisoned");
558
559        let pressure_score = self.calculate_gc_pressure_score();
560        let (allocation_rate, deallocation_rate) = self.calculate_allocation_rates(&timeline);
561        let churn_rate = allocation_rate.min(deallocation_rate);
562
563        let pressure_level = match pressure_score {
564            p if p < 0.25 => GCPressureLevel::Low,
565            p if p < 0.5 => GCPressureLevel::Medium,
566            p if p < 0.75 => GCPressureLevel::High,
567            _ => GCPressureLevel::Critical,
568        };
569
570        let mut contributing_factors = Vec::new();
571        let mut recommendations = Vec::new();
572
573        if allocation_rate > 1000.0 {
574            contributing_factors.push("High allocation rate".to_string());
575            recommendations.push("Consider object pooling or reuse strategies".to_string());
576        }
577
578        if churn_rate > 500.0 {
579            contributing_factors.push("High allocation churn".to_string());
580            recommendations.push("Reduce temporary object creation".to_string());
581        }
582
583        if pressure_level == GCPressureLevel::Critical {
584            recommendations
585                .push("Consider manual memory management for critical paths".to_string());
586        }
587
588        Ok(GCPressureAnalysis {
589            pressure_score,
590            allocation_rate,
591            deallocation_rate,
592            churn_rate,
593            pressure_level,
594            contributing_factors,
595            recommendations,
596        })
597    }
598
599    // Private helper methods
600
601    async fn start_sampling(&self) -> Result<()> {
602        let interval_duration = Duration::from_millis(self.config.sampling_interval_ms);
603        let mut interval = interval(interval_duration);
604        let _timeline = Arc::clone(&self.memory_timeline);
605        let running = Arc::clone(&self.running);
606
607        tokio::spawn(async move {
608            loop {
609                interval.tick().await;
610
611                let is_running = {
612                    let running_guard = running.lock().expect("lock should not be poisoned");
613                    *running_guard
614                };
615
616                if !is_running {
617                    break;
618                }
619
620                // This would normally sample actual memory usage
621                // For now, we'll use a placeholder implementation
622            }
623        });
624
625        Ok(())
626    }
627
628    pub async fn generate_report(
629        &self,
630        end_time: SystemTime,
631        duration_secs: f64,
632        profiling_overhead_ms: f64,
633    ) -> Result<MemoryProfilingReport> {
634        let allocations = self.allocations.lock().expect("lock should not be poisoned");
635        let timeline = self.memory_timeline.lock().expect("lock should not be poisoned");
636        let type_stats = self.type_stats.lock().expect("lock should not be poisoned");
637
638        let total_allocations = allocations.len();
639        let total_deallocations = allocations.values().filter(|r| r.freed).count();
640        let net_allocations = total_allocations as i64 - total_deallocations as i64;
641
642        let potential_leaks = self.detect_leaks()?;
643        let detected_patterns = self.analyze_patterns()?;
644        let fragmentation_analysis = self.analyze_fragmentation()?;
645        let gc_pressure_analysis = self.analyze_gc_pressure()?;
646
647        // Calculate summary statistics
648        let peak_memory_mb = timeline
649            .iter()
650            .map(|s| s.peak_heap_bytes as f64 / 1024.0 / 1024.0)
651            .fold(0.0, f64::max);
652
653        let average_memory_mb = if !timeline.is_empty() {
654            timeline.iter().map(|s| s.used_heap_bytes as f64 / 1024.0 / 1024.0).sum::<f64>()
655                / timeline.len() as f64
656        } else {
657            0.0
658        };
659
660        let mut leak_summary = HashMap::new();
661        for leak in &potential_leaks {
662            *leak_summary.entry(leak.allocation_type.clone()).or_insert(0) += 1;
663        }
664
665        // Create size buckets
666        let mut allocations_by_size_bucket = HashMap::new();
667        for record in allocations.values() {
668            let bucket = self.get_size_bucket(record.size);
669            *allocations_by_size_bucket.entry(bucket).or_insert(0) += 1;
670        }
671
672        Ok(MemoryProfilingReport {
673            session_id: self.session_id,
674            start_time: UNIX_EPOCH
675                + Duration::from_secs_f64(
676                    SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs_f64() - duration_secs,
677                ),
678            end_time,
679            duration_secs,
680            config: self.config.clone(),
681            peak_memory_mb,
682            average_memory_mb,
683            total_allocations,
684            total_deallocations,
685            net_allocations,
686            memory_timeline: timeline.iter().cloned().collect(),
687            potential_leaks,
688            leak_summary,
689            detected_patterns,
690            fragmentation_analysis,
691            gc_pressure_analysis,
692            allocations_by_type: type_stats.clone(),
693            allocations_by_size_bucket,
694            profiling_overhead_ms,
695            sampling_accuracy: 0.95, // Placeholder
696        })
697    }
698
699    fn capture_stack_trace(&self) -> Vec<String> {
700        // Placeholder implementation - in a real implementation,
701        // this would capture the actual call stack
702        vec![
703            "function_a".to_string(),
704            "function_b".to_string(),
705            "main".to_string(),
706        ]
707    }
708
709    fn update_type_stats(
710        &self,
711        allocation_type: &AllocationType,
712        size: usize,
713        is_allocation: bool,
714    ) {
715        let mut type_stats = self.type_stats.lock().expect("lock should not be poisoned");
716        let stats = type_stats.entry(allocation_type.clone()).or_insert(AllocationTypeStats {
717            total_allocations: 0,
718            total_deallocations: 0,
719            current_count: 0,
720            total_bytes_allocated: 0,
721            total_bytes_deallocated: 0,
722            current_bytes: 0,
723            peak_count: 0,
724            peak_bytes: 0,
725            average_allocation_size: 0.0,
726            largest_allocation: 0,
727        });
728
729        if is_allocation {
730            stats.total_allocations += 1;
731            stats.current_count += 1;
732            stats.total_bytes_allocated += size;
733            stats.current_bytes += size;
734            stats.peak_count = stats.peak_count.max(stats.current_count);
735            stats.peak_bytes = stats.peak_bytes.max(stats.current_bytes);
736            stats.largest_allocation = stats.largest_allocation.max(size);
737        } else {
738            stats.total_deallocations += 1;
739            stats.current_count = stats.current_count.saturating_sub(1);
740            stats.total_bytes_deallocated += size;
741            stats.current_bytes = stats.current_bytes.saturating_sub(size);
742        }
743
744        stats.average_allocation_size = if stats.total_allocations > 0 {
745            stats.total_bytes_allocated as f64 / stats.total_allocations as f64
746        } else {
747            0.0
748        };
749    }
750
751    fn get_size_bucket(&self, size: usize) -> String {
752        match size {
753            0..=1024 => "0-1KB".to_string(),
754            1025..=10240 => "1-10KB".to_string(),
755            10241..=102400 => "10-100KB".to_string(),
756            102401..=1048576 => "100KB-1MB".to_string(),
757            1048577..=10485760 => "1-10MB".to_string(),
758            _ => ">10MB".to_string(),
759        }
760    }
761
762    fn classify_leak_severity(&self, size: usize, age_seconds: f64) -> LeakSeverity {
763        let large_size = size > self.config.large_allocation_threshold;
764        let old_age = age_seconds > 1800.0; // 30 minutes
765        let very_old_age = age_seconds > 3600.0; // 1 hour
766
767        match (large_size, old_age, very_old_age) {
768            (true, _, true) => LeakSeverity::Critical,
769            (true, true, _) => LeakSeverity::High,
770            (true, false, _) => LeakSeverity::Medium,
771            (false, true, _) => LeakSeverity::Medium,
772            _ => LeakSeverity::Low,
773        }
774    }
775
776    fn calculate_gc_pressure_score(&self) -> f64 {
777        // Simplified GC pressure calculation
778        // In a real implementation, this would consider allocation patterns,
779        // heap growth rate, and other factors
780        0.3 // Placeholder value
781    }
782
783    fn calculate_allocation_rates(&self, timeline: &VecDeque<MemorySnapshot>) -> (f64, f64) {
784        if timeline.len() < 2 {
785            return (0.0, 0.0);
786        }
787
788        // Simplified rate calculation
789        let first = &timeline[0];
790        let last = &timeline[timeline.len() - 1];
791
792        let duration = last
793            .timestamp
794            .duration_since(first.timestamp)
795            .unwrap_or(Duration::from_secs(1))
796            .as_secs_f64();
797
798        let allocation_rate =
799            (last.allocation_count as f64 - first.allocation_count as f64) / duration;
800        let deallocation_rate = (last.free_count as f64 - first.free_count as f64) / duration;
801
802        (allocation_rate.max(0.0), deallocation_rate.max(0.0))
803    }
804
805    // Pattern detection methods
806
807    fn detect_leak_pattern(&self) -> Result<AllocationPattern> {
808        let leaks = self.detect_leaks()?;
809        let high_severity_leaks = leaks
810            .iter()
811            .filter(|l| l.severity == LeakSeverity::High || l.severity == LeakSeverity::Critical)
812            .count();
813
814        let confidence = if leaks.len() > 10 { 0.9 } else { 0.5 };
815        let impact_score = (high_severity_leaks as f64 / (leaks.len().max(1)) as f64).min(1.0);
816
817        Ok(AllocationPattern {
818            pattern_type: PatternType::MemoryLeak,
819            description: format!("Detected {} potential memory leaks", leaks.len()),
820            confidence,
821            impact_score,
822            recommendations: vec![
823                "Review long-lived allocations for proper cleanup".to_string(),
824                "Implement RAII patterns for automatic resource management".to_string(),
825            ],
826            examples: leaks
827                .into_iter()
828                .take(3)
829                .map(|leak| {
830                    // Convert leak to allocation record for example
831                    AllocationRecord {
832                        id: leak.allocation_id,
833                        size: leak.size,
834                        timestamp: SystemTime::now(), // Placeholder
835                        stack_trace: leak.stack_trace,
836                        allocation_type: leak.allocation_type,
837                        freed: false,
838                        freed_at: None,
839                        tags: leak.tags,
840                    }
841                })
842                .collect(),
843        })
844    }
845
846    fn detect_churn_pattern(&self) -> Result<AllocationPattern> {
847        // Simplified churn detection
848        let allocations = self.allocations.lock().expect("lock should not be poisoned");
849        let short_lived_count = allocations
850            .values()
851            .filter(|record| {
852                if let (Some(_freed_at), false) = (record.freed_at, record.freed) {
853                    false // Contradiction, skip
854                } else if record.freed {
855                    if let Some(freed_at) = record.freed_at {
856                        freed_at.duration_since(record.timestamp).unwrap_or(Duration::from_secs(0))
857                            < Duration::from_secs(1)
858                    } else {
859                        false
860                    }
861                } else {
862                    false
863                }
864            })
865            .count();
866
867        let total_count = allocations.len();
868        let churn_ratio = if total_count > 0 {
869            short_lived_count as f64 / total_count as f64
870        } else {
871            0.0
872        };
873
874        Ok(AllocationPattern {
875            pattern_type: PatternType::ChurningAllocations,
876            description: format!(
877                "High allocation churn detected: {:.1}% short-lived allocations",
878                churn_ratio * 100.0
879            ),
880            confidence: if churn_ratio > 0.5 { 0.8 } else { 0.4 },
881            impact_score: churn_ratio,
882            recommendations: vec![
883                "Consider object pooling for frequently allocated objects".to_string(),
884                "Reduce temporary object creation in hot paths".to_string(),
885            ],
886            examples: vec![], // Simplified for now
887        })
888    }
889
890    fn detect_large_allocation_pattern(&self) -> Result<AllocationPattern> {
891        let allocations = self.allocations.lock().expect("lock should not be poisoned");
892        let large_allocations: Vec<_> = allocations
893            .values()
894            .filter(|record| record.size > self.config.large_allocation_threshold)
895            .cloned()
896            .collect();
897
898        let impact_score = if !allocations.is_empty() {
899            large_allocations.len() as f64 / allocations.len() as f64
900        } else {
901            0.0
902        };
903
904        Ok(AllocationPattern {
905            pattern_type: PatternType::LargeAllocations,
906            description: format!(
907                "Found {} large allocations (>{}MB)",
908                large_allocations.len(),
909                self.config.large_allocation_threshold / 1024 / 1024
910            ),
911            confidence: if large_allocations.len() > 5 { 0.9 } else { 0.6 },
912            impact_score,
913            recommendations: vec![
914                "Review large allocations for optimization opportunities".to_string(),
915                "Consider streaming or chunked processing for large data".to_string(),
916            ],
917            examples: large_allocations.into_iter().take(3).collect(),
918        })
919    }
920
921    fn detect_fragmentation_pattern(&self) -> Result<AllocationPattern> {
922        let fragmentation = self.analyze_fragmentation()?;
923
924        Ok(AllocationPattern {
925            pattern_type: PatternType::FragmentationCausing,
926            description: format!(
927                "Memory fragmentation at {:.1}%",
928                fragmentation.fragmentation_ratio * 100.0
929            ),
930            confidence: 0.8,
931            impact_score: fragmentation.fragmentation_ratio,
932            recommendations: fragmentation.recommendations,
933            examples: vec![], // Simplified for now
934        })
935    }
936}
937
938impl PartialOrd for LeakSeverity {
939    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
940        Some(self.cmp(other))
941    }
942}
943
944impl Ord for LeakSeverity {
945    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
946        let self_val = match self {
947            LeakSeverity::Low => 0,
948            LeakSeverity::Medium => 1,
949            LeakSeverity::High => 2,
950            LeakSeverity::Critical => 3,
951        };
952        let other_val = match other {
953            LeakSeverity::Low => 0,
954            LeakSeverity::Medium => 1,
955            LeakSeverity::High => 2,
956            LeakSeverity::Critical => 3,
957        };
958        self_val.cmp(&other_val)
959    }
960}
961
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: start the profiler, record two allocations,
    /// free one, and check the report's counters.
    #[tokio::test(flavor = "multi_thread")]
    #[ignore] // FIXME: This test has implementation issues causing slow execution
    async fn test_memory_profiler_basic() -> Result<()> {
        let config = MemoryProfilingConfig {
            sampling_interval_ms: 1000, // Slower sampling for faster tests
            ..Default::default()
        };
        let mut profiler = MemoryProfiler::new(config);

        // Wrap in timeout to prevent hanging
        let test_result = tokio::time::timeout(Duration::from_millis(500), async {
            profiler.start().await?;

            // Record some allocations
            let alloc_id1 = profiler.record_allocation(
                1024,
                AllocationType::Tensor,
                vec!["test".to_string()],
            )?;

            let _alloc_id2 = profiler.record_allocation(
                2048,
                AllocationType::Buffer,
                vec!["test".to_string()],
            )?;

            // Free one allocation
            profiler.record_deallocation(alloc_id1)?;

            // Give background tasks a moment to process
            tokio::time::sleep(Duration::from_millis(1)).await;

            let report = profiler.stop().await?;

            assert_eq!(report.total_allocations, 2);
            assert_eq!(report.total_deallocations, 1);
            assert_eq!(report.net_allocations, 1);

            Ok::<(), anyhow::Error>(())
        })
        .await;

        match test_result {
            Ok(result) => result,
            Err(_) => Err(anyhow::anyhow!("Test timed out after 500ms")),
        }
    }

    /// An allocation that outlives the (shortened) leak threshold must be
    /// reported by `detect_leaks`.
    #[tokio::test]
    async fn test_leak_detection() -> Result<()> {
        let config = MemoryProfilingConfig {
            leak_detection_threshold_secs: 1, // 1 second for testing
            ..Default::default()
        };

        let mut profiler = MemoryProfiler::new(config);
        profiler.start().await?; // Start the profiler

        // Record allocation and wait past the leak threshold.
        profiler.record_allocation(1024, AllocationType::Tensor, vec!["leak_test".to_string()])?;

        tokio::time::sleep(Duration::from_secs(2)).await;

        let leaks = profiler.detect_leaks()?;
        assert!(!leaks.is_empty());

        // Shut the profiler down so its background sampling task does not
        // outlive the test (the previous version leaked it).
        let _report = profiler.stop().await?;

        Ok(())
    }

    /// Size-bucket labels must match the documented boundaries.
    #[test]
    fn test_size_buckets() {
        let config = MemoryProfilingConfig::default();
        let profiler = MemoryProfiler::new(config);

        assert_eq!(profiler.get_size_bucket(512), "0-1KB");
        assert_eq!(profiler.get_size_bucket(5120), "1-10KB");
        assert_eq!(profiler.get_size_bucket(51200), "10-100KB");
        assert_eq!(profiler.get_size_bucket(512000), "100KB-1MB");
        assert_eq!(profiler.get_size_bucket(5120000), "1-10MB");
        assert_eq!(profiler.get_size_bucket(51200000), ">10MB");
    }

    /// Severity classification should scale with both size and age.
    #[test]
    fn test_leak_severity_classification() {
        let config = MemoryProfilingConfig::default();
        let profiler = MemoryProfiler::new(config);

        // Small, new allocation
        assert_eq!(
            profiler.classify_leak_severity(1024, 60.0),
            LeakSeverity::Low
        );

        // Large, old allocation
        assert_eq!(
            profiler.classify_leak_severity(10485760, 3700.0),
            LeakSeverity::Critical
        );

        // Medium size, medium age
        assert_eq!(
            profiler.classify_leak_severity(524288, 1900.0),
            LeakSeverity::Medium
        );
    }
}