memscope_rs/lockfree/tracker.rs

//! Lock-free thread-local memory tracking with intelligent sampling and binary file output.
//!
//! This module implements the multi-threaded approach that eliminates lock contention
//! by using completely independent thread-local tracking with binary file intermediates
//! and intelligent sampling strategies.
//!
//! Key features:
//! - Zero-lock design: Each thread operates independently
//! - Intelligent sampling: Frequency + size dual-dimension sampling
//! - Binary format: Uses bincode for compact, low-overhead serialization
//! - Thread isolation: Complete elimination of shared state
//! - Performance focused: Minimal overhead on target application
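//!
//! # Example
//!
//! A minimal usage sketch of the thread-local API exported by this module
//! (illustrative only; the output directory below is a placeholder):
//!
//! ```ignore
//! use std::path::Path;
//!
//! // Initialize tracking for the current thread with default sampling.
//! init_thread_tracker(Path::new("./memscope_output"), None)?;
//!
//! // Record an allocation and its matching deallocation.
//! let call_stack = vec![0x1000, 0x2000];
//! track_allocation_lockfree(0xDEAD_BEEF, 1024, &call_stack)?;
//! track_deallocation_lockfree(0xDEAD_BEEF, &call_stack)?;
//!
//! // Flush buffered events and frequency data to the per-thread binary files.
//! finalize_thread_tracker()?;
//! ```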

use bincode::{Decode, Encode};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

// EventType moved to analysis.rs to avoid duplication
pub use crate::lockfree::analysis::EventType;

/// Enhanced tracking event with rich metadata
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct Event {
    pub timestamp: u64,
    pub ptr: usize,
    pub size: usize,
    pub call_stack_hash: u64,
    pub event_type: EventType,
    pub thread_id: u64,
    /// Full call stack addresses (limited to prevent explosion)
    pub call_stack: Vec<usize>,
    /// CPU time consumed by this thread (nanoseconds)
    pub cpu_time_ns: u64,
    /// Memory alignment used for this allocation
    pub alignment: usize,
    /// Allocation category (Small/Medium/Large)
    pub allocation_category: AllocationCategory,
    /// Thread name if available
    pub thread_name: Option<String>,
    /// Process memory stats at time of allocation
    pub memory_stats: MemoryStats,
    /// Real call stack with symbols (when available)
    #[cfg(feature = "backtrace")]
    pub real_call_stack: Option<RealCallStack>,
    /// System performance metrics
    #[cfg(feature = "system-metrics")]
    pub system_metrics: Option<SystemMetrics>,
    /// Advanced analysis data
    #[cfg(feature = "advanced-analysis")]
    pub analysis_data: Option<AnalysisData>,
}

/// Enhanced frequency tracking data
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct FrequencyData {
    pub call_stack_hash: u64,
    pub frequency: u64,
    pub total_size: usize,
    pub thread_id: u64,
    /// Average allocation size for this call stack
    pub avg_size: f64,
    /// Min and max sizes observed
    pub size_range: (usize, usize),
    /// First and last seen timestamps
    pub time_range: (u64, u64),
    /// CPU time spent in allocations from this call stack
    pub total_cpu_time: u64,
}
/// Memory allocation category
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub enum AllocationCategory {
    Small,  // <= 2KB
    Medium, // > 2KB and <= 64KB
    Large,  // > 64KB
}

/// Process memory statistics snapshot
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct MemoryStats {
    /// Virtual memory size in bytes
    pub virtual_memory: usize,
    /// Resident memory size in bytes
    pub resident_memory: usize,
    /// Heap memory in use
    pub heap_memory: usize,
    /// Number of page faults
    pub page_faults: u64,
}

/// Real call stack with symbol information
#[cfg(feature = "backtrace")]
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct RealCallStack {
    /// Raw addresses from backtrace
    pub addresses: Vec<usize>,
    /// Resolved symbols with function names
    pub symbols: Vec<StackFrame>,
    /// Call stack depth
    pub depth: usize,
}

#[cfg(feature = "backtrace")]
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct StackFrame {
    /// Function name if available
    pub function_name: Option<String>,
    /// File name if available
    pub filename: Option<String>,
    /// Line number if available
    pub line_number: Option<u32>,
    /// Memory address
    pub address: usize,
}

/// System performance metrics
#[cfg(feature = "system-metrics")]
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct SystemMetrics {
    /// CPU usage percentage (0-100)
    pub cpu_usage: f32,
    /// Available memory in bytes
    pub available_memory: u64,
    /// Total memory in bytes
    pub total_memory: u64,
    /// Load average (1, 5, 15 minutes)
    pub load_average: (f64, f64, f64),
    /// Number of active threads in process
    pub thread_count: usize,
    /// Current memory fragmentation ratio
    pub fragmentation_ratio: f32,
}

/// Advanced analysis data
#[cfg(feature = "advanced-analysis")]
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct AnalysisData {
    /// Allocation lifetime prediction
    pub predicted_lifetime_ms: u64,
    /// Allocation frequency pattern
    pub frequency_pattern: FrequencyPattern,
    /// Cross-thread sharing likelihood
    pub sharing_likelihood: f32,
    /// Memory access pattern prediction
    pub access_pattern: AccessPattern,
    /// Performance impact score (0-100)
    pub performance_impact: u8,
}

#[cfg(feature = "advanced-analysis")]
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub enum FrequencyPattern {
    Sporadic,
    Regular,
    Burst,
    Constant,
}

#[cfg(feature = "advanced-analysis")]
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub enum AccessPattern {
    Sequential,
    Random,
    Hotspot,
    Cached,
}

// Use the sampling configuration defined in the sampling module
pub use crate::lockfree::sampling::SamplingConfig;

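// Illustrative sketch (not part of the original code): sampling can be tuned
// before a thread tracker is initialized, using struct-update syntax exactly as
// the tests at the end of this file do. `output_dir` is a placeholder for the
// directory that will receive the per-thread binary files.
//
//     let config = SamplingConfig {
//         small_allocation_rate: 0.01, // e.g. sample 1% of small allocations
//         ..Default::default()
//     };
//     init_thread_tracker(&output_dir, Some(config))?;
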
// Thread-local tracker that operates completely independently
// Uses RefCell for interior mutability without locks
thread_local! {
    static THREAD_TRACKER: std::cell::RefCell<Option<ThreadLocalTracker>> =
        const { std::cell::RefCell::new(None) };
}

/// Thread-local memory tracker with enhanced metadata collection
#[derive(Debug)]
pub struct ThreadLocalTracker {
    /// Thread ID for identification
    thread_id: u64,
    /// Event buffer for batch writing
    event_buffer: Vec<Event>,
    /// Call stack frequency tracking
    call_stack_frequencies: HashMap<u64, u64>,
    /// Call stack size tracking for intelligent sampling
    call_stack_sizes: HashMap<u64, usize>,
    /// Call stack size ranges for statistics
    call_stack_size_ranges: HashMap<u64, (usize, usize)>,
    /// Call stack time ranges for temporal analysis
    call_stack_time_ranges: HashMap<u64, (u64, u64)>,
    /// Call stack CPU time accumulation
    call_stack_cpu_times: HashMap<u64, u64>,
    /// Buffer size before flushing to disk
    buffer_size: usize,
    /// Path of the per-thread binary event file
    file_path: std::path::PathBuf,
    /// Sampling configuration
    config: SamplingConfig,
    /// Random number generator for sampling decisions
    rng_state: u64,
    /// Thread name if detected
    thread_name: Option<String>,
    /// Start time for CPU time calculations
    start_time: std::time::Instant,
    /// Thread history for advanced analysis
    #[cfg(feature = "advanced-analysis")]
    thread_history: HashMap<u64, (u64, u64)>, // call_stack_hash -> (last_time, count)
    /// Performance sampling counter to limit overhead
    performance_sample_counter: u64,
}

impl ThreadLocalTracker {
    /// Creates a new thread-local tracker with specified configuration
    ///
    /// # Arguments
    /// * `output_dir` - Directory for storing thread-specific binary files
    /// * `config` - Sampling configuration for intelligent allocation tracking
    ///
    /// # Returns
    /// Result containing the configured tracker or IO error
    pub fn new(output_dir: &std::path::Path, config: SamplingConfig) -> std::io::Result<Self> {
        let thread_id = get_thread_id();
        let file_path = output_dir.join(format!("memscope_thread_{}.bin", thread_id));

        // Ensure output directory exists before creating tracker
        if let Some(parent) = file_path.parent() {
            std::fs::create_dir_all(parent)?;
        }

        // Pre-allocate buffer for optimal performance
        let event_buffer = Vec::with_capacity(1000);

        // Try to get thread name
        let thread_name = std::thread::current().name().map(|s| s.to_string());

        Ok(Self {
            thread_id,
            event_buffer,
            call_stack_frequencies: HashMap::new(),
            call_stack_sizes: HashMap::new(),
            call_stack_size_ranges: HashMap::new(),
            call_stack_time_ranges: HashMap::new(),
            call_stack_cpu_times: HashMap::new(),
            buffer_size: 1000,
            file_path,
            config,
            rng_state: thread_id, // Thread ID as deterministic seed
            thread_name,
            start_time: std::time::Instant::now(),
            #[cfg(feature = "advanced-analysis")]
            thread_history: HashMap::new(),
            performance_sample_counter: 0,
        })
    }

    /// Tracks allocation with enhanced metadata collection
    ///
    /// # Arguments
    /// * `ptr` - Memory pointer address
    /// * `size` - Allocation size in bytes
    /// * `call_stack` - Full call stack for detailed tracking
    ///
    /// # Returns
    /// Result indicating success or error during tracking/flushing
    pub fn track_allocation(
        &mut self,
        ptr: usize,
        size: usize,
        call_stack: &[usize],
    ) -> Result<(), Box<dyn std::error::Error>> {
        let call_stack_hash = calculate_call_stack_hash(call_stack);
        // Update frequency tracking for intelligent sampling
        let frequency = self
            .call_stack_frequencies
            .entry(call_stack_hash)
            .or_insert(0);
        *frequency += 1;
        let current_frequency = *frequency;
        self.call_stack_sizes.insert(call_stack_hash, size);

        // Update size ranges for this call stack
        let size_range = self
            .call_stack_size_ranges
            .entry(call_stack_hash)
            .or_insert((size, size));
        size_range.0 = size_range.0.min(size);
        size_range.1 = size_range.1.max(size);

        // Update time ranges
        let timestamp = get_timestamp();
        let time_range = self
            .call_stack_time_ranges
            .entry(call_stack_hash)
            .or_insert((timestamp, timestamp));
        time_range.0 = time_range.0.min(timestamp);
        time_range.1 = time_range.1.max(timestamp);

        // Calculate CPU time elapsed
        let cpu_time_ns = self.start_time.elapsed().as_nanos() as u64;
        *self
            .call_stack_cpu_times
            .entry(call_stack_hash)
            .or_insert(0) += cpu_time_ns / 1000; // Accumulated as microseconds (rough estimate)

        // For demo purposes, force sampling of more allocations
        if self.should_sample_allocation(size, current_frequency) || current_frequency <= 10 {
            // Update performance sampling counter
            self.performance_sample_counter += 1;
            let _should_collect_enhanced = self.performance_sample_counter % 10 == 0; // Collect enhanced data for every 10th sampled event

            // Update thread history for advanced analysis
            #[cfg(feature = "advanced-analysis")]
            {
                self.thread_history
                    .insert(call_stack_hash, (timestamp, current_frequency));
            }

            let event = Event {
                timestamp,
                ptr,
                size,
                call_stack_hash,
                event_type: EventType::Allocation,
                thread_id: self.thread_id,
                call_stack: call_stack.to_vec(),
                cpu_time_ns,
                alignment: get_alignment_for_size(size),
                allocation_category: categorize_allocation(size),
                thread_name: self.thread_name.clone(),
                memory_stats: get_memory_stats(),

                // Enhanced data collection (performance-gated)
                #[cfg(feature = "backtrace")]
                real_call_stack: if _should_collect_enhanced {
                    capture_real_call_stack()
                } else {
                    None
                },

                #[cfg(feature = "system-metrics")]
                system_metrics: if _should_collect_enhanced {
                    collect_system_metrics()
                } else {
                    None
                },

                #[cfg(feature = "advanced-analysis")]
                analysis_data: if _should_collect_enhanced {
                    analyze_allocation_pattern(
                        size,
                        current_frequency,
                        call_stack_hash,
                        &self.thread_history,
                    )
                } else {
                    None
                },
            };

            self.event_buffer.push(event);

            // Flush buffer when full to prevent memory bloat
            if self.event_buffer.len() >= self.buffer_size {
                self.flush_buffer()?;
            }
        }

        Ok(())
    }

    /// Tracks deallocation events for memory balance analysis
    ///
    /// # Arguments
    /// * `ptr` - Memory pointer address being deallocated
    /// * `call_stack` - Full call stack for correlation
    ///
    /// # Returns
    /// Result indicating success or error during tracking/flushing
    pub fn track_deallocation(
        &mut self,
        ptr: usize,
        call_stack: &[usize],
    ) -> Result<(), Box<dyn std::error::Error>> {
        let call_stack_hash = calculate_call_stack_hash(call_stack);
        // Use consistent sampling logic for deallocations with forced early sampling
        let frequency = self
            .call_stack_frequencies
            .get(&call_stack_hash)
            .copied()
            .unwrap_or(1);
        let size = self
            .call_stack_sizes
            .get(&call_stack_hash)
            .copied()
            .unwrap_or(0);

        if self.should_sample_allocation(size, frequency) || frequency <= 10 {
            let timestamp = get_timestamp();
            let cpu_time_ns = self.start_time.elapsed().as_nanos() as u64;

            // Performance-gated enhanced collection for deallocations too
            self.performance_sample_counter += 1;
            let _should_collect_enhanced = self.performance_sample_counter % 20 == 0; // Less frequent for deallocations

            let event = Event {
                timestamp,
                ptr,
                size: 0, // Recorded as 0; the size looked up above is only used for sampling and analysis
                call_stack_hash,
                event_type: EventType::Deallocation,
                thread_id: self.thread_id,
                call_stack: call_stack.to_vec(),
                cpu_time_ns,
                alignment: 0, // Not applicable for deallocations
                allocation_category: AllocationCategory::Small, // Default for deallocations
                thread_name: self.thread_name.clone(),
                memory_stats: get_memory_stats(),

                // Enhanced data collection (performance-gated)
                #[cfg(feature = "backtrace")]
                real_call_stack: if _should_collect_enhanced {
                    capture_real_call_stack()
                } else {
                    None
                },

                #[cfg(feature = "system-metrics")]
                system_metrics: if _should_collect_enhanced {
                    collect_system_metrics()
                } else {
                    None
                },

                #[cfg(feature = "advanced-analysis")]
                analysis_data: if _should_collect_enhanced {
                    self.thread_history
                        .get(&call_stack_hash)
                        .and_then(|(_, freq)| {
                            analyze_allocation_pattern(
                                size,
                                *freq,
                                call_stack_hash,
                                &self.thread_history,
                            )
                        })
                } else {
                    None
                },
            };

            self.event_buffer.push(event);

            // Flush buffer when full to prevent memory bloat
            if self.event_buffer.len() >= self.buffer_size {
                self.flush_buffer()?;
            }
        }

        Ok(())
    }

    /// Determines sampling decision using dual-dimension analysis
    ///
    /// Combines size-based and frequency-based sampling for intelligent allocation tracking.
    /// Large allocations and high-frequency patterns receive higher sampling rates.
    ///
    /// # Arguments
    /// * `size` - Allocation size in bytes
    /// * `frequency` - Current frequency count for this call stack
    ///
    /// # Returns
    /// Boolean indicating whether to sample this allocation
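    ///
    /// # Example (illustrative)
    ///
    /// With a hypothetical config where `large_threshold = 10 KiB`,
    /// `large_allocation_rate = 1.0`, and `frequency_threshold = 1000`
    /// (these values are for illustration only, not the crate defaults):
    ///
    /// ```text
    /// size = 64 KiB, frequency = 5    -> rate 1.0, always sampled
    /// size = 256 B,  frequency = 5    -> small-allocation rate, random decision
    /// size = 256 B,  frequency = 5000 -> small-allocation rate boosted 5x (capped at 1.0)
    /// ```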
    fn should_sample_allocation(&mut self, size: usize, frequency: u64) -> bool {
        // Determine base sampling rate by allocation size
        let size_based_rate = match size {
            s if s >= self.config.large_threshold => self.config.large_allocation_rate,
            s if s >= self.config.medium_threshold => self.config.medium_allocation_rate,
            _ => self.config.small_allocation_rate,
        };

        // Apply frequency-based boost for performance-critical patterns
        let frequency_multiplier = if frequency > self.config.frequency_threshold {
            // High frequency allocations indicate performance hotspots
            (frequency as f64 / self.config.frequency_threshold as f64).min(10.0)
        } else {
            1.0
        };

        let final_rate = (size_based_rate * frequency_multiplier).min(1.0);

        // For demo config with 100% sampling rates, always sample
        if size_based_rate >= 1.0 {
            return true;
        }

        // Fast linear congruential generator for sampling decision
        self.rng_state = self.rng_state.wrapping_mul(1103515245).wrapping_add(12345);
        // Take 16 bits of state so the value falls in [0, 1)
        let random_value = ((self.rng_state >> 16) & 0xFFFF) as f64 / 65536.0;

        // For demo purposes, be much more generous with sampling
        let adjusted_rate = if final_rate > 0.8 {
            1.0
        } else {
            (final_rate * 2.0).min(1.0) // Double the sampling rate
        };

        random_value < adjusted_rate
    }

    /// Flush event buffer to binary file
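    ///
    /// Each flush appends one length-prefixed record to the per-thread file:
    ///
    /// ```text
    /// [len: u32, little-endian][len bytes: bincode-encoded Vec<Event>]
    /// ```
    ///
    /// A reader therefore walks the file record by record. A minimal sketch of
    /// that loop (illustrative only, not the crate's actual reader):
    ///
    /// ```ignore
    /// let bytes = std::fs::read(&path)?;
    /// let mut offset = 0;
    /// while offset + 4 <= bytes.len() {
    ///     let len_bytes: [u8; 4] = bytes[offset..offset + 4].try_into().expect("length header");
    ///     let len = u32::from_le_bytes(len_bytes) as usize;
    ///     offset += 4;
    ///     let (events, _): (Vec<Event>, usize) =
    ///         bincode::decode_from_slice(&bytes[offset..offset + len], bincode::config::standard())?;
    ///     offset += len;
    ///     // ... process `events`
    /// }
    /// ```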
    fn flush_buffer(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        if self.event_buffer.is_empty() {
            return Ok(());
        }

        // Serialize events using bincode (compact binary format)
        let serialized = bincode::encode_to_vec(&self.event_buffer, bincode::config::standard())?;

        // Append to file (create if it doesn't exist)
        use std::fs::OpenOptions;
        use std::io::Write;

        let mut file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&self.file_path)?;

        // Write length header (4 bytes) followed by data
        let len = serialized.len() as u32;
        file.write_all(&len.to_le_bytes())?;
        file.write_all(&serialized)?;
        file.flush()?;

        // Clear buffer
        self.event_buffer.clear();

        Ok(())
    }

    /// Export enhanced frequency data at the end of tracking
    pub fn export_frequency_data(&self) -> Result<(), Box<dyn std::error::Error>> {
        let frequency_data: Vec<FrequencyData> = self
            .call_stack_frequencies
            .iter()
            .map(|(&call_stack_hash, &frequency)| {
                let size = self
                    .call_stack_sizes
                    .get(&call_stack_hash)
                    .copied()
                    .unwrap_or(0);
                let total_size = size * frequency as usize;
                let size_range = self
                    .call_stack_size_ranges
                    .get(&call_stack_hash)
                    .copied()
                    .unwrap_or((size, size));
                let time_range = self
                    .call_stack_time_ranges
                    .get(&call_stack_hash)
                    .copied()
                    .unwrap_or((0, 0));
                let total_cpu_time = self
                    .call_stack_cpu_times
                    .get(&call_stack_hash)
                    .copied()
                    .unwrap_or(0);

                FrequencyData {
                    call_stack_hash,
                    frequency,
                    total_size,
                    thread_id: self.thread_id,
                    avg_size: if frequency > 0 {
                        total_size as f64 / frequency as f64
                    } else {
                        0.0
                    },
                    size_range,
                    time_range,
                    total_cpu_time,
                }
            })
            .collect();

        let frequency_file = self.file_path.with_extension("freq");
        let serialized = bincode::encode_to_vec(&frequency_data, bincode::config::standard())?;

        std::fs::write(frequency_file, serialized)?;
        Ok(())
    }

    /// Force flush any remaining events and export frequency data
    pub fn finalize(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        self.flush_buffer()?;
        self.export_frequency_data()?;
        Ok(())
    }
}

impl Drop for ThreadLocalTracker {
    fn drop(&mut self) {
        // Ensure all data is flushed on drop
        let _ = self.finalize();
    }
}

/// Get unique thread ID
fn get_thread_id() -> u64 {
    static THREAD_COUNTER: AtomicU64 = AtomicU64::new(1);

    thread_local! {
        static THREAD_ID: u64 = THREAD_COUNTER.fetch_add(1, Ordering::Relaxed);
    }

    THREAD_ID.with(|&id| id)
}

/// Gets current timestamp in nanoseconds with fallback to zero
///
/// Uses system time but handles clock errors gracefully without panicking
fn get_timestamp() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|duration| duration.as_nanos() as u64)
        .unwrap_or(0)
}

/// Determine appropriate alignment for allocation size
fn get_alignment_for_size(size: usize) -> usize {
    match size {
        0..=8 => 8,
        9..=16 => 16,
        17..=32 => 32,
        33..=64 => 64,
        _ => 64, // Default alignment for larger allocations
    }
}

/// Categorize allocation by size
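///
/// For example, with the thresholds below, `categorize_allocation(1_500)` is
/// `Small`, `categorize_allocation(4_096)` is `Medium`, and
/// `categorize_allocation(128 * 1024)` is `Large`.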
fn categorize_allocation(size: usize) -> AllocationCategory {
    match size {
        0..=2048 => AllocationCategory::Small,
        2049..=65536 => AllocationCategory::Medium,
        _ => AllocationCategory::Large,
    }
}

/// Capture real call stack with symbols
#[cfg(feature = "backtrace")]
fn capture_real_call_stack() -> Option<RealCallStack> {
    let mut addresses = Vec::new();
    let mut symbols = Vec::new();

    // Capture backtrace with limited depth for performance
    backtrace::trace(|frame| {
        let addr = frame.ip() as usize;
        addresses.push(addr);

        // Resolve symbols for this frame
        backtrace::resolve_frame(frame, |symbol| {
            let function_name = symbol.name().map(|n| format!("{}", n));
            let filename = symbol.filename().and_then(|f| f.to_str().map(String::from));
            let line_number = symbol.lineno();

            symbols.push(StackFrame {
                function_name,
                filename,
                line_number,
                address: addr,
            });
        });

        // Limit call stack depth for performance
        addresses.len() < 16
    });

    if addresses.is_empty() {
        None
    } else {
        let depth = symbols.len();
        Some(RealCallStack {
            addresses,
            symbols,
            depth,
        })
    }
}

/// Collect system performance metrics
#[cfg(feature = "system-metrics")]
fn collect_system_metrics() -> Option<SystemMetrics> {
    use sysinfo::{Pid, System};

    // Use thread-local system info to avoid repeated initialization
    thread_local! {
        static SYSTEM: std::cell::RefCell<System> = std::cell::RefCell::new(System::new_all());
    }

    SYSTEM.with(|sys| {
        let mut system = sys.borrow_mut();
        system.refresh_cpu_all();
        system.refresh_memory();
        system.refresh_processes(sysinfo::ProcessesToUpdate::All, true);

        // Get CPU usage (sysinfo 0.30+ API)
        let cpu_usage = system.global_cpu_usage();
        let available_memory = system.available_memory();
        let total_memory = system.total_memory();

        // Get load average using new API
        let load_avg = System::load_average();

        // Per-process thread counts are not exposed here, so fall back to an estimate
        let current_pid = sysinfo::get_current_pid().ok()?;
        let thread_count = if system
            .process(Pid::from_u32(current_pid.as_u32()))
            .is_some()
        {
            // Estimate thread count as we can't access tasks directly
            num_cpus::get()
        } else {
            1
        };

        // Calculate fragmentation ratio estimate
        let used_memory = total_memory - available_memory;
        let fragmentation_ratio = if total_memory > 0 {
            (used_memory as f32 / total_memory as f32).min(1.0)
        } else {
            0.0
        };

        Some(SystemMetrics {
            cpu_usage,
            available_memory,
            total_memory,
            load_average: (load_avg.one, load_avg.five, load_avg.fifteen),
            thread_count,
            fragmentation_ratio,
        })
    })
}

/// Perform advanced analysis on allocation pattern
#[cfg(feature = "advanced-analysis")]
fn analyze_allocation_pattern(
    size: usize,
    frequency: u64,
    _call_stack_hash: u64,
    _thread_history: &HashMap<u64, (u64, u64)>, // (last_time, count)
) -> Option<AnalysisData> {
    // Predict allocation lifetime based on size and frequency
    let predicted_lifetime_ms = match size {
        0..=1024 => 10 + (frequency * 2), // Small allocations are short-lived
        1025..=32768 => 100 + (frequency * 5), // Medium allocations
        _ => 1000 + (frequency * 10),     // Large allocations live longer
    };

    // Determine frequency pattern
    let frequency_pattern = match frequency {
        1..=5 => FrequencyPattern::Sporadic,
        6..=20 => FrequencyPattern::Regular,
        21..=100 => FrequencyPattern::Burst,
        _ => FrequencyPattern::Constant,
    };

    // Estimate sharing likelihood based on call stack commonality
    let sharing_likelihood = if frequency > 50 {
        0.8 // High frequency suggests shared usage
    } else if frequency > 10 {
        0.4
    } else {
        0.1
    };

    // Predict access pattern based on size and frequency
    let access_pattern = match (size, frequency) {
        (0..=64, f) if f > 100 => AccessPattern::Hotspot,
        (65..=4096, _) => AccessPattern::Sequential,
        (_, f) if f > 20 => AccessPattern::Cached,
        _ => AccessPattern::Random,
    };

    // Calculate performance impact score (clamped to the documented 0-100 range)
    let performance_impact =
        ((frequency.min(100) as f64 * size.min(100000) as f64) / 10000.0).min(100.0) as u8;

    Some(AnalysisData {
        predicted_lifetime_ms,
        frequency_pattern,
        sharing_likelihood,
        access_pattern,
        performance_impact,
    })
}

/// Get current process memory statistics
fn get_memory_stats() -> MemoryStats {
    #[cfg(target_os = "linux")]
    {
        // Try to read from /proc/self/status on Linux
        if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
            let mut vm_size = 0;
            let mut vm_rss = 0;

            for line in status.lines() {
                if line.starts_with("VmSize:") {
                    if let Some(kb_str) = line.split_whitespace().nth(1) {
                        vm_size = kb_str.parse::<usize>().unwrap_or(0) * 1024;
                    }
                } else if line.starts_with("VmRSS:") {
                    if let Some(kb_str) = line.split_whitespace().nth(1) {
                        vm_rss = kb_str.parse::<usize>().unwrap_or(0) * 1024;
                    }
                }
            }

            MemoryStats {
                virtual_memory: vm_size,
                resident_memory: vm_rss,
                heap_memory: vm_rss / 2, // Rough estimate
                page_faults: 0,          // Would need separate syscall
            }
        } else {
            MemoryStats {
                virtual_memory: 0,
                resident_memory: 0,
                heap_memory: 0,
                page_faults: 0,
            }
        }
    }

    #[cfg(target_os = "macos")]
    {
        // Basic memory estimation for macOS
        MemoryStats {
            virtual_memory: 100 * 1024 * 1024, // 100MB estimate
            resident_memory: 50 * 1024 * 1024, // 50MB estimate
            heap_memory: 25 * 1024 * 1024,     // 25MB estimate
            page_faults: 0,
        }
    }

    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        // Default fallback for other platforms
        MemoryStats {
            virtual_memory: 0,
            resident_memory: 0,
            heap_memory: 0,
            page_faults: 0,
        }
    }
}

/// Calculate hash for call stack (simplified for now)
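///
/// The hash is deterministic for a given input within a process, so identical
/// call stacks collapse into the same frequency bucket. A small illustrative
/// example (not compiled as a doctest):
///
/// ```ignore
/// let a = calculate_call_stack_hash(&[0x1000, 0x2000]);
/// let b = calculate_call_stack_hash(&[0x1000, 0x2000]);
/// assert_eq!(a, b);
/// ```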
pub fn calculate_call_stack_hash(call_stack: &[usize]) -> u64 {
    let mut hasher = DefaultHasher::new();
    call_stack.hash(&mut hasher);
    hasher.finish()
}

// Global API functions for thread-local tracking

/// Initialize thread-local tracker for current thread
pub fn init_thread_tracker(
    output_dir: &std::path::Path,
    config: Option<SamplingConfig>,
) -> Result<(), Box<dyn std::error::Error>> {
    let config = config.unwrap_or_default();

    THREAD_TRACKER.with(|tracker| {
        let mut tracker_ref = tracker.borrow_mut();
        if tracker_ref.is_none() {
            *tracker_ref = Some(ThreadLocalTracker::new(output_dir, config)?);
        }
        Ok(())
    })
}

/// Track allocation in current thread using lock-free approach
pub fn track_allocation_lockfree(
    ptr: usize,
    size: usize,
    call_stack: &[usize],
) -> Result<(), Box<dyn std::error::Error>> {
    THREAD_TRACKER.with(|tracker| {
        let mut tracker_ref = tracker.borrow_mut();
        if let Some(ref mut t) = *tracker_ref {
            t.track_allocation(ptr, size, call_stack)
        } else {
            Err("Thread tracker not initialized. Call init_thread_tracker() first.".into())
        }
    })
}

/// Track deallocation in current thread using lock-free approach
pub fn track_deallocation_lockfree(
    ptr: usize,
    call_stack: &[usize],
) -> Result<(), Box<dyn std::error::Error>> {
    THREAD_TRACKER.with(|tracker| {
        let mut tracker_ref = tracker.borrow_mut();
        if let Some(ref mut t) = *tracker_ref {
            t.track_deallocation(ptr, call_stack)
        } else {
            Err("Thread tracker not initialized. Call init_thread_tracker() first.".into())
        }
    })
}

/// Finalize tracking for current thread
pub fn finalize_thread_tracker() -> Result<(), Box<dyn std::error::Error>> {
    THREAD_TRACKER.with(|tracker| {
        let mut tracker_ref = tracker.borrow_mut();
        if let Some(ref mut t) = *tracker_ref {
            t.finalize()
        } else {
            Ok(()) // No tracker initialized, nothing to finalize
        }
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;
    use std::thread;

    #[test]
    fn test_thread_local_tracking_basic() {
        let temp_dir = std::env::temp_dir().join("memscope_test");
        std::fs::create_dir_all(&temp_dir).unwrap();

        // Get thread ID before initializing tracker
        let thread_id = get_thread_id();

        let config = SamplingConfig::demo(); // Use demo config for 100% sampling
        init_thread_tracker(&temp_dir, Some(config)).unwrap();

        let call_stack = vec![0x1000, 0x2000, 0x3000];
        track_allocation_lockfree(0x4000, 1024, &call_stack)
            .expect("Allocation tracking should succeed");
        track_deallocation_lockfree(0x4000, &call_stack)
            .expect("Deallocation tracking should succeed");

        finalize_thread_tracker().unwrap();

        // Check that files were created using the thread ID we got earlier
        let event_file = temp_dir.join(format!("memscope_thread_{}.bin", thread_id));
        let freq_file = temp_dir.join(format!("memscope_thread_{}.freq", thread_id));

        // Debug: list files in directory if assertion fails
        if !event_file.exists() || !freq_file.exists() {
            println!("Files in temp directory:");
            if let Ok(entries) = std::fs::read_dir(&temp_dir) {
                for entry in entries.flatten() {
                    println!("  - {:?}", entry.file_name());
                }
            }
        }

        assert!(
            event_file.exists(),
            "Event file should exist: {:?}",
            event_file
        );
        assert!(
            freq_file.exists(),
            "Frequency file should exist: {:?}",
            freq_file
        );

        // Cleanup
        let _ = std::fs::remove_dir_all(&temp_dir);
    }

    #[test]
    fn test_multi_thread_independence() {
        let temp_dir = std::env::temp_dir().join("memscope_multithread_test");
        std::fs::create_dir_all(&temp_dir).unwrap();

        let thread_count = 10;
        let allocations_per_thread = 100;
        let counter = Arc::new(AtomicUsize::new(0));
        let thread_ids = Arc::new(std::sync::Mutex::new(Vec::new()));

        let handles: Vec<_> = (0..thread_count)
            .map(|thread_idx| {
                let temp_dir = temp_dir.clone();
                let counter = Arc::clone(&counter);
                let thread_ids = Arc::clone(&thread_ids);

                thread::spawn(move || {
                    // Initialize tracker for this thread
                    init_thread_tracker(&temp_dir, None).unwrap();

                    // Get and store the actual thread ID
                    let actual_thread_id = get_thread_id();
                    thread_ids.lock().unwrap().push(actual_thread_id);

                    for i in 0..allocations_per_thread {
                        let ptr = thread_idx * 10000 + i * 8;
                        let size = 64 + (i % 10) * 64; // Varying sizes
                        let call_stack = vec![0x1000 + thread_idx, 0x2000 + i];

                        track_allocation_lockfree(ptr, size, &call_stack)
                            .expect("Allocation tracking should succeed");

                        // Simulate some deallocations
                        if i % 3 == 0 {
                            track_deallocation_lockfree(ptr, &call_stack)
                                .expect("Deallocation tracking should succeed");
                        }

                        counter.fetch_add(1, Ordering::Relaxed);
                    }

                    finalize_thread_tracker().unwrap();
                })
            })
            .collect();

        // Wait for all threads to complete
        for handle in handles {
            handle.join().unwrap();
        }

        // Verify all operations completed
        assert_eq!(
            counter.load(Ordering::Relaxed),
            thread_count * allocations_per_thread
        );

        // Verify each thread created its own files using actual thread IDs
        let actual_thread_ids = thread_ids.lock().unwrap();
        for &thread_id in actual_thread_ids.iter() {
            let event_file = temp_dir.join(format!("memscope_thread_{}.bin", thread_id));
            let freq_file = temp_dir.join(format!("memscope_thread_{}.freq", thread_id));

            // Debug: list files in directory if assertion fails
            if !event_file.exists() || !freq_file.exists() {
                println!("Files in temp directory for thread {}:", thread_id);
                if let Ok(entries) = std::fs::read_dir(&temp_dir) {
                    for entry in entries.flatten() {
                        println!("  - {:?}", entry.file_name());
                    }
                }
            }

            assert!(
                event_file.exists(),
                "Event file missing for thread {}",
                thread_id
            );
            assert!(
                freq_file.exists(),
                "Frequency file missing for thread {}",
                thread_id
            );
        }

        // Cleanup
        let _ = std::fs::remove_dir_all(&temp_dir);
    }

    #[test]
    fn test_intelligent_sampling() {
        let config = SamplingConfig {
            small_allocation_rate: 0.1,  // 10% for small
            medium_allocation_rate: 0.5, // 50% for medium
            large_allocation_rate: 1.0,  // 100% for large
            ..Default::default()
        };

        let temp_dir = std::env::temp_dir().join("memscope_sampling_test");
        std::fs::create_dir_all(&temp_dir).unwrap();

        let mut tracker = ThreadLocalTracker::new(&temp_dir, config).unwrap();

        let mut sampled_small = 0;
        let mut sampled_large = 0;
        let total_small = 100;
        let total_large = 10;

        // Test small allocations (should have lower sample rate)
        for _i in 0..total_small {
            let size = 512; // Small allocation
            let was_sampled = tracker.should_sample_allocation(size, 1);
            if was_sampled {
                sampled_small += 1;
            }
        }

        // Test large allocations (should have higher sample rate)
        for _i in 0..total_large {
            let size = 20 * 1024; // Large allocation
            let was_sampled = tracker.should_sample_allocation(size, 1);
            if was_sampled {
                sampled_large += 1;
            }
        }

        // Large allocations should be sampled more frequently
        assert!(
            sampled_large as f64 / total_large as f64 > sampled_small as f64 / total_small as f64
        );

        // Cleanup
        let _ = std::fs::remove_dir_all(&temp_dir);
    }
}