memscope-rs 0.2.0

A memory tracking library for Rust applications.
Documentation
use crate::analysis::unknown::types::*;
use crate::capture::types::{AllocationInfo, ImplementationDifficulty};
use std::collections::HashMap;

pub struct UnknownMemoryAnalyzer {
    pub known_system_regions: HashMap<(usize, usize), SystemRegionInfo>,
    pub library_mappings: HashMap<String, LibraryMappingInfo>,
    pub thread_memory_ranges: HashMap<u64, Vec<(usize, usize)>>,
}

impl Default for UnknownMemoryAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

impl UnknownMemoryAnalyzer {
    pub fn new() -> Self {
        Self {
            known_system_regions: HashMap::new(),
            library_mappings: HashMap::new(),
            thread_memory_ranges: HashMap::new(),
        }
    }

    pub fn analyze_unknown_regions(
        &mut self,
        allocations: &[AllocationInfo],
    ) -> UnknownMemoryRegionAnalysis {
        let total_memory: usize = allocations.iter().map(|a| a.size).sum();
        let unknown_allocations = self.identify_unknown_allocations(allocations);
        let total_unknown: usize = unknown_allocations.iter().map(|a| a.size).sum();
        let unknown_percentage = (total_unknown as f64 / total_memory as f64) * 100.0;
        let unknown_categories = self.categorize_unknown_regions(&unknown_allocations);
        let potential_causes = self.identify_potential_causes(&unknown_allocations);
        let reduction_strategies = self.generate_reduction_strategies(&unknown_categories);

        UnknownMemoryRegionAnalysis {
            total_unknown_bytes: total_unknown,
            unknown_percentage,
            unknown_categories,
            potential_causes,
            reduction_strategies,
        }
    }

    fn identify_unknown_allocations<'a>(
        &self,
        allocations: &'a [AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_unknown_allocation(alloc))
            .collect()
    }

    fn is_unknown_allocation(&self, allocation: &AllocationInfo) -> bool {
        if self.is_in_stack_region(allocation.ptr) || self.is_in_heap_region(allocation.ptr) {
            return false;
        }

        if self.is_known_system_region(allocation.ptr) {
            return false;
        }

        true
    }

    fn categorize_unknown_regions(
        &self,
        unknown_allocations: &[&AllocationInfo],
    ) -> Vec<UnknownMemoryCategory> {
        let mut categories = Vec::new();

        let mmap_allocations = self.identify_memory_mapped_regions(unknown_allocations);
        if !mmap_allocations.is_empty() {
            categories.push(UnknownMemoryCategory {
                category_type: UnknownRegionType::MemoryMappedRegions,
                description: "Memory-mapped files, shared memory, and anonymous mappings"
                    .to_string(),
                estimated_size: mmap_allocations.iter().map(|a| a.size).sum(),
                confidence_level: 0.8,
                examples: self.generate_examples(&mmap_allocations, "Memory mapping"),
            });
        }

        let tls_allocations = self.identify_thread_local_storage(unknown_allocations);
        if !tls_allocations.is_empty() {
            categories.push(UnknownMemoryCategory {
                category_type: UnknownRegionType::ThreadLocalStorage,
                description: "Thread-local storage and thread control blocks".to_string(),
                estimated_size: tls_allocations.iter().map(|a| a.size).sum(),
                confidence_level: 0.7,
                examples: self.generate_examples(&tls_allocations, "Thread-local storage"),
            });
        }

        let lib_allocations = self.identify_library_regions(unknown_allocations);
        if !lib_allocations.is_empty() {
            categories.push(UnknownMemoryCategory {
                category_type: UnknownRegionType::DynamicLibraryRegions,
                description: "Code and data sections of dynamically loaded libraries".to_string(),
                estimated_size: lib_allocations.iter().map(|a| a.size).sum(),
                confidence_level: 0.9,
                examples: self.generate_examples(&lib_allocations, "Dynamic library"),
            });
        }

        let ffi_allocations = self.identify_ffi_allocations(unknown_allocations);
        if !ffi_allocations.is_empty() {
            categories.push(UnknownMemoryCategory {
                category_type: UnknownRegionType::ExternalLibraryAllocations,
                description: "Memory allocated by external C/C++ libraries through FFI".to_string(),
                estimated_size: ffi_allocations.iter().map(|a| a.size).sum(),
                confidence_level: 0.6,
                examples: self.generate_examples(&ffi_allocations, "FFI allocation"),
            });
        }

        let system_allocations = self.identify_system_regions(unknown_allocations);
        if !system_allocations.is_empty() {
            categories.push(UnknownMemoryCategory {
                category_type: UnknownRegionType::SystemReservedRegions,
                description: "Kernel buffers, driver memory, and system caches".to_string(),
                estimated_size: system_allocations.iter().map(|a| a.size).sum(),
                confidence_level: 0.5,
                examples: self.generate_examples(&system_allocations, "System region"),
            });
        }

        let pre_tracking = self.identify_pre_tracking_allocations(unknown_allocations);
        if !pre_tracking.is_empty() {
            categories.push(UnknownMemoryCategory {
                category_type: UnknownRegionType::PreTrackingAllocations,
                description: "Memory allocated before tracking was initialized".to_string(),
                estimated_size: pre_tracking.iter().map(|a| a.size).sum(),
                confidence_level: 0.9,
                examples: self.generate_examples(&pre_tracking, "Pre-tracking"),
            });
        }

        categories
    }

    fn identify_potential_causes(
        &self,
        unknown_allocations: &[&AllocationInfo],
    ) -> Vec<UnknownMemoryCause> {
        let mut causes = Vec::new();

        for allocation in unknown_allocations {
            if self.is_likely_ffi_allocation(allocation) {
                causes.push(UnknownMemoryCause::ForeignFunctionInterface {
                    library_name: self
                        .guess_library_name(allocation)
                        .unwrap_or_else(|| "unknown".to_string()),
                    function_name: None,
                });
            }
        }

        if self.has_memory_mapping_pattern(unknown_allocations) {
            causes.push(UnknownMemoryCause::MemoryMapping {
                mapping_type: MappingType::AnonymousMapping,
                file_path: None,
            });
        }

        if self.has_threading_pattern(unknown_allocations) {
            causes.push(UnknownMemoryCause::ThreadingMemory {
                thread_id: None,
                memory_type: ThreadMemoryType::ThreadStack,
            });
        }

        causes.push(UnknownMemoryCause::InstrumentationGaps {
            gap_type: InstrumentationGapType::EarlyBootstrap,
            description: "Memory allocated during early program initialization".to_string(),
        });

        causes
    }

    fn generate_reduction_strategies(
        &self,
        _categories: &[UnknownMemoryCategory],
    ) -> Vec<UnknownRegionReductionStrategy> {
        vec![
            UnknownRegionReductionStrategy {
                strategy_type: ReductionStrategyType::EnhancedInstrumentation,
                description: "Implement more comprehensive memory tracking hooks".to_string(),
                implementation_steps: vec![
                    "Hook into mmap/munmap system calls".to_string(),
                    "Intercept malloc/free in all loaded libraries".to_string(),
                    "Track thread creation and destruction".to_string(),
                    "Monitor dynamic library loading".to_string(),
                ],
                expected_improvement: 60.0,
                implementation_difficulty: ImplementationDifficulty::Hard,
            },
            UnknownRegionReductionStrategy {
                strategy_type: ReductionStrategyType::FfiCallInterception,
                description: "Intercept and track FFI calls to external libraries".to_string(),
                implementation_steps: vec![
                    "Wrap all extern function calls".to_string(),
                    "Track memory allocations in C libraries".to_string(),
                    "Monitor shared library symbol resolution".to_string(),
                ],
                expected_improvement: 25.0,
                implementation_difficulty: ImplementationDifficulty::Medium,
            },
            UnknownRegionReductionStrategy {
                strategy_type: ReductionStrategyType::MemoryMappingTracking,
                description: "Track memory mapping operations comprehensively".to_string(),
                implementation_steps: vec![
                    "Monitor /proc/self/maps changes".to_string(),
                    "Track mmap/mprotect/munmap calls".to_string(),
                    "Analyze virtual memory layout".to_string(),
                ],
                expected_improvement: 20.0,
                implementation_difficulty: ImplementationDifficulty::Medium,
            },
        ]
    }

    fn is_in_stack_region(&self, _ptr: usize) -> bool {
        false
    }

    fn is_in_heap_region(&self, _ptr: usize) -> bool {
        false
    }

    fn is_known_system_region(&self, ptr: usize) -> bool {
        self.known_system_regions
            .iter()
            .any(|((start, end), _)| ptr >= *start && ptr < *end)
    }

    fn identify_memory_mapped_regions<'a>(
        &self,
        allocations: &[&'a AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_likely_mmap_allocation(alloc))
            .copied()
            .collect()
    }

    fn identify_thread_local_storage<'a>(
        &self,
        allocations: &[&'a AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_likely_tls_allocation(alloc))
            .copied()
            .collect()
    }

    fn identify_library_regions<'a>(
        &self,
        allocations: &[&'a AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_likely_library_allocation(alloc))
            .copied()
            .collect()
    }

    fn identify_ffi_allocations<'a>(
        &self,
        allocations: &[&'a AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_likely_ffi_allocation(alloc))
            .copied()
            .collect()
    }

    fn identify_system_regions<'a>(
        &self,
        allocations: &[&'a AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_likely_system_allocation(alloc))
            .copied()
            .collect()
    }

    fn identify_pre_tracking_allocations<'a>(
        &self,
        allocations: &[&'a AllocationInfo],
    ) -> Vec<&'a AllocationInfo> {
        allocations
            .iter()
            .filter(|alloc| self.is_likely_pre_tracking_allocation(alloc))
            .copied()
            .collect()
    }

    #[allow(clippy::manual_is_multiple_of)]
    fn is_likely_mmap_allocation(&self, allocation: &AllocationInfo) -> bool {
        allocation.size >= 4096 && allocation.ptr % 4096 == 0
    }

    fn is_likely_tls_allocation(&self, allocation: &AllocationInfo) -> bool {
        allocation.size < 1024 && self.is_in_thread_range(allocation.ptr)
    }

    fn is_likely_library_allocation(&self, allocation: &AllocationInfo) -> bool {
        self.library_mappings
            .values()
            .any(|lib| lib.contains_address(allocation.ptr))
    }

    fn is_likely_ffi_allocation(&self, allocation: &AllocationInfo) -> bool {
        allocation.type_name.is_none() && allocation.var_name.is_none()
    }

    fn is_likely_system_allocation(&self, allocation: &AllocationInfo) -> bool {
        allocation.ptr < 0x1000 || allocation.ptr > 0x7fff_0000_0000
    }

    fn is_likely_pre_tracking_allocation(&self, allocation: &AllocationInfo) -> bool {
        allocation.timestamp_alloc < 1000
    }

    fn is_in_thread_range(&self, ptr: usize) -> bool {
        self.thread_memory_ranges.values().any(|ranges| {
            ranges
                .iter()
                .any(|(start, end)| ptr >= *start && ptr < *end)
        })
    }

    fn has_memory_mapping_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
        allocations
            .iter()
            .any(|alloc| self.is_likely_mmap_allocation(alloc))
    }

    fn has_threading_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
        allocations
            .iter()
            .any(|alloc| self.is_likely_tls_allocation(alloc))
    }

    fn guess_library_name(&self, allocation: &AllocationInfo) -> Option<String> {
        for (name, info) in &self.library_mappings {
            if info.contains_address(allocation.ptr) {
                return Some(name.to_string());
            }
        }
        None
    }

    fn generate_examples(
        &self,
        allocations: &[&AllocationInfo],
        origin: &str,
    ) -> Vec<UnknownMemoryExample> {
        allocations
            .iter()
            .take(3)
            .map(|alloc| UnknownMemoryExample {
                address_range: (alloc.ptr, alloc.ptr + alloc.size),
                size: alloc.size,
                suspected_origin: origin.to_string(),
                access_pattern: MemoryAccessPattern::Unknown,
            })
            .collect()
    }
}