Skip to main content

memscope_rs/analysis/unknown/
analyzer.rs

1use crate::analysis::unknown::types::*;
2use crate::capture::types::{AllocationInfo, ImplementationDifficulty};
3use std::collections::HashMap;
4
5pub struct UnknownMemoryAnalyzer {
6    pub known_system_regions: HashMap<(usize, usize), SystemRegionInfo>,
7    pub library_mappings: HashMap<String, LibraryMappingInfo>,
8    pub thread_memory_ranges: HashMap<u64, Vec<(usize, usize)>>,
9}
10
11impl Default for UnknownMemoryAnalyzer {
12    fn default() -> Self {
13        Self::new()
14    }
15}
16
17impl UnknownMemoryAnalyzer {
18    pub fn new() -> Self {
19        Self {
20            known_system_regions: HashMap::new(),
21            library_mappings: HashMap::new(),
22            thread_memory_ranges: HashMap::new(),
23        }
24    }
25
26    pub fn analyze_unknown_regions(
27        &mut self,
28        allocations: &[AllocationInfo],
29    ) -> UnknownMemoryRegionAnalysis {
30        let total_memory: usize = allocations.iter().map(|a| a.size).sum();
31        let unknown_allocations = self.identify_unknown_allocations(allocations);
32        let total_unknown: usize = unknown_allocations.iter().map(|a| a.size).sum();
33        let unknown_percentage = (total_unknown as f64 / total_memory as f64) * 100.0;
34        let unknown_categories = self.categorize_unknown_regions(&unknown_allocations);
35        let potential_causes = self.identify_potential_causes(&unknown_allocations);
36        let reduction_strategies = self.generate_reduction_strategies(&unknown_categories);
37
38        UnknownMemoryRegionAnalysis {
39            total_unknown_bytes: total_unknown,
40            unknown_percentage,
41            unknown_categories,
42            potential_causes,
43            reduction_strategies,
44        }
45    }
46
47    fn identify_unknown_allocations<'a>(
48        &self,
49        allocations: &'a [AllocationInfo],
50    ) -> Vec<&'a AllocationInfo> {
51        allocations
52            .iter()
53            .filter(|alloc| self.is_unknown_allocation(alloc))
54            .collect()
55    }
56
57    fn is_unknown_allocation(&self, allocation: &AllocationInfo) -> bool {
58        if self.is_in_stack_region(allocation.ptr) || self.is_in_heap_region(allocation.ptr) {
59            return false;
60        }
61
62        if self.is_known_system_region(allocation.ptr) {
63            return false;
64        }
65
66        true
67    }
68
69    fn categorize_unknown_regions(
70        &self,
71        unknown_allocations: &[&AllocationInfo],
72    ) -> Vec<UnknownMemoryCategory> {
73        let mut categories = Vec::new();
74
75        let mmap_allocations = self.identify_memory_mapped_regions(unknown_allocations);
76        if !mmap_allocations.is_empty() {
77            categories.push(UnknownMemoryCategory {
78                category_type: UnknownRegionType::MemoryMappedRegions,
79                description: "Memory-mapped files, shared memory, and anonymous mappings"
80                    .to_string(),
81                estimated_size: mmap_allocations.iter().map(|a| a.size).sum(),
82                confidence_level: 0.8,
83                examples: self.generate_examples(&mmap_allocations, "Memory mapping"),
84            });
85        }
86
87        let tls_allocations = self.identify_thread_local_storage(unknown_allocations);
88        if !tls_allocations.is_empty() {
89            categories.push(UnknownMemoryCategory {
90                category_type: UnknownRegionType::ThreadLocalStorage,
91                description: "Thread-local storage and thread control blocks".to_string(),
92                estimated_size: tls_allocations.iter().map(|a| a.size).sum(),
93                confidence_level: 0.7,
94                examples: self.generate_examples(&tls_allocations, "Thread-local storage"),
95            });
96        }
97
98        let lib_allocations = self.identify_library_regions(unknown_allocations);
99        if !lib_allocations.is_empty() {
100            categories.push(UnknownMemoryCategory {
101                category_type: UnknownRegionType::DynamicLibraryRegions,
102                description: "Code and data sections of dynamically loaded libraries".to_string(),
103                estimated_size: lib_allocations.iter().map(|a| a.size).sum(),
104                confidence_level: 0.9,
105                examples: self.generate_examples(&lib_allocations, "Dynamic library"),
106            });
107        }
108
109        let ffi_allocations = self.identify_ffi_allocations(unknown_allocations);
110        if !ffi_allocations.is_empty() {
111            categories.push(UnknownMemoryCategory {
112                category_type: UnknownRegionType::ExternalLibraryAllocations,
113                description: "Memory allocated by external C/C++ libraries through FFI".to_string(),
114                estimated_size: ffi_allocations.iter().map(|a| a.size).sum(),
115                confidence_level: 0.6,
116                examples: self.generate_examples(&ffi_allocations, "FFI allocation"),
117            });
118        }
119
120        let system_allocations = self.identify_system_regions(unknown_allocations);
121        if !system_allocations.is_empty() {
122            categories.push(UnknownMemoryCategory {
123                category_type: UnknownRegionType::SystemReservedRegions,
124                description: "Kernel buffers, driver memory, and system caches".to_string(),
125                estimated_size: system_allocations.iter().map(|a| a.size).sum(),
126                confidence_level: 0.5,
127                examples: self.generate_examples(&system_allocations, "System region"),
128            });
129        }
130
131        let pre_tracking = self.identify_pre_tracking_allocations(unknown_allocations);
132        if !pre_tracking.is_empty() {
133            categories.push(UnknownMemoryCategory {
134                category_type: UnknownRegionType::PreTrackingAllocations,
135                description: "Memory allocated before tracking was initialized".to_string(),
136                estimated_size: pre_tracking.iter().map(|a| a.size).sum(),
137                confidence_level: 0.9,
138                examples: self.generate_examples(&pre_tracking, "Pre-tracking"),
139            });
140        }
141
142        categories
143    }
144
145    fn identify_potential_causes(
146        &self,
147        unknown_allocations: &[&AllocationInfo],
148    ) -> Vec<UnknownMemoryCause> {
149        let mut causes = Vec::new();
150
151        for allocation in unknown_allocations {
152            if self.is_likely_ffi_allocation(allocation) {
153                causes.push(UnknownMemoryCause::ForeignFunctionInterface {
154                    library_name: self
155                        .guess_library_name(allocation)
156                        .unwrap_or_else(|| "unknown".to_string()),
157                    function_name: None,
158                });
159            }
160        }
161
162        if self.has_memory_mapping_pattern(unknown_allocations) {
163            causes.push(UnknownMemoryCause::MemoryMapping {
164                mapping_type: MappingType::AnonymousMapping,
165                file_path: None,
166            });
167        }
168
169        if self.has_threading_pattern(unknown_allocations) {
170            causes.push(UnknownMemoryCause::ThreadingMemory {
171                thread_id: None,
172                memory_type: ThreadMemoryType::ThreadStack,
173            });
174        }
175
176        causes.push(UnknownMemoryCause::InstrumentationGaps {
177            gap_type: InstrumentationGapType::EarlyBootstrap,
178            description: "Memory allocated during early program initialization".to_string(),
179        });
180
181        causes
182    }
183
184    fn generate_reduction_strategies(
185        &self,
186        _categories: &[UnknownMemoryCategory],
187    ) -> Vec<UnknownRegionReductionStrategy> {
188        vec![
189            UnknownRegionReductionStrategy {
190                strategy_type: ReductionStrategyType::EnhancedInstrumentation,
191                description: "Implement more comprehensive memory tracking hooks".to_string(),
192                implementation_steps: vec![
193                    "Hook into mmap/munmap system calls".to_string(),
194                    "Intercept malloc/free in all loaded libraries".to_string(),
195                    "Track thread creation and destruction".to_string(),
196                    "Monitor dynamic library loading".to_string(),
197                ],
198                expected_improvement: 60.0,
199                implementation_difficulty: ImplementationDifficulty::Hard,
200            },
201            UnknownRegionReductionStrategy {
202                strategy_type: ReductionStrategyType::FfiCallInterception,
203                description: "Intercept and track FFI calls to external libraries".to_string(),
204                implementation_steps: vec![
205                    "Wrap all extern function calls".to_string(),
206                    "Track memory allocations in C libraries".to_string(),
207                    "Monitor shared library symbol resolution".to_string(),
208                ],
209                expected_improvement: 25.0,
210                implementation_difficulty: ImplementationDifficulty::Medium,
211            },
212            UnknownRegionReductionStrategy {
213                strategy_type: ReductionStrategyType::MemoryMappingTracking,
214                description: "Track memory mapping operations comprehensively".to_string(),
215                implementation_steps: vec![
216                    "Monitor /proc/self/maps changes".to_string(),
217                    "Track mmap/mprotect/munmap calls".to_string(),
218                    "Analyze virtual memory layout".to_string(),
219                ],
220                expected_improvement: 20.0,
221                implementation_difficulty: ImplementationDifficulty::Medium,
222            },
223        ]
224    }
225
226    fn is_in_stack_region(&self, _ptr: usize) -> bool {
227        false
228    }
229
230    fn is_in_heap_region(&self, _ptr: usize) -> bool {
231        false
232    }
233
234    fn is_known_system_region(&self, ptr: usize) -> bool {
235        self.known_system_regions
236            .iter()
237            .any(|((start, end), _)| ptr >= *start && ptr < *end)
238    }
239
240    fn identify_memory_mapped_regions<'a>(
241        &self,
242        allocations: &[&'a AllocationInfo],
243    ) -> Vec<&'a AllocationInfo> {
244        allocations
245            .iter()
246            .filter(|alloc| self.is_likely_mmap_allocation(alloc))
247            .copied()
248            .collect()
249    }
250
251    fn identify_thread_local_storage<'a>(
252        &self,
253        allocations: &[&'a AllocationInfo],
254    ) -> Vec<&'a AllocationInfo> {
255        allocations
256            .iter()
257            .filter(|alloc| self.is_likely_tls_allocation(alloc))
258            .copied()
259            .collect()
260    }
261
262    fn identify_library_regions<'a>(
263        &self,
264        allocations: &[&'a AllocationInfo],
265    ) -> Vec<&'a AllocationInfo> {
266        allocations
267            .iter()
268            .filter(|alloc| self.is_likely_library_allocation(alloc))
269            .copied()
270            .collect()
271    }
272
273    fn identify_ffi_allocations<'a>(
274        &self,
275        allocations: &[&'a AllocationInfo],
276    ) -> Vec<&'a AllocationInfo> {
277        allocations
278            .iter()
279            .filter(|alloc| self.is_likely_ffi_allocation(alloc))
280            .copied()
281            .collect()
282    }
283
284    fn identify_system_regions<'a>(
285        &self,
286        allocations: &[&'a AllocationInfo],
287    ) -> Vec<&'a AllocationInfo> {
288        allocations
289            .iter()
290            .filter(|alloc| self.is_likely_system_allocation(alloc))
291            .copied()
292            .collect()
293    }
294
295    fn identify_pre_tracking_allocations<'a>(
296        &self,
297        allocations: &[&'a AllocationInfo],
298    ) -> Vec<&'a AllocationInfo> {
299        allocations
300            .iter()
301            .filter(|alloc| self.is_likely_pre_tracking_allocation(alloc))
302            .copied()
303            .collect()
304    }
305
306    #[allow(clippy::manual_is_multiple_of)]
307    fn is_likely_mmap_allocation(&self, allocation: &AllocationInfo) -> bool {
308        allocation.size >= 4096 && allocation.ptr % 4096 == 0
309    }
310
311    fn is_likely_tls_allocation(&self, allocation: &AllocationInfo) -> bool {
312        allocation.size < 1024 && self.is_in_thread_range(allocation.ptr)
313    }
314
315    fn is_likely_library_allocation(&self, allocation: &AllocationInfo) -> bool {
316        self.library_mappings
317            .values()
318            .any(|lib| lib.contains_address(allocation.ptr))
319    }
320
321    fn is_likely_ffi_allocation(&self, allocation: &AllocationInfo) -> bool {
322        allocation.type_name.is_none() && allocation.var_name.is_none()
323    }
324
325    fn is_likely_system_allocation(&self, allocation: &AllocationInfo) -> bool {
326        allocation.ptr < 0x1000 || allocation.ptr > 0x7fff_0000_0000
327    }
328
329    fn is_likely_pre_tracking_allocation(&self, allocation: &AllocationInfo) -> bool {
330        allocation.timestamp_alloc < 1000
331    }
332
333    fn is_in_thread_range(&self, ptr: usize) -> bool {
334        self.thread_memory_ranges.values().any(|ranges| {
335            ranges
336                .iter()
337                .any(|(start, end)| ptr >= *start && ptr < *end)
338        })
339    }
340
341    fn has_memory_mapping_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
342        allocations
343            .iter()
344            .any(|alloc| self.is_likely_mmap_allocation(alloc))
345    }
346
347    fn has_threading_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
348        allocations
349            .iter()
350            .any(|alloc| self.is_likely_tls_allocation(alloc))
351    }
352
353    fn guess_library_name(&self, allocation: &AllocationInfo) -> Option<String> {
354        for (name, info) in &self.library_mappings {
355            if info.contains_address(allocation.ptr) {
356                return Some(name.to_string());
357            }
358        }
359        None
360    }
361
362    fn generate_examples(
363        &self,
364        allocations: &[&AllocationInfo],
365        origin: &str,
366    ) -> Vec<UnknownMemoryExample> {
367        allocations
368            .iter()
369            .take(3)
370            .map(|alloc| UnknownMemoryExample {
371                address_range: (alloc.ptr, alloc.ptr + alloc.size),
372                size: alloc.size,
373                suspected_origin: origin.to_string(),
374                access_pattern: MemoryAccessPattern::Unknown,
375            })
376            .collect()
377    }
378}