memscope_rs/analysis/unknown/
analyzer.rs1use crate::analysis::unknown::types::*;
2use crate::capture::types::{AllocationInfo, ImplementationDifficulty};
3use std::collections::HashMap;
4
5pub struct UnknownMemoryAnalyzer {
6 pub known_system_regions: HashMap<(usize, usize), SystemRegionInfo>,
7 pub library_mappings: HashMap<String, LibraryMappingInfo>,
8 pub thread_memory_ranges: HashMap<u64, Vec<(usize, usize)>>,
9}
10
11impl Default for UnknownMemoryAnalyzer {
12 fn default() -> Self {
13 Self::new()
14 }
15}
16
17impl UnknownMemoryAnalyzer {
18 pub fn new() -> Self {
19 Self {
20 known_system_regions: HashMap::new(),
21 library_mappings: HashMap::new(),
22 thread_memory_ranges: HashMap::new(),
23 }
24 }
25
26 pub fn analyze_unknown_regions(
27 &mut self,
28 allocations: &[AllocationInfo],
29 ) -> UnknownMemoryRegionAnalysis {
30 let total_memory: usize = allocations.iter().map(|a| a.size).sum();
31 let unknown_allocations = self.identify_unknown_allocations(allocations);
32 let total_unknown: usize = unknown_allocations.iter().map(|a| a.size).sum();
33 let unknown_percentage = (total_unknown as f64 / total_memory as f64) * 100.0;
34 let unknown_categories = self.categorize_unknown_regions(&unknown_allocations);
35 let potential_causes = self.identify_potential_causes(&unknown_allocations);
36 let reduction_strategies = self.generate_reduction_strategies(&unknown_categories);
37
38 UnknownMemoryRegionAnalysis {
39 total_unknown_bytes: total_unknown,
40 unknown_percentage,
41 unknown_categories,
42 potential_causes,
43 reduction_strategies,
44 }
45 }
46
47 fn identify_unknown_allocations<'a>(
48 &self,
49 allocations: &'a [AllocationInfo],
50 ) -> Vec<&'a AllocationInfo> {
51 allocations
52 .iter()
53 .filter(|alloc| self.is_unknown_allocation(alloc))
54 .collect()
55 }
56
57 fn is_unknown_allocation(&self, allocation: &AllocationInfo) -> bool {
58 if self.is_in_stack_region(allocation.ptr) || self.is_in_heap_region(allocation.ptr) {
59 return false;
60 }
61
62 if self.is_known_system_region(allocation.ptr) {
63 return false;
64 }
65
66 true
67 }
68
69 fn categorize_unknown_regions(
70 &self,
71 unknown_allocations: &[&AllocationInfo],
72 ) -> Vec<UnknownMemoryCategory> {
73 let mut categories = Vec::new();
74
75 let mmap_allocations = self.identify_memory_mapped_regions(unknown_allocations);
76 if !mmap_allocations.is_empty() {
77 categories.push(UnknownMemoryCategory {
78 category_type: UnknownRegionType::MemoryMappedRegions,
79 description: "Memory-mapped files, shared memory, and anonymous mappings"
80 .to_string(),
81 estimated_size: mmap_allocations.iter().map(|a| a.size).sum(),
82 confidence_level: 0.8,
83 examples: self.generate_examples(&mmap_allocations, "Memory mapping"),
84 });
85 }
86
87 let tls_allocations = self.identify_thread_local_storage(unknown_allocations);
88 if !tls_allocations.is_empty() {
89 categories.push(UnknownMemoryCategory {
90 category_type: UnknownRegionType::ThreadLocalStorage,
91 description: "Thread-local storage and thread control blocks".to_string(),
92 estimated_size: tls_allocations.iter().map(|a| a.size).sum(),
93 confidence_level: 0.7,
94 examples: self.generate_examples(&tls_allocations, "Thread-local storage"),
95 });
96 }
97
98 let lib_allocations = self.identify_library_regions(unknown_allocations);
99 if !lib_allocations.is_empty() {
100 categories.push(UnknownMemoryCategory {
101 category_type: UnknownRegionType::DynamicLibraryRegions,
102 description: "Code and data sections of dynamically loaded libraries".to_string(),
103 estimated_size: lib_allocations.iter().map(|a| a.size).sum(),
104 confidence_level: 0.9,
105 examples: self.generate_examples(&lib_allocations, "Dynamic library"),
106 });
107 }
108
109 let ffi_allocations = self.identify_ffi_allocations(unknown_allocations);
110 if !ffi_allocations.is_empty() {
111 categories.push(UnknownMemoryCategory {
112 category_type: UnknownRegionType::ExternalLibraryAllocations,
113 description: "Memory allocated by external C/C++ libraries through FFI".to_string(),
114 estimated_size: ffi_allocations.iter().map(|a| a.size).sum(),
115 confidence_level: 0.6,
116 examples: self.generate_examples(&ffi_allocations, "FFI allocation"),
117 });
118 }
119
120 let system_allocations = self.identify_system_regions(unknown_allocations);
121 if !system_allocations.is_empty() {
122 categories.push(UnknownMemoryCategory {
123 category_type: UnknownRegionType::SystemReservedRegions,
124 description: "Kernel buffers, driver memory, and system caches".to_string(),
125 estimated_size: system_allocations.iter().map(|a| a.size).sum(),
126 confidence_level: 0.5,
127 examples: self.generate_examples(&system_allocations, "System region"),
128 });
129 }
130
131 let pre_tracking = self.identify_pre_tracking_allocations(unknown_allocations);
132 if !pre_tracking.is_empty() {
133 categories.push(UnknownMemoryCategory {
134 category_type: UnknownRegionType::PreTrackingAllocations,
135 description: "Memory allocated before tracking was initialized".to_string(),
136 estimated_size: pre_tracking.iter().map(|a| a.size).sum(),
137 confidence_level: 0.9,
138 examples: self.generate_examples(&pre_tracking, "Pre-tracking"),
139 });
140 }
141
142 categories
143 }
144
145 fn identify_potential_causes(
146 &self,
147 unknown_allocations: &[&AllocationInfo],
148 ) -> Vec<UnknownMemoryCause> {
149 let mut causes = Vec::new();
150
151 for allocation in unknown_allocations {
152 if self.is_likely_ffi_allocation(allocation) {
153 causes.push(UnknownMemoryCause::ForeignFunctionInterface {
154 library_name: self
155 .guess_library_name(allocation)
156 .unwrap_or_else(|| "unknown".to_string()),
157 function_name: None,
158 });
159 }
160 }
161
162 if self.has_memory_mapping_pattern(unknown_allocations) {
163 causes.push(UnknownMemoryCause::MemoryMapping {
164 mapping_type: MappingType::AnonymousMapping,
165 file_path: None,
166 });
167 }
168
169 if self.has_threading_pattern(unknown_allocations) {
170 causes.push(UnknownMemoryCause::ThreadingMemory {
171 thread_id: None,
172 memory_type: ThreadMemoryType::ThreadStack,
173 });
174 }
175
176 causes.push(UnknownMemoryCause::InstrumentationGaps {
177 gap_type: InstrumentationGapType::EarlyBootstrap,
178 description: "Memory allocated during early program initialization".to_string(),
179 });
180
181 causes
182 }
183
184 fn generate_reduction_strategies(
185 &self,
186 _categories: &[UnknownMemoryCategory],
187 ) -> Vec<UnknownRegionReductionStrategy> {
188 vec![
189 UnknownRegionReductionStrategy {
190 strategy_type: ReductionStrategyType::EnhancedInstrumentation,
191 description: "Implement more comprehensive memory tracking hooks".to_string(),
192 implementation_steps: vec![
193 "Hook into mmap/munmap system calls".to_string(),
194 "Intercept malloc/free in all loaded libraries".to_string(),
195 "Track thread creation and destruction".to_string(),
196 "Monitor dynamic library loading".to_string(),
197 ],
198 expected_improvement: 60.0,
199 implementation_difficulty: ImplementationDifficulty::Hard,
200 },
201 UnknownRegionReductionStrategy {
202 strategy_type: ReductionStrategyType::FfiCallInterception,
203 description: "Intercept and track FFI calls to external libraries".to_string(),
204 implementation_steps: vec![
205 "Wrap all extern function calls".to_string(),
206 "Track memory allocations in C libraries".to_string(),
207 "Monitor shared library symbol resolution".to_string(),
208 ],
209 expected_improvement: 25.0,
210 implementation_difficulty: ImplementationDifficulty::Medium,
211 },
212 UnknownRegionReductionStrategy {
213 strategy_type: ReductionStrategyType::MemoryMappingTracking,
214 description: "Track memory mapping operations comprehensively".to_string(),
215 implementation_steps: vec![
216 "Monitor /proc/self/maps changes".to_string(),
217 "Track mmap/mprotect/munmap calls".to_string(),
218 "Analyze virtual memory layout".to_string(),
219 ],
220 expected_improvement: 20.0,
221 implementation_difficulty: ImplementationDifficulty::Medium,
222 },
223 ]
224 }
225
226 fn is_in_stack_region(&self, _ptr: usize) -> bool {
227 false
228 }
229
230 fn is_in_heap_region(&self, _ptr: usize) -> bool {
231 false
232 }
233
234 fn is_known_system_region(&self, ptr: usize) -> bool {
235 self.known_system_regions
236 .iter()
237 .any(|((start, end), _)| ptr >= *start && ptr < *end)
238 }
239
240 fn identify_memory_mapped_regions<'a>(
241 &self,
242 allocations: &[&'a AllocationInfo],
243 ) -> Vec<&'a AllocationInfo> {
244 allocations
245 .iter()
246 .filter(|alloc| self.is_likely_mmap_allocation(alloc))
247 .copied()
248 .collect()
249 }
250
251 fn identify_thread_local_storage<'a>(
252 &self,
253 allocations: &[&'a AllocationInfo],
254 ) -> Vec<&'a AllocationInfo> {
255 allocations
256 .iter()
257 .filter(|alloc| self.is_likely_tls_allocation(alloc))
258 .copied()
259 .collect()
260 }
261
262 fn identify_library_regions<'a>(
263 &self,
264 allocations: &[&'a AllocationInfo],
265 ) -> Vec<&'a AllocationInfo> {
266 allocations
267 .iter()
268 .filter(|alloc| self.is_likely_library_allocation(alloc))
269 .copied()
270 .collect()
271 }
272
273 fn identify_ffi_allocations<'a>(
274 &self,
275 allocations: &[&'a AllocationInfo],
276 ) -> Vec<&'a AllocationInfo> {
277 allocations
278 .iter()
279 .filter(|alloc| self.is_likely_ffi_allocation(alloc))
280 .copied()
281 .collect()
282 }
283
284 fn identify_system_regions<'a>(
285 &self,
286 allocations: &[&'a AllocationInfo],
287 ) -> Vec<&'a AllocationInfo> {
288 allocations
289 .iter()
290 .filter(|alloc| self.is_likely_system_allocation(alloc))
291 .copied()
292 .collect()
293 }
294
295 fn identify_pre_tracking_allocations<'a>(
296 &self,
297 allocations: &[&'a AllocationInfo],
298 ) -> Vec<&'a AllocationInfo> {
299 allocations
300 .iter()
301 .filter(|alloc| self.is_likely_pre_tracking_allocation(alloc))
302 .copied()
303 .collect()
304 }
305
306 #[allow(clippy::manual_is_multiple_of)]
307 fn is_likely_mmap_allocation(&self, allocation: &AllocationInfo) -> bool {
308 allocation.size >= 4096 && allocation.ptr % 4096 == 0
309 }
310
311 fn is_likely_tls_allocation(&self, allocation: &AllocationInfo) -> bool {
312 allocation.size < 1024 && self.is_in_thread_range(allocation.ptr)
313 }
314
315 fn is_likely_library_allocation(&self, allocation: &AllocationInfo) -> bool {
316 self.library_mappings
317 .values()
318 .any(|lib| lib.contains_address(allocation.ptr))
319 }
320
321 fn is_likely_ffi_allocation(&self, allocation: &AllocationInfo) -> bool {
322 allocation.type_name.is_none() && allocation.var_name.is_none()
323 }
324
325 fn is_likely_system_allocation(&self, allocation: &AllocationInfo) -> bool {
326 allocation.ptr < 0x1000 || allocation.ptr > 0x7fff_0000_0000
327 }
328
329 fn is_likely_pre_tracking_allocation(&self, allocation: &AllocationInfo) -> bool {
330 allocation.timestamp_alloc < 1000
331 }
332
333 fn is_in_thread_range(&self, ptr: usize) -> bool {
334 self.thread_memory_ranges.values().any(|ranges| {
335 ranges
336 .iter()
337 .any(|(start, end)| ptr >= *start && ptr < *end)
338 })
339 }
340
341 fn has_memory_mapping_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
342 allocations
343 .iter()
344 .any(|alloc| self.is_likely_mmap_allocation(alloc))
345 }
346
347 fn has_threading_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
348 allocations
349 .iter()
350 .any(|alloc| self.is_likely_tls_allocation(alloc))
351 }
352
353 fn guess_library_name(&self, allocation: &AllocationInfo) -> Option<String> {
354 for (name, info) in &self.library_mappings {
355 if info.contains_address(allocation.ptr) {
356 return Some(name.to_string());
357 }
358 }
359 None
360 }
361
362 fn generate_examples(
363 &self,
364 allocations: &[&AllocationInfo],
365 origin: &str,
366 ) -> Vec<UnknownMemoryExample> {
367 allocations
368 .iter()
369 .take(3)
370 .map(|alloc| UnknownMemoryExample {
371 address_range: (alloc.ptr, alloc.ptr + alloc.size),
372 size: alloc.size,
373 suspected_origin: origin.to_string(),
374 access_pattern: MemoryAccessPattern::Unknown,
375 })
376 .collect()
377 }
378}