1use crate::analysis::unknown::types::*;
2use crate::capture::types::{AllocationInfo, ImplementationDifficulty};
3use std::collections::HashMap;
4
5pub struct UnknownMemoryAnalyzer {
6 pub known_system_regions: HashMap<(usize, usize), SystemRegionInfo>,
7 pub library_mappings: HashMap<String, LibraryMappingInfo>,
8 pub thread_memory_ranges: HashMap<u64, Vec<(usize, usize)>>,
9}
10
11impl Default for UnknownMemoryAnalyzer {
12 fn default() -> Self {
13 Self::new()
14 }
15}
16
17impl UnknownMemoryAnalyzer {
18 pub fn new() -> Self {
19 Self {
20 known_system_regions: HashMap::new(),
21 library_mappings: HashMap::new(),
22 thread_memory_ranges: HashMap::new(),
23 }
24 }
25
26 pub fn analyze_unknown_regions(
27 &mut self,
28 allocations: &[AllocationInfo],
29 ) -> UnknownMemoryRegionAnalysis {
30 let total_memory: usize = allocations.iter().map(|a| a.size).sum();
31 let unknown_allocations = self.identify_unknown_allocations(allocations);
32 let total_unknown: usize = unknown_allocations.iter().map(|a| a.size).sum();
33 let unknown_percentage = (total_unknown as f64 / total_memory as f64) * 100.0;
34 let unknown_categories = self.categorize_unknown_regions(&unknown_allocations);
35 let potential_causes = self.identify_potential_causes(&unknown_allocations);
36 let reduction_strategies = self.generate_reduction_strategies(&unknown_categories);
37
38 UnknownMemoryRegionAnalysis {
39 total_unknown_bytes: total_unknown,
40 unknown_percentage,
41 unknown_categories,
42 potential_causes,
43 reduction_strategies,
44 }
45 }
46
47 fn identify_unknown_allocations<'a>(
48 &self,
49 allocations: &'a [AllocationInfo],
50 ) -> Vec<&'a AllocationInfo> {
51 allocations
52 .iter()
53 .filter(|alloc| self.is_unknown_allocation(alloc))
54 .collect()
55 }
56
57 fn is_unknown_allocation(&self, allocation: &AllocationInfo) -> bool {
58 if self.is_in_stack_region(allocation.ptr) || self.is_in_heap_region(allocation.ptr) {
59 return false;
60 }
61
62 if self.is_known_system_region(allocation.ptr) {
63 return false;
64 }
65
66 true
67 }
68
69 fn categorize_unknown_regions(
70 &self,
71 unknown_allocations: &[&AllocationInfo],
72 ) -> Vec<UnknownMemoryCategory> {
73 let mut categories = Vec::new();
74
75 let mmap_allocations = self.identify_memory_mapped_regions(unknown_allocations);
76 if !mmap_allocations.is_empty() {
77 categories.push(UnknownMemoryCategory {
78 category_type: UnknownRegionType::MemoryMappedRegions,
79 description: "Memory-mapped files, shared memory, and anonymous mappings"
80 .to_string(),
81 estimated_size: mmap_allocations.iter().map(|a| a.size).sum(),
82 confidence_level: 0.8,
83 examples: self.generate_examples(&mmap_allocations, "Memory mapping"),
84 });
85 }
86
87 let tls_allocations = self.identify_thread_local_storage(unknown_allocations);
88 if !tls_allocations.is_empty() {
89 categories.push(UnknownMemoryCategory {
90 category_type: UnknownRegionType::ThreadLocalStorage,
91 description: "Thread-local storage and thread control blocks".to_string(),
92 estimated_size: tls_allocations.iter().map(|a| a.size).sum(),
93 confidence_level: 0.7,
94 examples: self.generate_examples(&tls_allocations, "Thread-local storage"),
95 });
96 }
97
98 let lib_allocations = self.identify_library_regions(unknown_allocations);
99 if !lib_allocations.is_empty() {
100 categories.push(UnknownMemoryCategory {
101 category_type: UnknownRegionType::DynamicLibraryRegions,
102 description: "Code and data sections of dynamically loaded libraries".to_string(),
103 estimated_size: lib_allocations.iter().map(|a| a.size).sum(),
104 confidence_level: 0.9,
105 examples: self.generate_examples(&lib_allocations, "Dynamic library"),
106 });
107 }
108
109 let ffi_allocations = self.identify_ffi_allocations(unknown_allocations);
110 if !ffi_allocations.is_empty() {
111 categories.push(UnknownMemoryCategory {
112 category_type: UnknownRegionType::ExternalLibraryAllocations,
113 description: "Memory allocated by external C/C++ libraries through FFI".to_string(),
114 estimated_size: ffi_allocations.iter().map(|a| a.size).sum(),
115 confidence_level: 0.6,
116 examples: self.generate_examples(&ffi_allocations, "FFI allocation"),
117 });
118 }
119
120 let system_allocations = self.identify_system_regions(unknown_allocations);
121 if !system_allocations.is_empty() {
122 categories.push(UnknownMemoryCategory {
123 category_type: UnknownRegionType::SystemReservedRegions,
124 description: "Kernel buffers, driver memory, and system caches".to_string(),
125 estimated_size: system_allocations.iter().map(|a| a.size).sum(),
126 confidence_level: 0.5,
127 examples: self.generate_examples(&system_allocations, "System region"),
128 });
129 }
130
131 let pre_tracking = self.identify_pre_tracking_allocations(unknown_allocations);
132 if !pre_tracking.is_empty() {
133 categories.push(UnknownMemoryCategory {
134 category_type: UnknownRegionType::PreTrackingAllocations,
135 description: "Memory allocated before tracking was initialized".to_string(),
136 estimated_size: pre_tracking.iter().map(|a| a.size).sum(),
137 confidence_level: 0.9,
138 examples: self.generate_examples(&pre_tracking, "Pre-tracking"),
139 });
140 }
141
142 categories
143 }
144
145 fn identify_potential_causes(
146 &self,
147 unknown_allocations: &[&AllocationInfo],
148 ) -> Vec<UnknownMemoryCause> {
149 let mut causes = Vec::new();
150
151 for allocation in unknown_allocations {
152 if self.is_likely_ffi_allocation(allocation) {
153 causes.push(UnknownMemoryCause::ForeignFunctionInterface {
154 library_name: self
155 .guess_library_name(allocation)
156 .unwrap_or_else(|| "unknown".to_string()),
157 function_name: None,
158 });
159 }
160 }
161
162 if self.has_memory_mapping_pattern(unknown_allocations) {
163 causes.push(UnknownMemoryCause::MemoryMapping {
164 mapping_type: MappingType::AnonymousMapping,
165 file_path: None,
166 });
167 }
168
169 if self.has_threading_pattern(unknown_allocations) {
170 causes.push(UnknownMemoryCause::ThreadingMemory {
171 thread_id: None,
172 memory_type: ThreadMemoryType::ThreadStack,
173 });
174 }
175
176 causes.push(UnknownMemoryCause::InstrumentationGaps {
177 gap_type: InstrumentationGapType::EarlyBootstrap,
178 description: "Memory allocated during early program initialization".to_string(),
179 });
180
181 causes
182 }
183
184 fn generate_reduction_strategies(
185 &self,
186 _categories: &[UnknownMemoryCategory],
187 ) -> Vec<UnknownRegionReductionStrategy> {
188 vec![
189 UnknownRegionReductionStrategy {
190 strategy_type: ReductionStrategyType::EnhancedInstrumentation,
191 description: "Implement more comprehensive memory tracking hooks".to_string(),
192 implementation_steps: vec![
193 "Hook into mmap/munmap system calls".to_string(),
194 "Intercept malloc/free in all loaded libraries".to_string(),
195 "Track thread creation and destruction".to_string(),
196 "Monitor dynamic library loading".to_string(),
197 ],
198 expected_improvement: 60.0,
199 implementation_difficulty: ImplementationDifficulty::Hard,
200 },
201 UnknownRegionReductionStrategy {
202 strategy_type: ReductionStrategyType::FfiCallInterception,
203 description: "Intercept and track FFI calls to external libraries".to_string(),
204 implementation_steps: vec![
205 "Wrap all extern function calls".to_string(),
206 "Track memory allocations in C libraries".to_string(),
207 "Monitor shared library symbol resolution".to_string(),
208 ],
209 expected_improvement: 25.0,
210 implementation_difficulty: ImplementationDifficulty::Medium,
211 },
212 UnknownRegionReductionStrategy {
213 strategy_type: ReductionStrategyType::MemoryMappingTracking,
214 description: "Track memory mapping operations comprehensively".to_string(),
215 implementation_steps: vec![
216 "Monitor /proc/self/maps changes".to_string(),
217 "Track mmap/mprotect/munmap calls".to_string(),
218 "Analyze virtual memory layout".to_string(),
219 ],
220 expected_improvement: 20.0,
221 implementation_difficulty: ImplementationDifficulty::Medium,
222 },
223 ]
224 }
225
226 fn is_in_stack_region(&self, _ptr: usize) -> bool {
227 false
228 }
229
230 fn is_in_heap_region(&self, _ptr: usize) -> bool {
231 false
232 }
233
234 fn is_known_system_region(&self, ptr: usize) -> bool {
235 self.known_system_regions
236 .iter()
237 .any(|((start, end), _)| ptr >= *start && ptr < *end)
238 }
239
240 fn identify_memory_mapped_regions<'a>(
241 &self,
242 allocations: &[&'a AllocationInfo],
243 ) -> Vec<&'a AllocationInfo> {
244 allocations
245 .iter()
246 .filter(|alloc| self.is_likely_mmap_allocation(alloc))
247 .copied()
248 .collect()
249 }
250
251 fn identify_thread_local_storage<'a>(
252 &self,
253 allocations: &[&'a AllocationInfo],
254 ) -> Vec<&'a AllocationInfo> {
255 allocations
256 .iter()
257 .filter(|alloc| self.is_likely_tls_allocation(alloc))
258 .copied()
259 .collect()
260 }
261
262 fn identify_library_regions<'a>(
263 &self,
264 allocations: &[&'a AllocationInfo],
265 ) -> Vec<&'a AllocationInfo> {
266 allocations
267 .iter()
268 .filter(|alloc| self.is_likely_library_allocation(alloc))
269 .copied()
270 .collect()
271 }
272
273 fn identify_ffi_allocations<'a>(
274 &self,
275 allocations: &[&'a AllocationInfo],
276 ) -> Vec<&'a AllocationInfo> {
277 allocations
278 .iter()
279 .filter(|alloc| self.is_likely_ffi_allocation(alloc))
280 .copied()
281 .collect()
282 }
283
284 fn identify_system_regions<'a>(
285 &self,
286 allocations: &[&'a AllocationInfo],
287 ) -> Vec<&'a AllocationInfo> {
288 allocations
289 .iter()
290 .filter(|alloc| self.is_likely_system_allocation(alloc))
291 .copied()
292 .collect()
293 }
294
295 fn identify_pre_tracking_allocations<'a>(
296 &self,
297 allocations: &[&'a AllocationInfo],
298 ) -> Vec<&'a AllocationInfo> {
299 allocations
300 .iter()
301 .filter(|alloc| self.is_likely_pre_tracking_allocation(alloc))
302 .copied()
303 .collect()
304 }
305
306 #[allow(clippy::manual_is_multiple_of)]
307 fn is_likely_mmap_allocation(&self, allocation: &AllocationInfo) -> bool {
308 allocation.size >= 4096 && allocation.ptr % 4096 == 0
309 }
310
311 fn is_likely_tls_allocation(&self, allocation: &AllocationInfo) -> bool {
312 allocation.size < 1024 && self.is_in_thread_range(allocation.ptr)
313 }
314
315 fn is_likely_library_allocation(&self, allocation: &AllocationInfo) -> bool {
316 self.library_mappings
317 .values()
318 .any(|lib| lib.contains_address(allocation.ptr))
319 }
320
321 fn is_likely_ffi_allocation(&self, allocation: &AllocationInfo) -> bool {
322 allocation.type_name.is_none() && allocation.var_name.is_none()
323 }
324
325 fn is_likely_system_allocation(&self, allocation: &AllocationInfo) -> bool {
326 allocation.ptr < 0x1000 || allocation.ptr > 0x7fff_0000_0000
327 }
328
329 fn is_likely_pre_tracking_allocation(&self, allocation: &AllocationInfo) -> bool {
330 allocation.timestamp_alloc < 1000
331 }
332
333 fn is_in_thread_range(&self, ptr: usize) -> bool {
334 self.thread_memory_ranges.values().any(|ranges| {
335 ranges
336 .iter()
337 .any(|(start, end)| ptr >= *start && ptr < *end)
338 })
339 }
340
341 fn has_memory_mapping_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
342 allocations
343 .iter()
344 .any(|alloc| self.is_likely_mmap_allocation(alloc))
345 }
346
347 fn has_threading_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
348 allocations
349 .iter()
350 .any(|alloc| self.is_likely_tls_allocation(alloc))
351 }
352
353 fn guess_library_name(&self, allocation: &AllocationInfo) -> Option<String> {
354 for (name, info) in &self.library_mappings {
355 if info.contains_address(allocation.ptr) {
356 return Some(name.to_string());
357 }
358 }
359 None
360 }
361
362 fn generate_examples(
363 &self,
364 allocations: &[&AllocationInfo],
365 origin: &str,
366 ) -> Vec<UnknownMemoryExample> {
367 allocations
368 .iter()
369 .take(3)
370 .map(|alloc| UnknownMemoryExample {
371 address_range: (alloc.ptr, alloc.ptr + alloc.size),
372 size: alloc.size,
373 suspected_origin: origin.to_string(),
374 access_pattern: MemoryAccessPattern::Unknown,
375 })
376 .collect()
377 }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an allocation at `ptr` spanning `size` bytes with no type or
    /// variable name, an allocation timestamp of 1000, and every optional
    /// field unset.
    fn create_test_allocation(ptr: usize, size: usize) -> AllocationInfo {
        AllocationInfo {
            ptr,
            size,
            var_name: None,
            type_name: None,
            scope_name: None,
            timestamp_alloc: 1000,
            timestamp_dealloc: None,
            thread_id: std::thread::current().id(),
            thread_id_u64: 1,
            borrow_count: 0,
            stack_trace: None,
            is_leaked: false,
            lifetime_ms: None,
            module_path: None,
            borrow_info: None,
            clone_info: None,
            ownership_history_available: false,
            smart_pointer_info: None,
            memory_layout: None,
            generic_info: None,
            dynamic_type_info: None,
            runtime_state: None,
            stack_allocation: None,
            temporary_object: None,
            fragmentation_analysis: None,
            generic_instantiation: None,
            type_relationships: None,
            type_usage: None,
            function_call_tracking: None,
            lifecycle_tracking: None,
            access_tracking: None,
            drop_chain_analysis: None,
            stack_ptr: None,
            task_id: None,
        }
    }

    /// Tells whether any reported category has a type accepted by `is_wanted`.
    fn has_category(
        analysis: &UnknownMemoryRegionAnalysis,
        is_wanted: impl Fn(&UnknownRegionType) -> bool,
    ) -> bool {
        analysis
            .unknown_categories
            .iter()
            .any(|category| is_wanted(&category.category_type))
    }

    // A freshly constructed analyzer has all three lookup tables empty.
    #[test]
    fn test_analyzer_new() {
        let analyzer = UnknownMemoryAnalyzer::new();
        assert!(
            analyzer.known_system_regions.is_empty(),
            "System regions should be empty"
        );
        assert!(
            analyzer.library_mappings.is_empty(),
            "Library mappings should be empty"
        );
        assert!(
            analyzer.thread_memory_ranges.is_empty(),
            "Thread ranges should be empty"
        );
    }

    // `Default` yields an analyzer without registered system regions.
    #[test]
    fn test_analyzer_default() {
        let analyzer = UnknownMemoryAnalyzer::default();
        assert!(
            analyzer.known_system_regions.is_empty(),
            "Default should create empty analyzer"
        );
    }

    // No allocations: zero unknown bytes and a percentage of NaN or zero.
    #[test]
    fn test_analyze_empty_allocations() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let result = analyzer.analyze_unknown_regions(&[]);
        assert_eq!(
            result.total_unknown_bytes, 0,
            "Empty input should have zero unknown bytes"
        );
        let percentage = result.unknown_percentage;
        assert!(
            percentage.is_nan() || percentage == 0.0,
            "Empty input should have NaN or 0% unknown"
        );
    }

    // One unregistered allocation is reported as unknown.
    #[test]
    fn test_analyze_single_allocation() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![create_test_allocation(0x10000, 1024)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            result.total_unknown_bytes > 0,
            "Should detect unknown bytes"
        );
        assert!(
            result.unknown_percentage > 0.0,
            "Should have unknown percentage"
        );
    }

    // A 4096-byte allocation at a 4096-aligned address matches the mmap heuristic.
    #[test]
    fn test_mmap_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![create_test_allocation(0x1000, 4096)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            has_category(&result, |kind| matches!(
                kind,
                UnknownRegionType::MemoryMappedRegions
            )),
            "Should detect memory-mapped region"
        );
    }

    // An allocation without type and variable names matches the FFI heuristic.
    #[test]
    fn test_ffi_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![create_test_allocation(0x10000, 512)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            has_category(&result, |kind| matches!(
                kind,
                UnknownRegionType::ExternalLibraryAllocations
            )),
            "Should detect FFI allocation"
        );
    }

    // An address below 0x1000 matches the system-region heuristic.
    #[test]
    fn test_system_region_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![create_test_allocation(0x100, 256)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            has_category(&result, |kind| matches!(
                kind,
                UnknownRegionType::SystemReservedRegions
            )),
            "Should detect system region"
        );
    }

    // An allocation timestamp below 1000 matches the pre-tracking heuristic.
    #[test]
    fn test_pre_tracking_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let mut alloc = create_test_allocation(0x10000, 1024);
        alloc.timestamp_alloc = 500;
        let result = analyzer.analyze_unknown_regions(&[alloc]);
        assert!(
            has_category(&result, |kind| matches!(
                kind,
                UnknownRegionType::PreTrackingAllocations
            )),
            "Should detect pre-tracking allocation"
        );
    }

    // Allocations inside a registered system region are not counted as unknown.
    #[test]
    fn test_known_system_region_exclusion() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let region = SystemRegionInfo {
            region_type: "test".to_string(),
            description: "test region".to_string(),
            read_only: false,
        };
        analyzer
            .known_system_regions
            .insert((0x10000, 0x20000), region);
        let allocations = vec![create_test_allocation(0x15000, 1024)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert_eq!(
            result.total_unknown_bytes, 0,
            "Known region should not be unknown"
        );
    }

    // An address inside a registered library mapping yields the library category.
    #[test]
    fn test_library_mapping_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let mapping = LibraryMappingInfo {
            start_address: 0x10000,
            end_address: 0x20000,
            permissions: "r-x".to_string(),
            file_path: "/lib/test.so".to_string(),
        };
        analyzer
            .library_mappings
            .insert("test_lib".to_string(), mapping);
        let allocations = vec![create_test_allocation(0x15000, 1024)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            has_category(&result, |kind| matches!(
                kind,
                UnknownRegionType::DynamicLibraryRegions
            )),
            "Should detect library allocation"
        );
    }

    // A small allocation inside a registered thread range yields the TLS category.
    #[test]
    fn test_thread_range_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        analyzer
            .thread_memory_ranges
            .insert(1, vec![(0x10000, 0x20000)]);
        let allocations = vec![create_test_allocation(0x15000, 512)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            has_category(&result, |kind| matches!(
                kind,
                UnknownRegionType::ThreadLocalStorage
            )),
            "Should detect TLS allocation"
        );
    }

    // Strategies are produced even for empty input and include enhanced instrumentation.
    #[test]
    fn test_reduction_strategies() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let result = analyzer.analyze_unknown_regions(&[]);
        assert!(
            !result.reduction_strategies.is_empty(),
            "Should generate strategies"
        );
        let has_enhanced_instrumentation = result.reduction_strategies.iter().any(|strategy| {
            matches!(
                strategy.strategy_type,
                ReductionStrategyType::EnhancedInstrumentation
            )
        });
        assert!(
            has_enhanced_instrumentation,
            "Should include enhanced instrumentation strategy"
        );
    }

    // At least one potential cause is reported for an unknown allocation.
    #[test]
    fn test_potential_causes() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![create_test_allocation(0x10000, 512)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            !result.potential_causes.is_empty(),
            "Should identify potential causes"
        );
    }

    // Unknown bytes are the sum of all unknown allocation sizes (1024 + 2048 + 4096).
    #[test]
    fn test_multiple_allocations() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![
            create_test_allocation(0x10000, 1024),
            create_test_allocation(0x20000, 2048),
            create_test_allocation(0x30000, 4096),
        ];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert_eq!(
            result.total_unknown_bytes, 7168,
            "Should sum all unknown bytes"
        );
    }

    // `contains_address` includes the start address and excludes the end address.
    #[test]
    fn test_library_mapping_contains() {
        let mapping = LibraryMappingInfo {
            start_address: 0x1000,
            end_address: 0x2000,
            permissions: "r-x".to_string(),
            file_path: "/test.so".to_string(),
        };
        assert!(
            mapping.contains_address(0x1000),
            "Start address should be contained"
        );
        assert!(
            mapping.contains_address(0x1500),
            "Middle address should be contained"
        );
        assert!(
            !mapping.contains_address(0x2000),
            "End address should not be contained"
        );
        assert!(
            !mapping.contains_address(0x500),
            "Address before range should not be contained"
        );
    }

    // A very large single allocation is analyzed without failing.
    #[test]
    fn test_large_allocation() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![create_test_allocation(0x100000, usize::MAX / 4)];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            result.total_unknown_bytes > 0,
            "Should handle large allocation"
        );
    }

    // The unknown percentage stays within the inclusive range 0..=100.
    #[test]
    fn test_percentage_bounds() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        let allocations = vec![
            create_test_allocation(0x10000, 1024),
            create_test_allocation(0x20000, 2048),
        ];
        let result = analyzer.analyze_unknown_regions(&allocations);
        assert!(
            (0.0..=100.0).contains(&result.unknown_percentage),
            "Percentage should be between 0 and 100"
        );
    }
}