1use crate::analysis::unknown::types::*;
2use crate::capture::types::{AllocationInfo, ImplementationDifficulty};
3use std::collections::HashMap;
4
/// Classifies tracked allocations that cannot be attributed to a known
/// region and summarizes where that "unknown" memory probably comes from.
///
/// All lookup tables start empty (see `new`) and are public, so callers
/// populate them directly before running an analysis.
pub struct UnknownMemoryAnalyzer {
    /// Address ranges that are already accounted for, keyed by `(start, end)`.
    /// The range is treated as half-open: `start <= ptr < end`. An allocation
    /// whose pointer falls inside any of these ranges is not "unknown".
    pub known_system_regions: HashMap<(usize, usize), SystemRegionInfo>,
    /// Library mappings keyed by a name string; the key is what gets reported
    /// as the library name when an allocation's pointer lies in the mapping.
    pub library_mappings: HashMap<String, LibraryMappingInfo>,
    /// Half-open `(start, end)` address ranges grouped under a `u64` key
    /// (presumably a thread id — confirm against the code that fills this map).
    pub thread_memory_ranges: HashMap<u64, Vec<(usize, usize)>>,
}
10
11impl Default for UnknownMemoryAnalyzer {
12 fn default() -> Self {
13 Self::new()
14 }
15}
16
17impl UnknownMemoryAnalyzer {
18 pub fn new() -> Self {
19 Self {
20 known_system_regions: HashMap::new(),
21 library_mappings: HashMap::new(),
22 thread_memory_ranges: HashMap::new(),
23 }
24 }
25
26 pub fn analyze_unknown_regions(
27 &mut self,
28 allocations: &[AllocationInfo],
29 ) -> UnknownMemoryRegionAnalysis {
30 let total_memory: usize = allocations.iter().map(|a| a.size).sum();
31 let unknown_allocations = self.identify_unknown_allocations(allocations);
32 let total_unknown: usize = unknown_allocations.iter().map(|a| a.size).sum();
33 let unknown_percentage = (total_unknown as f64 / total_memory as f64) * 100.0;
34 let unknown_categories = self.categorize_unknown_regions(&unknown_allocations);
35 let potential_causes = self.identify_potential_causes(&unknown_allocations);
36 let reduction_strategies = self.generate_reduction_strategies(&unknown_categories);
37
38 UnknownMemoryRegionAnalysis {
39 total_unknown_bytes: total_unknown,
40 unknown_percentage,
41 unknown_categories,
42 potential_causes,
43 reduction_strategies,
44 }
45 }
46
47 fn identify_unknown_allocations<'a>(
48 &self,
49 allocations: &'a [AllocationInfo],
50 ) -> Vec<&'a AllocationInfo> {
51 allocations
52 .iter()
53 .filter(|alloc| self.is_unknown_allocation(alloc))
54 .collect()
55 }
56
57 fn is_unknown_allocation(&self, allocation: &AllocationInfo) -> bool {
58 if self.is_in_stack_region(allocation.ptr) || self.is_in_heap_region(allocation.ptr) {
59 return false;
60 }
61
62 if self.is_known_system_region(allocation.ptr) {
63 return false;
64 }
65
66 true
67 }
68
69 fn categorize_unknown_regions(
70 &self,
71 unknown_allocations: &[&AllocationInfo],
72 ) -> Vec<UnknownMemoryCategory> {
73 let mut categories = Vec::new();
74
75 let mmap_allocations = self.identify_memory_mapped_regions(unknown_allocations);
76 if !mmap_allocations.is_empty() {
77 categories.push(UnknownMemoryCategory {
78 category_type: UnknownRegionType::MemoryMappedRegions,
79 description: "Memory-mapped files, shared memory, and anonymous mappings"
80 .to_string(),
81 estimated_size: mmap_allocations.iter().map(|a| a.size).sum(),
82 confidence_level: 0.8,
83 examples: self.generate_examples(&mmap_allocations, "Memory mapping"),
84 });
85 }
86
87 let tls_allocations = self.identify_thread_local_storage(unknown_allocations);
88 if !tls_allocations.is_empty() {
89 categories.push(UnknownMemoryCategory {
90 category_type: UnknownRegionType::ThreadLocalStorage,
91 description: "Thread-local storage and thread control blocks".to_string(),
92 estimated_size: tls_allocations.iter().map(|a| a.size).sum(),
93 confidence_level: 0.7,
94 examples: self.generate_examples(&tls_allocations, "Thread-local storage"),
95 });
96 }
97
98 let lib_allocations = self.identify_library_regions(unknown_allocations);
99 if !lib_allocations.is_empty() {
100 categories.push(UnknownMemoryCategory {
101 category_type: UnknownRegionType::DynamicLibraryRegions,
102 description: "Code and data sections of dynamically loaded libraries".to_string(),
103 estimated_size: lib_allocations.iter().map(|a| a.size).sum(),
104 confidence_level: 0.9,
105 examples: self.generate_examples(&lib_allocations, "Dynamic library"),
106 });
107 }
108
109 let ffi_allocations = self.identify_ffi_allocations(unknown_allocations);
110 if !ffi_allocations.is_empty() {
111 categories.push(UnknownMemoryCategory {
112 category_type: UnknownRegionType::ExternalLibraryAllocations,
113 description: "Memory allocated by external C/C++ libraries through FFI".to_string(),
114 estimated_size: ffi_allocations.iter().map(|a| a.size).sum(),
115 confidence_level: 0.6,
116 examples: self.generate_examples(&ffi_allocations, "FFI allocation"),
117 });
118 }
119
120 let system_allocations = self.identify_system_regions(unknown_allocations);
121 if !system_allocations.is_empty() {
122 categories.push(UnknownMemoryCategory {
123 category_type: UnknownRegionType::SystemReservedRegions,
124 description: "Kernel buffers, driver memory, and system caches".to_string(),
125 estimated_size: system_allocations.iter().map(|a| a.size).sum(),
126 confidence_level: 0.5,
127 examples: self.generate_examples(&system_allocations, "System region"),
128 });
129 }
130
131 let pre_tracking = self.identify_pre_tracking_allocations(unknown_allocations);
132 if !pre_tracking.is_empty() {
133 categories.push(UnknownMemoryCategory {
134 category_type: UnknownRegionType::PreTrackingAllocations,
135 description: "Memory allocated before tracking was initialized".to_string(),
136 estimated_size: pre_tracking.iter().map(|a| a.size).sum(),
137 confidence_level: 0.9,
138 examples: self.generate_examples(&pre_tracking, "Pre-tracking"),
139 });
140 }
141
142 categories
143 }
144
145 fn identify_potential_causes(
146 &self,
147 unknown_allocations: &[&AllocationInfo],
148 ) -> Vec<UnknownMemoryCause> {
149 let mut causes = Vec::new();
150
151 for allocation in unknown_allocations {
152 if self.is_likely_ffi_allocation(allocation) {
153 causes.push(UnknownMemoryCause::ForeignFunctionInterface {
154 library_name: self
155 .guess_library_name(allocation)
156 .unwrap_or_else(|| "unknown".to_string()),
157 function_name: None,
158 });
159 }
160 }
161
162 if self.has_memory_mapping_pattern(unknown_allocations) {
163 causes.push(UnknownMemoryCause::MemoryMapping {
164 mapping_type: MappingType::AnonymousMapping,
165 file_path: None,
166 });
167 }
168
169 if self.has_threading_pattern(unknown_allocations) {
170 causes.push(UnknownMemoryCause::ThreadingMemory {
171 thread_id: None,
172 memory_type: ThreadMemoryType::ThreadStack,
173 });
174 }
175
176 causes.push(UnknownMemoryCause::InstrumentationGaps {
177 gap_type: InstrumentationGapType::EarlyBootstrap,
178 description: "Memory allocated during early program initialization".to_string(),
179 });
180
181 causes
182 }
183
184 fn generate_reduction_strategies(
185 &self,
186 _categories: &[UnknownMemoryCategory],
187 ) -> Vec<UnknownRegionReductionStrategy> {
188 vec![
189 UnknownRegionReductionStrategy {
190 strategy_type: ReductionStrategyType::EnhancedInstrumentation,
191 description: "Implement more comprehensive memory tracking hooks".to_string(),
192 implementation_steps: vec![
193 "Hook into mmap/munmap system calls".to_string(),
194 "Intercept malloc/free in all loaded libraries".to_string(),
195 "Track thread creation and destruction".to_string(),
196 "Monitor dynamic library loading".to_string(),
197 ],
198 expected_improvement: 60.0,
199 implementation_difficulty: ImplementationDifficulty::Hard,
200 },
201 UnknownRegionReductionStrategy {
202 strategy_type: ReductionStrategyType::FfiCallInterception,
203 description: "Intercept and track FFI calls to external libraries".to_string(),
204 implementation_steps: vec![
205 "Wrap all extern function calls".to_string(),
206 "Track memory allocations in C libraries".to_string(),
207 "Monitor shared library symbol resolution".to_string(),
208 ],
209 expected_improvement: 25.0,
210 implementation_difficulty: ImplementationDifficulty::Medium,
211 },
212 UnknownRegionReductionStrategy {
213 strategy_type: ReductionStrategyType::MemoryMappingTracking,
214 description: "Track memory mapping operations comprehensively".to_string(),
215 implementation_steps: vec![
216 "Monitor /proc/self/maps changes".to_string(),
217 "Track mmap/mprotect/munmap calls".to_string(),
218 "Analyze virtual memory layout".to_string(),
219 ],
220 expected_improvement: 20.0,
221 implementation_difficulty: ImplementationDifficulty::Medium,
222 },
223 ]
224 }
225
226 fn is_in_stack_region(&self, _ptr: usize) -> bool {
227 false
228 }
229
230 fn is_in_heap_region(&self, _ptr: usize) -> bool {
231 false
232 }
233
234 fn is_known_system_region(&self, ptr: usize) -> bool {
235 self.known_system_regions
236 .iter()
237 .any(|((start, end), _)| ptr >= *start && ptr < *end)
238 }
239
240 fn identify_memory_mapped_regions<'a>(
241 &self,
242 allocations: &[&'a AllocationInfo],
243 ) -> Vec<&'a AllocationInfo> {
244 allocations
245 .iter()
246 .filter(|alloc| self.is_likely_mmap_allocation(alloc))
247 .copied()
248 .collect()
249 }
250
251 fn identify_thread_local_storage<'a>(
252 &self,
253 allocations: &[&'a AllocationInfo],
254 ) -> Vec<&'a AllocationInfo> {
255 allocations
256 .iter()
257 .filter(|alloc| self.is_likely_tls_allocation(alloc))
258 .copied()
259 .collect()
260 }
261
262 fn identify_library_regions<'a>(
263 &self,
264 allocations: &[&'a AllocationInfo],
265 ) -> Vec<&'a AllocationInfo> {
266 allocations
267 .iter()
268 .filter(|alloc| self.is_likely_library_allocation(alloc))
269 .copied()
270 .collect()
271 }
272
273 fn identify_ffi_allocations<'a>(
274 &self,
275 allocations: &[&'a AllocationInfo],
276 ) -> Vec<&'a AllocationInfo> {
277 allocations
278 .iter()
279 .filter(|alloc| self.is_likely_ffi_allocation(alloc))
280 .copied()
281 .collect()
282 }
283
284 fn identify_system_regions<'a>(
285 &self,
286 allocations: &[&'a AllocationInfo],
287 ) -> Vec<&'a AllocationInfo> {
288 allocations
289 .iter()
290 .filter(|alloc| self.is_likely_system_allocation(alloc))
291 .copied()
292 .collect()
293 }
294
295 fn identify_pre_tracking_allocations<'a>(
296 &self,
297 allocations: &[&'a AllocationInfo],
298 ) -> Vec<&'a AllocationInfo> {
299 allocations
300 .iter()
301 .filter(|alloc| self.is_likely_pre_tracking_allocation(alloc))
302 .copied()
303 .collect()
304 }
305
306 #[allow(clippy::manual_is_multiple_of)]
307 fn is_likely_mmap_allocation(&self, allocation: &AllocationInfo) -> bool {
308 allocation.size >= 4096 && allocation.ptr % 4096 == 0
309 }
310
311 fn is_likely_tls_allocation(&self, allocation: &AllocationInfo) -> bool {
312 allocation.size < 1024 && self.is_in_thread_range(allocation.ptr)
313 }
314
315 fn is_likely_library_allocation(&self, allocation: &AllocationInfo) -> bool {
316 self.library_mappings
317 .values()
318 .any(|lib| lib.contains_address(allocation.ptr))
319 }
320
321 fn is_likely_ffi_allocation(&self, allocation: &AllocationInfo) -> bool {
322 allocation.type_name.is_none() && allocation.var_name.is_none()
323 }
324
325 fn is_likely_system_allocation(&self, allocation: &AllocationInfo) -> bool {
326 allocation.ptr < 0x1000 || allocation.ptr > 0x7fff_0000_0000
327 }
328
329 fn is_likely_pre_tracking_allocation(&self, allocation: &AllocationInfo) -> bool {
330 allocation.timestamp_alloc < 1000
331 }
332
333 fn is_in_thread_range(&self, ptr: usize) -> bool {
334 self.thread_memory_ranges.values().any(|ranges| {
335 ranges
336 .iter()
337 .any(|(start, end)| ptr >= *start && ptr < *end)
338 })
339 }
340
341 fn has_memory_mapping_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
342 allocations
343 .iter()
344 .any(|alloc| self.is_likely_mmap_allocation(alloc))
345 }
346
347 fn has_threading_pattern(&self, allocations: &[&AllocationInfo]) -> bool {
348 allocations
349 .iter()
350 .any(|alloc| self.is_likely_tls_allocation(alloc))
351 }
352
353 fn guess_library_name(&self, allocation: &AllocationInfo) -> Option<String> {
354 for (name, info) in &self.library_mappings {
355 if info.contains_address(allocation.ptr) {
356 return Some(name.to_string());
357 }
358 }
359 None
360 }
361
362 fn generate_examples(
363 &self,
364 allocations: &[&AllocationInfo],
365 origin: &str,
366 ) -> Vec<UnknownMemoryExample> {
367 allocations
368 .iter()
369 .take(3)
370 .map(|alloc| UnknownMemoryExample {
371 address_range: (alloc.ptr, alloc.ptr + alloc.size),
372 size: alloc.size,
373 suspected_origin: origin.to_string(),
374 access_pattern: MemoryAccessPattern::Unknown,
375 })
376 .collect()
377 }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an allocation at `ptr` of `size` bytes with no variable or type
    /// name, an allocation timestamp of 1000, and every optional field unset.
    fn create_test_allocation(ptr: usize, size: usize) -> AllocationInfo {
        AllocationInfo {
            ptr,
            size,
            var_name: None,
            type_name: None,
            scope_name: None,
            timestamp_alloc: 1000,
            timestamp_dealloc: None,
            thread_id: std::thread::current().id(),
            thread_id_u64: 1,
            borrow_count: 0,
            stack_trace: None,
            is_leaked: false,
            lifetime_ms: None,
            borrow_info: None,
            clone_info: None,
            ownership_history_available: false,
            smart_pointer_info: None,
            memory_layout: None,
            generic_info: None,
            dynamic_type_info: None,
            runtime_state: None,
            stack_allocation: None,
            temporary_object: None,
            fragmentation_analysis: None,
            generic_instantiation: None,
            type_relationships: None,
            type_usage: None,
            function_call_tracking: None,
            lifecycle_tracking: None,
            access_tracking: None,
            drop_chain_analysis: None,
        }
    }

    /// Runs the analysis on `allocations` with an analyzer that has no
    /// regions, mappings, or thread ranges registered.
    fn analyze_with_empty_analyzer(allocations: &[AllocationInfo]) -> UnknownMemoryRegionAnalysis {
        UnknownMemoryAnalyzer::new().analyze_unknown_regions(allocations)
    }

    /// True when any reported category's type satisfies `is_wanted`.
    fn has_category(
        analysis: &UnknownMemoryRegionAnalysis,
        is_wanted: impl Fn(&UnknownRegionType) -> bool,
    ) -> bool {
        analysis
            .unknown_categories
            .iter()
            .any(|category| is_wanted(&category.category_type))
    }

    /// A freshly constructed analyzer has empty lookup tables.
    #[test]
    fn test_analyzer_new() {
        let UnknownMemoryAnalyzer {
            known_system_regions,
            library_mappings,
            thread_memory_ranges,
        } = UnknownMemoryAnalyzer::new();
        assert!(
            known_system_regions.is_empty(),
            "System regions should be empty"
        );
        assert!(
            library_mappings.is_empty(),
            "Library mappings should be empty"
        );
        assert!(
            thread_memory_ranges.is_empty(),
            "Thread ranges should be empty"
        );
    }

    /// `Default` yields an empty analyzer as well.
    #[test]
    fn test_analyzer_default() {
        let from_default = UnknownMemoryAnalyzer::default();
        assert!(
            from_default.known_system_regions.is_empty(),
            "Default should create empty analyzer"
        );
    }

    /// Empty input reports zero unknown bytes and a NaN-or-zero percentage.
    #[test]
    fn test_analyze_empty_allocations() {
        let analysis = analyze_with_empty_analyzer(&[]);
        assert_eq!(
            analysis.total_unknown_bytes, 0,
            "Empty input should have zero unknown bytes"
        );
        let percentage = analysis.unknown_percentage;
        assert!(
            percentage.is_nan() || percentage == 0.0,
            "Empty input should have NaN or 0% unknown"
        );
    }

    /// One unattributed allocation counts as unknown memory.
    #[test]
    fn test_analyze_single_allocation() {
        let analysis = analyze_with_empty_analyzer(&[create_test_allocation(0x10000, 1024)]);
        assert!(
            analysis.total_unknown_bytes > 0,
            "Should detect unknown bytes"
        );
        assert!(
            analysis.unknown_percentage > 0.0,
            "Should have unknown percentage"
        );
    }

    /// A page-sized, page-aligned allocation is categorized as memory-mapped.
    #[test]
    fn test_mmap_detection() {
        let analysis = analyze_with_empty_analyzer(&[create_test_allocation(0x1000, 4096)]);
        let found = has_category(&analysis, |kind| {
            matches!(kind, UnknownRegionType::MemoryMappedRegions)
        });
        assert!(found, "Should detect memory-mapped region");
    }

    /// An allocation without type or variable name is categorized as FFI.
    #[test]
    fn test_ffi_detection() {
        let analysis = analyze_with_empty_analyzer(&[create_test_allocation(0x10000, 512)]);
        let found = has_category(&analysis, |kind| {
            matches!(kind, UnknownRegionType::ExternalLibraryAllocations)
        });
        assert!(found, "Should detect FFI allocation");
    }

    /// A very low address is categorized as a system region.
    #[test]
    fn test_system_region_detection() {
        let analysis = analyze_with_empty_analyzer(&[create_test_allocation(0x100, 256)]);
        let found = has_category(&analysis, |kind| {
            matches!(kind, UnknownRegionType::SystemReservedRegions)
        });
        assert!(found, "Should detect system region");
    }

    /// An early allocation timestamp is categorized as pre-tracking.
    #[test]
    fn test_pre_tracking_detection() {
        let early = AllocationInfo {
            timestamp_alloc: 500,
            ..create_test_allocation(0x10000, 1024)
        };
        let analysis = analyze_with_empty_analyzer(&[early]);
        let found = has_category(&analysis, |kind| {
            matches!(kind, UnknownRegionType::PreTrackingAllocations)
        });
        assert!(found, "Should detect pre-tracking allocation");
    }

    /// Allocations inside a registered system region are not unknown.
    #[test]
    fn test_known_system_region_exclusion() {
        let region = SystemRegionInfo {
            region_type: "test".to_string(),
            description: "test region".to_string(),
            read_only: false,
        };
        let mut analyzer = UnknownMemoryAnalyzer::new();
        analyzer
            .known_system_regions
            .insert((0x10000, 0x20000), region);

        let analysis = analyzer.analyze_unknown_regions(&[create_test_allocation(0x15000, 1024)]);
        assert_eq!(
            analysis.total_unknown_bytes, 0,
            "Known region should not be unknown"
        );
    }

    /// Allocations inside a registered library mapping get the library category.
    #[test]
    fn test_library_mapping_detection() {
        let mapping = LibraryMappingInfo {
            start_address: 0x10000,
            end_address: 0x20000,
            permissions: "r-x".to_string(),
            file_path: "/lib/test.so".to_string(),
        };
        let mut analyzer = UnknownMemoryAnalyzer::new();
        analyzer
            .library_mappings
            .insert("test_lib".to_string(), mapping);

        let analysis = analyzer.analyze_unknown_regions(&[create_test_allocation(0x15000, 1024)]);
        let found = has_category(&analysis, |kind| {
            matches!(kind, UnknownRegionType::DynamicLibraryRegions)
        });
        assert!(found, "Should detect library allocation");
    }

    /// Small allocations inside a registered thread range get the TLS category.
    #[test]
    fn test_thread_range_detection() {
        let mut analyzer = UnknownMemoryAnalyzer::new();
        analyzer
            .thread_memory_ranges
            .insert(1, vec![(0x10000, 0x20000)]);

        let analysis = analyzer.analyze_unknown_regions(&[create_test_allocation(0x15000, 512)]);
        let found = has_category(&analysis, |kind| {
            matches!(kind, UnknownRegionType::ThreadLocalStorage)
        });
        assert!(found, "Should detect TLS allocation");
    }

    /// Strategies are produced even when there is nothing to analyze.
    #[test]
    fn test_reduction_strategies() {
        let strategies = analyze_with_empty_analyzer(&[]).reduction_strategies;
        assert!(!strategies.is_empty(), "Should generate strategies");

        let has_enhanced_instrumentation = strategies.iter().any(|strategy| {
            matches!(
                strategy.strategy_type,
                ReductionStrategyType::EnhancedInstrumentation
            )
        });
        assert!(
            has_enhanced_instrumentation,
            "Should include enhanced instrumentation strategy"
        );
    }

    /// At least one potential cause is reported for an unknown allocation.
    #[test]
    fn test_potential_causes() {
        let analysis = analyze_with_empty_analyzer(&[create_test_allocation(0x10000, 512)]);
        assert!(
            !analysis.potential_causes.is_empty(),
            "Should identify potential causes"
        );
    }

    /// Unknown bytes are summed over all unknown allocations.
    #[test]
    fn test_multiple_allocations() {
        let allocations: Vec<AllocationInfo> = [(0x10000, 1024), (0x20000, 2048), (0x30000, 4096)]
            .into_iter()
            .map(|(ptr, size)| create_test_allocation(ptr, size))
            .collect();
        let analysis = analyze_with_empty_analyzer(&allocations);
        assert_eq!(
            analysis.total_unknown_bytes, 7168,
            "Should sum all unknown bytes"
        );
    }

    /// `contains_address` treats the mapping as a half-open range.
    #[test]
    fn test_library_mapping_contains() {
        let mapping = LibraryMappingInfo {
            start_address: 0x1000,
            end_address: 0x2000,
            permissions: "r-x".to_string(),
            file_path: "/test.so".to_string(),
        };
        assert!(
            mapping.contains_address(0x1000),
            "Start address should be contained"
        );
        assert!(
            mapping.contains_address(0x1500),
            "Middle address should be contained"
        );
        assert!(
            !mapping.contains_address(0x2000),
            "End address should not be contained"
        );
        assert!(
            !mapping.contains_address(0x500),
            "Address before range should not be contained"
        );
    }

    /// A very large single allocation is processed without failure.
    #[test]
    fn test_large_allocation() {
        let huge = create_test_allocation(0x100000, usize::MAX / 4);
        let analysis = analyze_with_empty_analyzer(&[huge]);
        assert!(
            analysis.total_unknown_bytes > 0,
            "Should handle large allocation"
        );
    }

    /// The unknown percentage stays within the closed interval 0..=100.
    #[test]
    fn test_percentage_bounds() {
        let allocations = [
            create_test_allocation(0x10000, 1024),
            create_test_allocation(0x20000, 2048),
        ];
        let percentage = analyze_with_empty_analyzer(&allocations).unknown_percentage;
        assert!(
            percentage >= 0.0 && percentage <= 100.0,
            "Percentage should be between 0 and 100"
        );
    }
}