memscope_rs/analysis/unsafe_inference/memory_view.rs
//! Memory View - Safe Memory Access Layer
//!
//! Provides safe, bounds-checked access to memory content for type inference.
//! All memory access goes through this layer - no raw pointer dereferencing.
//!
//! # ValidRegions
//!
//! Uses dynamic mmap region detection with static fallback:
//! - Linux: Reads `/proc/self/maps` for precise regions
//! - Other platforms: Uses static address range bounds
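//!
//! # Example
//!
//! A minimal sketch of the public entry points (import path assumed to match
//! the doc examples below):
//!
//! ```rust
//! use memscope_rs::analysis::unsafe_inference::{is_valid_ptr, is_valid_ptr_static};
//!
//! assert!(!is_valid_ptr(0)); // null is always rejected
//! assert!(is_valid_ptr_static(0x10000)); // low user-space address within static bounds
//! ```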

use std::sync::RwLock;

// Static fallback bounds for different platforms
#[cfg(all(target_pointer_width = "64", target_os = "linux"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;

#[cfg(all(target_pointer_width = "64", target_os = "macos"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;

#[cfg(all(target_pointer_width = "64", target_os = "windows"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_0000;

#[cfg(all(
    target_pointer_width = "64",
    not(any(target_os = "linux", target_os = "macos", target_os = "windows"))
))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_ffff;

#[cfg(target_pointer_width = "32")]
const MAX_USER_ADDR: usize = 0x7fff_ffff;

// Windows-specific heap end addresses for 32-bit vs 64-bit
#[cfg(all(target_os = "windows", target_pointer_width = "64"))]
const MAX_HEAP_END: usize = 0x7FFF_FFFF_FFFF_FFFF;

#[cfg(all(target_os = "windows", target_pointer_width = "32"))]
const MAX_HEAP_END: usize = 0x7FFF_FFFF;

const MIN_VALID_ADDR: usize = 0x1000;

/// Represents a valid memory region from the process memory map.
#[derive(Clone, Debug)]
pub struct MemoryRegion {
    pub start: usize,
    pub end: usize,
}

/// Valid memory regions for pointer validation.
///
/// Uses dynamic detection on supported platforms with static fallback.
#[derive(Clone, Debug, Default)]
pub struct ValidRegions {
    regions: Vec<MemoryRegion>,
    is_dynamic: bool,
}

impl ValidRegions {
    /// Create empty regions (will use static bounds).
    pub fn empty() -> Self {
        Self {
            regions: Vec::new(),
            is_dynamic: false,
        }
    }

    /// Create from a list of memory regions.
    pub fn from_regions(mut regions: Vec<MemoryRegion>) -> Self {
        // Sort regions by start address for partition_point to work correctly
        regions.sort_by_key(|r| r.start);
        Self {
            regions,
            is_dynamic: true,
        }
    }

    /// Check if an address falls within valid regions.
    ///
    /// If dynamic regions are available, uses precise checking.
    /// Otherwise, falls back to static bounds.
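    ///
    /// # Example
    ///
    /// A sketch of the boundary semantics (regions are half-open: `start`
    /// inclusive, `end` exclusive); the import path is assumed to match this
    /// module's other doc examples:
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::{MemoryRegion, ValidRegions};
    ///
    /// let regions = ValidRegions::from_regions(vec![MemoryRegion {
    ///     start: 0x2000,
    ///     end: 0x3000,
    /// }]);
    /// assert!(regions.contains(0x2000)); // start is inclusive
    /// assert!(regions.contains(0x2fff));
    /// assert!(!regions.contains(0x3000)); // end is exclusive
    /// ```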
    pub fn contains(&self, addr: usize) -> bool {
        if addr <= MIN_VALID_ADDR {
            return false;
        }

        if self.is_dynamic && !self.regions.is_empty() {
            // Use partition_point to find the first region where start > addr,
            // then check if the previous region contains addr
            let idx = self.regions.partition_point(|region| region.start <= addr);

            if idx > 0 {
                let region = &self.regions[idx - 1];
                return addr < region.end;
            }
            false
        } else {
            // Static fallback
            addr < MAX_USER_ADDR
        }
    }

    /// Get the number of regions.
    pub fn len(&self) -> usize {
        self.regions.len()
    }

    /// Check if regions are empty.
    pub fn is_empty(&self) -> bool {
        self.regions.is_empty()
    }

    /// Check if using dynamic detection.
    pub fn is_dynamic(&self) -> bool {
        self.is_dynamic
    }

    /// Debug dump regions to stderr
    #[cfg(test)]
    pub fn debug_dump(&self) {
        eprintln!("ValidRegions (is_dynamic={}):", self.is_dynamic);
        for (i, region) in self.regions.iter().enumerate() {
            eprintln!("  Region {}: 0x{:x} - 0x{:x}", i, region.start, region.end);
        }
    }
}

/// Global cached valid regions.
static VALID_REGIONS: RwLock<Option<ValidRegions>> = RwLock::new(None);

/// Merge overlapping or adjacent memory regions.
///
/// Expects `regions` to be sorted by start address.
#[cfg(target_os = "linux")]
fn merge_regions(regions: Vec<MemoryRegion>) -> Vec<MemoryRegion> {
    let mut iter = regions.into_iter();
    let mut current = match iter.next() {
        Some(region) => region,
        None => return Vec::new(),
    };

    let mut merged: Vec<MemoryRegion> = Vec::new();
    for region in iter {
        // `<=` also merges regions that touch exactly (adjacent), matching
        // the doc comment above
        if region.start <= current.end {
            current.end = current.end.max(region.end);
        } else {
            merged.push(current);
            current = region;
        }
    }
    merged.push(current);

    merged
}

/// Get valid memory regions for the current process.
///
/// Platform-specific implementation:
/// - Linux: Reads `/proc/self/maps`
/// - Other platforms: a single conservative wide region (see the fallbacks below)
#[cfg(target_os = "linux")]
fn get_valid_regions_impl() -> ValidRegions {
    use std::fs;

    let content = match fs::read_to_string("/proc/self/maps") {
        Ok(c) => c,
        Err(_) => {
            // Fallback: use conservative estimates if /proc/self/maps is unavailable
            return get_conservative_regions();
        }
    };

    let mut regions: Vec<MemoryRegion> = content
        .lines()
        .filter_map(|line| {
            let mut parts = line.split_whitespace();

            // Parse the address range (e.g., "7f1234567000-7f1234568000")
            let (start_str, end_str) = parts.next()?.split_once('-')?;
            let start = usize::from_str_radix(start_str, 16).ok()?;
            let end = usize::from_str_radix(end_str, 16).ok()?;

            // Filter to readable regions only (r--, r-x, or rw-)
            let perms = parts.next()?;
            if !perms.starts_with('r') {
                return None;
            }

            Some(MemoryRegion { start, end })
        })
        .collect();

    // Sort by start address for binary search
    regions.sort_by_key(|r| r.start);

    // Merge overlapping/adjacent regions
    regions = merge_regions(regions);

    // If no regions found, use conservative estimates
    if regions.is_empty() {
        return get_conservative_regions();
    }

    // /proc/self/maps already includes stack, heap, and all mapped regions,
    // so no additional regions are needed
    ValidRegions::from_regions(regions)
}

/// Get conservative memory regions as fallback
#[cfg(target_os = "linux")]
fn get_conservative_regions() -> ValidRegions {
    let regions = vec![MemoryRegion {
        start: 0x10000,
        // Use the static per-platform bound so this compiles on both
        // 32-bit and 64-bit targets
        end: MAX_USER_ADDR,
    }];

    ValidRegions::from_regions(regions)
}

/// Get valid memory regions for the current process (Windows).
///
/// Uses a conservative fallback: a single wide region, since Windows
/// allocators can place memory anywhere in the user address space.
#[cfg(target_os = "windows")]
fn get_valid_regions_impl() -> ValidRegions {
    let regions = vec![MemoryRegion {
        start: 0x10000,
        end: MAX_HEAP_END, // Platform-specific: 64-bit or 32-bit
    }];

    ValidRegions::from_regions(regions)
}

/// Get valid memory regions for the current process (macOS).
///
/// Uses a conservative fallback: a single wide region covering the user
/// address space. Separate stack and heap regions are not tracked because
/// macOS allocators can place memory anywhere in the address space, and the
/// stack is already covered by this range.
#[cfg(target_os = "macos")]
fn get_valid_regions_impl() -> ValidRegions {
    let regions = vec![MemoryRegion {
        start: 0x1000,              // Start from a low address
        end: 0x7FFF_FFFF_FFFF_FFFF, // i64::MAX; deliberately wider than the static bound
    }];

    ValidRegions::from_regions(regions)
}

/// Get valid memory regions for the current process (non-Linux, non-Windows,
/// non-macOS). Uses a conservative single wide region as fallback for
/// unknown platforms.
#[cfg(all(
    not(target_os = "linux"),
    not(target_os = "windows"),
    not(target_os = "macos")
))]
fn get_valid_regions_impl() -> ValidRegions {
    let regions = vec![MemoryRegion {
        start: 0x10000,
        // Use the static per-platform bound so this compiles on both
        // 32-bit and 64-bit targets
        end: MAX_USER_ADDR,
    }];

    ValidRegions::from_regions(regions)
}

/// Get cached valid regions, initializing if needed.
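///
/// The result is cached process-wide; subsequent calls return a clone of the
/// cached value. A minimal doctest sketch of the expected invariant (import
/// path assumed, as in this module's other examples):
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::get_valid_regions;
///
/// // Every platform implementation installs at least one region.
/// let regions = get_valid_regions();
/// assert!(!regions.is_empty());
/// ```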
pub fn get_valid_regions() -> ValidRegions {
    // Fast path: return a clone if already initialized
    {
        let read_guard = match VALID_REGIONS.read() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        };
        if let Some(regions) = read_guard.as_ref() {
            return regions.clone();
        }
    }

    // Slow path: initialize under the write lock, double-checking to prevent
    // a TOCTOU race between releasing the read lock and acquiring the write lock
    let mut write_guard = match VALID_REGIONS.write() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    };
    if let Some(regions) = write_guard.as_ref() {
        return regions.clone();
    }

    // Initialize while holding the write lock
    let regions = get_valid_regions_impl();
    *write_guard = Some(regions.clone());
    regions
}

/// Check if a pointer value is valid using dynamic regions with static fallback.
pub fn is_valid_ptr(p: usize) -> bool {
    get_valid_regions().contains(p)
}

/// Check if a pointer value is valid using only static bounds.
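///
/// A small doctest sketch (import path assumed, as in this module's other
/// examples); both assertions hold on every supported target:
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::is_valid_ptr_static;
///
/// assert!(!is_valid_ptr_static(0)); // null is never valid
/// assert!(is_valid_ptr_static(0x10000)); // low user-space address
/// ```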
pub fn is_valid_ptr_static(p: usize) -> bool {
    p > MIN_VALID_ADDR && p < MAX_USER_ADDR
}

/// Memory view for safe memory access.
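///
/// Borrows its data, so it is zero-copy; use [`OwnedMemoryView`] when the
/// bytes must outlive the borrow. A short doctest sketch (import path
/// assumed, as in this module's other examples):
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::MemoryView;
///
/// let data = [0xAAu8; 8];
/// let view = MemoryView::new(&data);
/// assert_eq!(view.len(), 8);
/// assert_eq!(view.read_u8(7), Some(0xAA));
/// ```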
pub struct MemoryView<'a> {
    data: &'a [u8],
}

/// Owned memory view that owns its data.
///
/// This is a non-borrowing version of `MemoryView` that owns the underlying
/// buffer. Useful when the memory view needs to outlive the original scope.
///
/// # When to Use OwnedMemoryView vs MemoryView
///
/// | Scenario | Use |
/// |----------|-----|
/// | Temporary analysis within a function | `MemoryView` |
/// | Storing memory data for later use | `OwnedMemoryView` |
/// | Returning memory data from a function | `OwnedMemoryView` |
/// | Zero-copy analysis | `MemoryView` |
///
/// # Lifetime Management
///
/// `OwnedMemoryView` owns its data via `Vec<u8>`, so it has no lifetime parameter.
/// This means:
/// - The data remains valid as long as the `OwnedMemoryView` exists
/// - No need to worry about the underlying data being dropped
/// - Slightly higher memory overhead due to ownership
///
/// # Example
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
///
/// // Create from a vector (takes ownership)
/// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
/// let view = OwnedMemoryView::new(data);
///
/// // Read values safely
/// if let Some(value) = view.read_usize(0) {
///     println!("First usize: {}", value);
/// }
///
/// // Check bounds
/// if let Some(byte) = view.read_u8(10) {
///     println!("Byte at offset 10: {}", byte);
/// } else {
///     println!("Offset 10 out of bounds");
/// }
///
/// // Access raw slice when needed
/// let slice = view.as_slice();
/// println!("Total bytes: {}", slice.len());
/// ```
///
/// # Memory Safety
///
/// All read methods perform bounds checking and return `Option` types.
/// This ensures safe access even with invalid offsets:
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
///
/// let view = OwnedMemoryView::new(vec![0u8; 4]);
///
/// // Reads that would run past the end return None instead of panicking
/// // (offset + size_of::<usize>() exceeds the 4-byte buffer)
/// assert!(view.read_usize(1).is_none());
/// assert!(view.read_u8(4).is_none()); // offset out of bounds
/// ```
pub struct OwnedMemoryView {
    data: Vec<u8>,
}

impl OwnedMemoryView {
    /// Create a new `OwnedMemoryView` from a `Vec<u8>`.
    ///
    /// This takes ownership of the vector, so no copying occurs.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4]);
    /// assert_eq!(view.len(), 4);
    /// ```
    pub fn new(data: Vec<u8>) -> Self {
        Self { data }
    }

    /// Returns the length of the underlying data.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3]);
    /// assert_eq!(view.len(), 3);
    /// ```
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if the underlying data is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![]);
    /// assert!(view.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

449
450 /// Read a `usize` value from the specified offset.
451 ///
452 /// Reads `std::mem::size_of::<usize>()` bytes starting at `offset`
453 /// and interprets them as a little-endian `usize`.
454 ///
455 /// Returns `None` if the read would exceed the buffer bounds.
456 ///
457 /// # Example
458 ///
459 /// ```rust
460 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
461 /// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
462 /// let view = OwnedMemoryView::new(data);
463 ///
464 /// if let Some(value) = view.read_usize(0) {
465 /// println!("Value: 0x{:x}", value);
466 /// }
467 /// ```
468 pub fn read_usize(&self, offset: usize) -> Option<usize> {
469 let size = std::mem::size_of::<usize>();
470 if offset.saturating_add(size) > self.data.len() {
471 return None;
472 }
473 let mut buf = [0u8; 8];
474 buf[..size].copy_from_slice(&self.data[offset..offset + size]);
475 Some(usize::from_le_bytes(buf))
476 }
477
    /// Read a single byte from the specified offset.
    ///
    /// Returns `None` if the offset is out of bounds.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![0x10, 0x20, 0x30]);
    ///
    /// assert_eq!(view.read_u8(0), Some(0x10));
    /// assert_eq!(view.read_u8(2), Some(0x30));
    /// assert_eq!(view.read_u8(3), None); // out of bounds
    /// ```
    pub fn read_u8(&self, offset: usize) -> Option<u8> {
        self.data.get(offset).copied()
    }

    /// Returns a slice of the underlying data.
    ///
    /// This provides direct access to the bytes without copying.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5]);
    /// let slice = view.as_slice();
    /// assert_eq!(slice, &[1, 2, 3, 4, 5]);
    /// ```
    pub fn as_slice(&self) -> &[u8] {
        &self.data
    }

    /// Returns an iterator over chunks of the underlying data.
    ///
    /// Each chunk has at most `chunk_size` elements.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5, 6]);
    /// let chunks: Vec<_> = view.chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5, 6]]);
    /// ```
    pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &[u8]> {
        self.data.chunks(chunk_size)
    }
}

impl<'a> MemoryView<'a> {
    /// Create a view over a borrowed byte slice.
    pub fn new(data: &'a [u8]) -> Self {
        Self { data }
    }

    /// Returns the length of the underlying data.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if the underlying data is empty.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Read a little-endian `usize` at `offset`, or `None` if out of bounds.
    pub fn read_usize(&self, offset: usize) -> Option<usize> {
        // Size the buffer to the platform's pointer width so this compiles
        // on both 32-bit and 64-bit targets
        const SIZE: usize = std::mem::size_of::<usize>();
        let bytes = self.data.get(offset..offset.checked_add(SIZE)?)?;
        let mut buf = [0u8; SIZE];
        buf.copy_from_slice(bytes);
        Some(usize::from_le_bytes(buf))
    }

    /// Read a single byte at `offset`, or `None` if out of bounds.
    pub fn read_u8(&self, offset: usize) -> Option<u8> {
        self.data.get(offset).copied()
    }

    /// Returns the last byte, or `None` if the view is empty.
    pub fn last_byte(&self) -> Option<u8> {
        self.data.last().copied()
    }

    /// Returns the underlying slice with the original lifetime.
    pub fn as_slice(&self) -> &'a [u8] {
        self.data
    }

    /// Returns an iterator over chunks of at most `chunk_size` bytes.
    pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &'a [u8]> {
        self.data.chunks(chunk_size)
    }
}

/// Count valid pointers in a memory view.
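///
/// Interprets the view as consecutive little-endian pointer-sized words and
/// counts those that land in a valid region. A hedged doctest sketch (import
/// path assumed, as in this module's other examples):
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::{count_valid_pointers, MemoryView};
///
/// // Embed the address of a live stack variable; it should fall inside a
/// // mapped region (dynamic detection) or the static bounds (fallback).
/// let x = 42u32;
/// let addr = &x as *const u32 as usize;
/// let mut data = [0u8; 2 * std::mem::size_of::<usize>()];
/// data[..std::mem::size_of::<usize>()].copy_from_slice(&addr.to_le_bytes());
///
/// let view = MemoryView::new(&data);
/// assert!(count_valid_pointers(&view) >= 1);
/// ```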
pub fn count_valid_pointers(view: &MemoryView) -> usize {
    // Size the buffer to the platform's pointer width so this compiles on
    // both 32-bit and 64-bit targets
    const PTR_SIZE: usize = std::mem::size_of::<usize>();
    let mut count = 0;
    for chunk in view.chunks(PTR_SIZE) {
        if chunk.len() < PTR_SIZE {
            break;
        }
        let mut buf = [0u8; PTR_SIZE];
        buf.copy_from_slice(chunk);
        if is_valid_ptr(usize::from_le_bytes(buf)) {
            count += 1;
        }
    }
    count
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[cfg(target_pointer_width = "64")] // asserted values are 8-byte reads
    fn test_memory_view_read_usize() {
        let data: [u8; 16] = [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
            0x0e, 0x0f,
        ];
        let view = MemoryView::new(&data);

        let val0 = view.read_usize(0).unwrap();
        let val8 = view.read_usize(8).unwrap();

        assert_eq!(val0, 0x0706050403020100);
        assert_eq!(val8, 0x0f0e0d0c0b0a0908);
    }

    #[test]
    #[cfg(target_pointer_width = "64")] // offsets assume 8-byte reads
    fn test_memory_view_bounds_check() {
        let data = [0u8; 8];
        let view = MemoryView::new(&data);

        assert!(view.read_usize(0).is_some());
        assert!(view.read_usize(1).is_none());
        assert!(view.read_usize(8).is_none());
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_is_valid_ptr_static() {
        assert!(!is_valid_ptr_static(0));
        assert!(!is_valid_ptr_static(0x1000));
        assert!(is_valid_ptr_static(0x10000));
        assert!(is_valid_ptr_static(0x7fff_ffff_0000));
        assert!(!is_valid_ptr_static(0xffff_ffff_ffff_ffff));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_is_valid_ptr() {
        // Should work with either dynamic or static
        assert!(!is_valid_ptr(0));
        assert!(!is_valid_ptr(0x1000));
        // These should pass with static fallback
        assert!(is_valid_ptr(0x10000));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_count_valid_pointers() {
        let mut data = [0u8; 24];
        let valid_ptr: usize = 0x10000;
        data[..8].copy_from_slice(&valid_ptr.to_le_bytes());

        let view = MemoryView::new(&data);
        assert_eq!(count_valid_pointers(&view), 1);
    }

    #[test]
    fn test_valid_regions_contains() {
        let regions = ValidRegions::empty();
        // Empty regions should use static bounds
        assert!(regions.contains(0x10000));
        assert!(!regions.contains(0));
    }

    #[test]
    fn test_valid_regions_from_regions() {
        let regions = ValidRegions::from_regions(vec![
            MemoryRegion {
                start: 0x1000,
                end: 0x2000,
            },
            MemoryRegion {
                start: 0x3000,
                end: 0x4000,
            },
        ]);

        assert!(regions.is_dynamic());
        assert!(regions.contains(0x1500));
        assert!(regions.contains(0x3500));
        assert!(!regions.contains(0x2500));
        assert!(!regions.contains(0x5000));
    }

    #[test]
    fn test_get_valid_regions() {
        let regions = get_valid_regions();
        // Should return something (dynamic or static)
        // Just verify it doesn't panic
        let _ = regions.contains(0x10000);
    }
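
    // Hedged additions: exercise region merging and the little-endian
    // round-trip of read_usize. merge_regions is compiled on Linux only,
    // so its test carries the same cfg gate.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_merge_regions_overlapping_and_adjacent() {
        let merged = merge_regions(vec![
            MemoryRegion {
                start: 0x1000,
                end: 0x2000,
            },
            MemoryRegion {
                start: 0x1800,
                end: 0x2800,
            }, // overlaps the first
            MemoryRegion {
                start: 0x2800,
                end: 0x3000,
            }, // adjacent to the merged run
            MemoryRegion {
                start: 0x5000,
                end: 0x6000,
            }, // disjoint
        ]);

        assert_eq!(merged.len(), 2);
        assert_eq!(merged[0].start, 0x1000);
        assert_eq!(merged[0].end, 0x3000);
        assert_eq!(merged[1].start, 0x5000);
    }

    #[test]
    fn test_owned_memory_view_read_usize_round_trip() {
        let value: usize = 0x1234_5678;
        let view = OwnedMemoryView::new(value.to_le_bytes().to_vec());

        assert_eq!(view.read_usize(0), Some(value));
        assert_eq!(view.read_usize(1), None); // would run past the end
    }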
}