// memscope_rs/analysis/unsafe_inference/memory_view.rs
1//! Memory View - Safe Memory Access Layer
2//!
3//! Provides safe, bounds-checked access to memory content for type inference.
4//! All memory access goes through this layer - no raw pointer dereferencing.
5//!
6//! # ValidRegions
7//!
8//! Uses dynamic mmap region detection with static fallback:
9//! - Linux: Reads `/proc/self/maps` for precise regions
10//! - Other platforms: Uses static address range bounds
11
12use std::sync::RwLock;
13
// Static fallback bounds for different platforms.
//
// Used when no dynamic memory-map information is available. These are
// conservative approximations of the top of user space, not exact
// kernel limits — NOTE(review): verify against each platform's
// documented address-space layout.

// 64-bit Linux: canonical user-space addresses end below this bound.
#[cfg(all(target_pointer_width = "64", target_os = "linux"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;

// 64-bit macOS: the same static ceiling as Linux is used here.
#[cfg(all(target_pointer_width = "64", target_os = "macos"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;

// 64-bit Windows: user space ends slightly lower than on Linux/macOS.
#[cfg(all(target_pointer_width = "64", target_os = "windows"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_0000;

// Unknown 64-bit platforms: widest 47-bit user range as a safe default.
#[cfg(all(
    target_pointer_width = "64",
    not(any(target_os = "linux", target_os = "macos", target_os = "windows"))
))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_ffff;

// 32-bit targets: 2 GiB user-space ceiling.
#[cfg(target_pointer_width = "32")]
const MAX_USER_ADDR: usize = 0x7fff_ffff;

// Windows-specific heap end addresses for 32-bit vs 64-bit.
// NOTE(review): the 64-bit value spans the entire positive address
// range, wider than MAX_USER_ADDR — presumably deliberate conservatism
// for Windows allocators; confirm intent.
#[cfg(all(target_os = "windows", target_pointer_width = "64"))]
const MAX_HEAP_END: usize = 0x7FFF_FFFF_FFFF_FFFF;

#[cfg(all(target_os = "windows", target_pointer_width = "32"))]
const MAX_HEAP_END: usize = 0x7FFF_FFFF;

// Addresses at or below this bound (the conventional null/guard page
// area) are never considered valid pointers.
const MIN_VALID_ADDR: usize = 0x1000;
41
/// Represents a valid memory region from process memory map.
///
/// Half-open range: `start` is inclusive, `end` is exclusive — matching
/// the `start-end` convention of `/proc/self/maps` and the `addr < end`
/// check in `ValidRegions::contains`.
#[derive(Clone, Debug)]
pub struct MemoryRegion {
    /// Inclusive start address of the region.
    pub start: usize,
    /// Exclusive end address of the region.
    pub end: usize,
}
48
/// Valid memory regions for pointer validation.
///
/// Uses dynamic detection on supported platforms with static fallback.
#[derive(Clone, Debug, Default)]
pub struct ValidRegions {
    // Sorted by `start`; `contains` relies on this ordering for its
    // `partition_point` binary search.
    regions: Vec<MemoryRegion>,
    // True when `regions` came from real process-map detection; false
    // means `contains` falls back to the static MAX_USER_ADDR bound.
    is_dynamic: bool,
}
57
58impl ValidRegions {
59 /// Create empty regions (will use static bounds).
60 pub fn empty() -> Self {
61 Self {
62 regions: Vec::new(),
63 is_dynamic: false,
64 }
65 }
66
67 /// Create from a list of memory regions.
68 pub fn from_regions(mut regions: Vec<MemoryRegion>) -> Self {
69 // Sort regions by start address for partition_point to work correctly
70 regions.sort_by_key(|r| r.start);
71 Self {
72 regions,
73 is_dynamic: true,
74 }
75 }
76
77 /// Check if an address falls within valid regions.
78 ///
79 /// If dynamic regions are available, uses precise checking.
80 /// Otherwise, falls back to static bounds.
81 pub fn contains(&self, addr: usize) -> bool {
82 if addr <= MIN_VALID_ADDR {
83 return false;
84 }
85
86 if self.is_dynamic && !self.regions.is_empty() {
87 // Use partition_point to find the first region where start > addr
88 // Then check if the previous region contains addr
89 let idx = self.regions.partition_point(|region| region.start <= addr);
90
91 if idx > 0 {
92 let region = &self.regions[idx - 1];
93 return addr < region.end;
94 }
95 false
96 } else {
97 // Static fallback
98 addr < MAX_USER_ADDR
99 }
100 }
101
102 /// Get the number of regions.
103 pub fn len(&self) -> usize {
104 self.regions.len()
105 }
106
107 /// Check if regions are empty.
108 pub fn is_empty(&self) -> bool {
109 self.regions.is_empty()
110 }
111
112 /// Check if using dynamic detection.
113 pub fn is_dynamic(&self) -> bool {
114 self.is_dynamic
115 }
116
117 /// Debug dump regions to stderr
118 #[cfg(test)]
119 pub fn debug_dump(&self) {
120 eprintln!("ValidRegions (is_dynamic={}):", self.is_dynamic);
121 for (i, region) in self.regions.iter().enumerate() {
122 eprintln!(" Region {}: 0x{:x} - 0x{:x}", i, region.start, region.end);
123 }
124 }
125}
126
/// Global cached valid regions.
///
/// `None` until the first call to `get_valid_regions`, which fills it
/// under the write lock; later callers clone the cached value through
/// the read lock.
static VALID_REGIONS: RwLock<Option<ValidRegions>> = RwLock::new(None);
129
130/// Merge overlapping or adjacent memory regions.
131#[cfg(target_os = "linux")]
132fn merge_regions(regions: Vec<MemoryRegion>) -> Vec<MemoryRegion> {
133 if regions.is_empty() {
134 return regions;
135 }
136
137 let mut merged: Vec<MemoryRegion> = Vec::with_capacity(regions.len());
138 let mut current = regions[0].clone();
139
140 for region in regions.into_iter().skip(1) {
141 if region.start < current.end {
142 current.end = current.end.max(region.end);
143 } else {
144 merged.push(current);
145 current = region;
146 }
147 }
148 merged.push(current);
149
150 merged
151}
152
153/// Get valid memory regions for the current process.
154///
155/// Platform-specific implementation:
156/// - Linux: Reads `/proc/self/maps`
157/// - Other: Returns empty (uses static bounds)
158#[cfg(target_os = "linux")]
159fn get_valid_regions_impl() -> ValidRegions {
160 use std::fs;
161
162 let content = match fs::read_to_string("/proc/self/maps") {
163 Ok(c) => c,
164 Err(_) => {
165 // Fallback: use conservative estimates if /proc/self/maps is unavailable
166 return get_conservative_regions();
167 }
168 };
169
170 let mut regions: Vec<MemoryRegion> = content
171 .lines()
172 .filter_map(|line| {
173 let parts: Vec<&str> = line.split_whitespace().collect();
174 if parts.is_empty() {
175 return None;
176 }
177
178 let range: Vec<&str> = parts[0].split('-').collect();
179 if range.len() != 2 {
180 return None;
181 }
182
183 let start = usize::from_str_radix(range[0], 16).ok()?;
184 let end = usize::from_str_radix(range[1], 16).ok()?;
185
186 // Filter to readable regions only (r-- or r-x or rw-)
187 if parts.len() < 2 {
188 return None;
189 }
190 let perms = parts[1];
191 if !perms.starts_with('r') {
192 return None;
193 }
194
195 Some(MemoryRegion { start, end })
196 })
197 .collect();
198
199 // Sort by start address for binary search
200 regions.sort_by_key(|r| r.start);
201
202 // Merge overlapping/adjacent regions
203 regions = merge_regions(regions);
204
205 // If no regions found, use conservative estimates
206 if regions.is_empty() {
207 return get_conservative_regions();
208 }
209
210 // /proc/self/maps already includes stack, heap, and all mapped regions
211 // No need to add additional regions
212 ValidRegions::from_regions(regions)
213}
214
215/// Get conservative memory regions as fallback
216#[cfg(target_os = "linux")]
217fn get_conservative_regions() -> ValidRegions {
218 let regions = vec![MemoryRegion {
219 start: 0x10000,
220 end: 0x7FFF_FFFF_FFFF_FFFF, // x64 address space
221 }];
222
223 ValidRegions::from_regions(regions)
224}
225
/// Get valid memory regions for the current process (Windows).
///
/// Uses a conservative approach to detect valid memory regions:
/// - Single wide region covering entire user-space address range
#[cfg(target_os = "windows")]
fn get_valid_regions_impl() -> ValidRegions {
    // Windows allocators can place memory anywhere in user space, so a
    // single wide region is used instead of enumerating allocations.
    // MAX_HEAP_END is selected per pointer width (32- vs 64-bit).
    ValidRegions::from_regions(vec![MemoryRegion {
        start: 0x10000,
        end: MAX_HEAP_END,
    }])
}
243
/// Get valid memory regions for the current process (macOS).
///
/// Uses a conservative approach: a single wide region spanning the
/// user-space address range. macOS allocators can place memory in
/// various ranges depending on the allocator, and the stack is covered
/// by this range as well, so no separate stack/heap regions are needed.
#[cfg(target_os = "macos")]
fn get_valid_regions_impl() -> ValidRegions {
    // Use MAX_USER_ADDR instead of a raw 0x7FFF_FFFF_FFFF_FFFF literal
    // so the dynamic region agrees with the static fallback bound and
    // never reports kernel-space addresses as valid.
    let regions = vec![MemoryRegion {
        start: MIN_VALID_ADDR, // same low-address floor as `contains`
        end: MAX_USER_ADDR,
    }];

    ValidRegions::from_regions(regions)
}
265
/// Get valid memory regions for the current process (non-Linux, non-Windows, non-macOS).
/// Uses conservative approach as fallback for unknown platforms.
#[cfg(all(
    not(target_os = "linux"),
    not(target_os = "windows"),
    not(target_os = "macos")
))]
fn get_valid_regions_impl() -> ValidRegions {
    // Single wide region covering user space. MAX_USER_ADDR replaces
    // the previous mis-grouped literal 0x7FF_FFFFF_FFFF_FFFF, which
    // also does not fit in a 32-bit usize and broke 32-bit builds on
    // the platforms this cfg covers.
    ValidRegions::from_regions(vec![MemoryRegion {
        start: 0x10000,
        end: MAX_USER_ADDR,
    }])
}
284
285/// Get cached valid regions, initializing if needed.
286pub fn get_valid_regions() -> ValidRegions {
287 // Fast path: check if already initialized
288 {
289 let read_guard = match VALID_REGIONS.read() {
290 Ok(guard) => guard,
291 Err(poisoned) => poisoned.into_inner(),
292 };
293 if read_guard.is_some() {
294 return read_guard
295 .as_ref()
296 .cloned()
297 .expect("VALID_REGIONS should be Some after is_some() check (read path)");
298 }
299 }
300
301 // Slow path: need to initialize
302 // Use write lock and double-check to prevent TOCTOU race
303 let mut write_guard = match VALID_REGIONS.write() {
304 Ok(guard) => guard,
305 Err(poisoned) => poisoned.into_inner(),
306 };
307
308 // Double-check after acquiring write lock
309 if write_guard.is_some() {
310 return write_guard
311 .as_ref()
312 .cloned()
313 .expect("VALID_REGIONS should be Some after is_some() check (write path)");
314 }
315
316 // Initialize while holding the write lock
317 let regions = get_valid_regions_impl();
318 *write_guard = Some(regions.clone());
319 regions
320}
321
/// Check if a pointer value is valid using dynamic regions with static fallback.
///
/// Convenience wrapper over the cached `get_valid_regions()` result;
/// the first call pays the one-time region-detection cost.
pub fn is_valid_ptr(p: usize) -> bool {
    get_valid_regions().contains(p)
}
326
327/// Check if a pointer value is valid using only static bounds.
328pub fn is_valid_ptr_static(p: usize) -> bool {
329 p > MIN_VALID_ADDR && p < MAX_USER_ADDR
330}
331
/// Memory view for safe memory access.
///
/// Borrowed, zero-copy counterpart of `OwnedMemoryView`: wraps a byte
/// slice and exposes only bounds-checked reads.
pub struct MemoryView<'a> {
    // Underlying bytes; every read is bounds-checked against this slice.
    data: &'a [u8],
}
336
/// Owned memory view that owns its data.
///
/// This is a non-reference version of `MemoryView` that owns the underlying
/// buffer. Useful when the memory view needs to outlive the original scope.
///
/// # When to Use OwnedMemoryView vs MemoryView
///
/// | Scenario | Use |
/// |----------|-----|
/// | Temporary analysis within a function | `MemoryView<&[u8]>` |
/// | Storing memory data for later use | `OwnedMemoryView` |
/// | Returning memory data from a function | `OwnedMemoryView` |
/// | Zero-copy analysis | `MemoryView<&[u8]>` |
///
/// # Lifetime Management
///
/// `OwnedMemoryView` owns its data via `Vec<u8>`, so it has no lifetime parameter.
/// This means:
/// - The data remains valid as long as the `OwnedMemoryView` exists
/// - No need to worry about the underlying data being dropped
/// - Slightly higher memory overhead due to ownership
///
/// # Example
///
/// ```rust
/// use memscope_rs::unsafe_inference::OwnedMemoryView;
///
/// // Create from a vector (takes ownership)
/// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
/// let view = OwnedMemoryView::new(data);
///
/// // Read values safely
/// if let Some(value) = view.read_usize(0) {
///     println!("First usize: {}", value);
/// }
///
/// // Check bounds
/// if let Some(byte) = view.read_u8(10) {
///     println!("Byte at offset 10: {}", byte);
/// } else {
///     println!("Offset 10 out of bounds");
/// }
///
/// // Access raw slice when needed
/// let slice = view.as_slice();
/// println!("Total bytes: {}", slice.len());
/// ```
///
/// # Memory Safety
///
/// All read methods perform bounds checking and return `Option` types.
/// This ensures safe access even with invalid offsets. The example below
/// assumes a 64-bit target, where `usize` is 8 bytes:
///
/// ```rust
/// use memscope_rs::unsafe_inference::OwnedMemoryView;
///
/// let view = OwnedMemoryView::new(vec![0u8; 4]);
///
/// // This returns None (reading 8 bytes from a 4-byte buffer)
/// assert!(view.read_usize(0).is_none());
///
/// // This returns None (offset + size > len)
/// assert!(view.read_usize(1).is_none());
/// ```
pub struct OwnedMemoryView {
    // Owned backing buffer; all reads are bounds-checked against it.
    data: Vec<u8>,
}
404
405impl OwnedMemoryView {
406 /// Create a new `OwnedMemoryView` from a `Vec<u8>`.
407 ///
408 /// This takes ownership of the vector, so no copying occurs.
409 ///
410 /// # Example
411 ///
412 /// ```rust
413 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
414 /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4]);
415 /// assert_eq!(view.len(), 4);
416 /// ```
417 pub fn new(data: Vec<u8>) -> Self {
418 Self { data }
419 }
420
421 /// Returns the length of the underlying data.
422 ///
423 /// # Example
424 ///
425 /// ```rust
426 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
427 /// let view = OwnedMemoryView::new(vec![1, 2, 3]);
428 /// assert_eq!(view.len(), 3);
429 /// ```
430 pub fn len(&self) -> usize {
431 self.data.len()
432 }
433
434 /// Returns `true` if the underlying data is empty.
435 ///
436 /// # Example
437 ///
438 /// ```rust
439 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
440 /// let view = OwnedMemoryView::new(vec![]);
441 /// assert!(view.is_empty());
442 /// ```
443 pub fn is_empty(&self) -> bool {
444 self.data.is_empty()
445 }
446
447 /// Read a `usize` value from the specified offset.
448 ///
449 /// Reads `std::mem::size_of::<usize>()` bytes starting at `offset`
450 /// and interprets them as a little-endian `usize`.
451 ///
452 /// Returns `None` if the read would exceed the buffer bounds.
453 ///
454 /// # Example
455 ///
456 /// ```rust
457 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
458 /// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
459 /// let view = OwnedMemoryView::new(data);
460 ///
461 /// if let Some(value) = view.read_usize(0) {
462 /// println!("Value: 0x{:x}", value);
463 /// }
464 /// ```
465 pub fn read_usize(&self, offset: usize) -> Option<usize> {
466 let size = std::mem::size_of::<usize>();
467 if offset.saturating_add(size) > self.data.len() {
468 return None;
469 }
470 let mut buf = [0u8; 8];
471 buf[..size].copy_from_slice(&self.data[offset..offset + size]);
472 Some(usize::from_le_bytes(buf))
473 }
474
475 /// Read a single byte from the specified offset.
476 ///
477 /// Returns `None` if the offset is out of bounds.
478 ///
479 /// # Example
480 ///
481 /// ```rust
482 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
483 /// let view = OwnedMemoryView::new(vec![0x10, 0x20, 0x30]);
484 ///
485 /// assert_eq!(view.read_u8(0), Some(0x10));
486 /// assert_eq!(view.read_u8(2), Some(0x30));
487 /// assert_eq!(view.read_u8(3), None); // out of bounds
488 /// ```
489 pub fn read_u8(&self, offset: usize) -> Option<u8> {
490 self.data.get(offset).copied()
491 }
492
493 /// Returns a slice of the underlying data.
494 ///
495 /// This provides direct access to the bytes without copying.
496 ///
497 /// # Example
498 ///
499 /// ```rust
500 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
501 /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5]);
502 /// let slice = view.as_slice();
503 /// assert_eq!(slice, &[1, 2, 3, 4, 5]);
504 /// ```
505 pub fn as_slice(&self) -> &[u8] {
506 &self.data
507 }
508
509 /// Returns an iterator over chunks of the underlying data.
510 ///
511 /// Each chunk has at most `chunk_size` elements.
512 ///
513 /// # Example
514 ///
515 /// ```rust
516 /// use memscope_rs::unsafe_inference::OwnedMemoryView;
517 /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5, 6]);
518 /// let chunks: Vec<_> = view.chunks(2).collect();
519 /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5, 6]]);
520 /// ```
521 pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &[u8]> {
522 self.data.chunks(chunk_size)
523 }
524}
525
526impl<'a> MemoryView<'a> {
527 pub fn new(data: &'a [u8]) -> Self {
528 Self { data }
529 }
530
531 pub fn len(&self) -> usize {
532 self.data.len()
533 }
534
535 pub fn is_empty(&self) -> bool {
536 self.data.is_empty()
537 }
538
539 pub fn read_usize(&self, offset: usize) -> Option<usize> {
540 let size = std::mem::size_of::<usize>();
541 if offset.saturating_add(size) > self.data.len() {
542 return None;
543 }
544 let mut buf = [0u8; 8];
545 buf[..size].copy_from_slice(&self.data[offset..offset + size]);
546 Some(usize::from_le_bytes(buf))
547 }
548
549 pub fn read_u8(&self, offset: usize) -> Option<u8> {
550 self.data.get(offset).copied()
551 }
552
553 pub fn last_byte(&self) -> Option<u8> {
554 self.data.last().copied()
555 }
556
557 pub fn as_slice(&self) -> &'a [u8] {
558 self.data
559 }
560
561 pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &'a [u8]> {
562 self.data.chunks(chunk_size)
563 }
564}
565
566/// Count valid pointers in a memory view.
567pub fn count_valid_pointers(view: &MemoryView) -> usize {
568 let ptr_size = std::mem::size_of::<usize>();
569 let mut count = 0;
570 for chunk in view.chunks(ptr_size) {
571 if chunk.len() < ptr_size {
572 break;
573 }
574 // Use a buffer sized for the platform's pointer size
575 let mut buf = [0u8; 16]; // Max pointer size is 16 bytes (128-bit)
576 buf[..ptr_size].copy_from_slice(chunk);
577 let v = if ptr_size == 8 {
578 usize::from_le_bytes(buf[..8].try_into().unwrap())
579 } else {
580 usize::from_le_bytes({
581 let mut arr = [0u8; 8];
582 arr[..ptr_size].copy_from_slice(&buf[..ptr_size]);
583 arr
584 })
585 };
586 if is_valid_ptr(v) {
587 count += 1;
588 }
589 }
590 count
591}
592
593#[cfg(test)]
594mod tests {
595 use super::*;
596
597 #[test]
598 fn test_memory_view_read_usize() {
599 let data: [u8; 16] = [
600 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
601 0x0e, 0x0f,
602 ];
603 let view = MemoryView::new(&data);
604
605 let val0 = view.read_usize(0).unwrap();
606 let val8 = view.read_usize(8).unwrap();
607
608 assert_eq!(val0, 0x0706050403020100);
609 assert_eq!(val8, 0x0f0e0d0c0b0a0908);
610 }
611
612 #[test]
613 fn test_memory_view_bounds_check() {
614 let data = [0u8; 8];
615 let view = MemoryView::new(&data);
616
617 assert!(view.read_usize(0).is_some());
618 assert!(view.read_usize(1).is_none());
619 assert!(view.read_usize(8).is_none());
620 }
621
622 #[test]
623 #[cfg(target_os = "macos")]
624 fn test_is_valid_ptr_static() {
625 assert!(!is_valid_ptr_static(0));
626 assert!(!is_valid_ptr_static(0x1000));
627 assert!(is_valid_ptr_static(0x10000));
628 assert!(is_valid_ptr_static(0x7fff_ffff_0000));
629 assert!(!is_valid_ptr_static(0xffff_ffff_ffff_ffff));
630 }
631
632 #[test]
633 #[cfg(target_os = "macos")]
634 fn test_is_valid_ptr() {
635 // Should work with either dynamic or static
636 assert!(!is_valid_ptr(0));
637 assert!(!is_valid_ptr(0x1000));
638 // These should pass with static fallback
639 assert!(is_valid_ptr(0x10000));
640 }
641
642 #[test]
643 #[cfg(target_os = "macos")]
644 fn test_count_valid_pointers() {
645 let mut data = [0u8; 24];
646 let valid_ptr: usize = 0x10000;
647 data[..8].copy_from_slice(&valid_ptr.to_le_bytes());
648
649 let view = MemoryView::new(&data);
650 assert_eq!(count_valid_pointers(&view), 1);
651 }
652
653 #[test]
654 fn test_valid_regions_contains() {
655 let regions = ValidRegions::empty();
656 // Empty regions should use static bounds
657 assert!(regions.contains(0x10000));
658 assert!(!regions.contains(0));
659 }
660
661 #[test]
662 fn test_valid_regions_from_regions() {
663 let regions = ValidRegions::from_regions(vec![
664 MemoryRegion {
665 start: 0x1000,
666 end: 0x2000,
667 },
668 MemoryRegion {
669 start: 0x3000,
670 end: 0x4000,
671 },
672 ]);
673
674 assert!(regions.is_dynamic());
675 assert!(regions.contains(0x1500));
676 assert!(regions.contains(0x3500));
677 assert!(!regions.contains(0x2500));
678 assert!(!regions.contains(0x5000));
679 }
680
681 #[test]
682 fn test_get_valid_regions() {
683 let regions = get_valid_regions();
684 // Should return something (dynamic or static)
685 // Just verify it doesn't panic
686 let _ = regions.contains(0x10000);
687 }
688}