// memscope_rs/analysis/unsafe_inference/memory_view.rs
1//! Memory View - Safe Memory Access Layer
2//!
3//! Provides safe, bounds-checked access to memory content for type inference.
4//! All memory access goes through this layer - no raw pointer dereferencing.
5//!
6//! # ValidRegions
7//!
8//! Uses dynamic mmap region detection with static fallback:
9//! - Linux: Reads `/proc/self/maps` for precise regions
10//! - Other platforms: Uses static address range bounds
11
12use std::sync::RwLock;
13
14// Static fallback bounds for different platforms
15#[cfg(all(target_pointer_width = "64", target_os = "linux"))]
16const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;
17
18#[cfg(all(target_pointer_width = "64", target_os = "macos"))]
19const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;
20
21#[cfg(all(target_pointer_width = "64", target_os = "windows"))]
22const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_0000;
23
24#[cfg(all(
25    target_pointer_width = "64",
26    not(any(target_os = "linux", target_os = "macos", target_os = "windows"))
27))]
28const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_ffff;
29
30#[cfg(target_pointer_width = "32")]
31const MAX_USER_ADDR: usize = 0x7fff_ffff;
32
33// Windows-specific heap end addresses for 32-bit vs 64-bit
34#[cfg(all(target_os = "windows", target_pointer_width = "64"))]
35const MAX_HEAP_END: usize = 0x7FFF_FFFF_FFFF_FFFF;
36
37#[cfg(all(target_os = "windows", target_pointer_width = "32"))]
38const MAX_HEAP_END: usize = 0x7FFF_FFFF;
39
40const MIN_VALID_ADDR: usize = 0x1000;
41
/// Represents a valid memory region from process memory map.
///
/// The region is the half-open address range `[start, end)` — `contains`
/// checks `start <= addr && addr < end`. Derives `Copy`/`PartialEq`/`Eq`
/// since it is a plain pair of addresses; this makes regions cheap to pass
/// around and directly comparable in tests.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct MemoryRegion {
    /// Inclusive start address of the region.
    pub start: usize,
    /// Exclusive end address of the region.
    pub end: usize,
}
48
/// Valid memory regions for pointer validation.
///
/// Uses dynamic detection on supported platforms with static fallback.
#[derive(Clone, Debug, Default)]
pub struct ValidRegions {
    // Regions sorted by `start` (see `from_regions`); empty when the
    // static-bounds fallback is in effect.
    regions: Vec<MemoryRegion>,
    // True when `regions` came from a live scan (e.g. /proc/self/maps)
    // rather than the static per-platform address bounds.
    is_dynamic: bool,
}
57
58impl ValidRegions {
59    /// Create empty regions (will use static bounds).
60    pub fn empty() -> Self {
61        Self {
62            regions: Vec::new(),
63            is_dynamic: false,
64        }
65    }
66
67    /// Create from a list of memory regions.
68    pub fn from_regions(mut regions: Vec<MemoryRegion>) -> Self {
69        // Sort regions by start address for partition_point to work correctly
70        regions.sort_by_key(|r| r.start);
71        Self {
72            regions,
73            is_dynamic: true,
74        }
75    }
76
77    /// Check if an address falls within valid regions.
78    ///
79    /// If dynamic regions are available, uses precise checking.
80    /// Otherwise, falls back to static bounds.
81    pub fn contains(&self, addr: usize) -> bool {
82        if addr <= MIN_VALID_ADDR {
83            return false;
84        }
85
86        if self.is_dynamic && !self.regions.is_empty() {
87            // Use partition_point to find the first region where start > addr
88            // Then check if the previous region contains addr
89            let idx = self.regions.partition_point(|region| region.start <= addr);
90
91            if idx > 0 {
92                let region = &self.regions[idx - 1];
93                return addr < region.end;
94            }
95            false
96        } else {
97            // Static fallback
98            addr < MAX_USER_ADDR
99        }
100    }
101
102    /// Get the number of regions.
103    pub fn len(&self) -> usize {
104        self.regions.len()
105    }
106
107    /// Check if regions are empty.
108    pub fn is_empty(&self) -> bool {
109        self.regions.is_empty()
110    }
111
112    /// Check if using dynamic detection.
113    pub fn is_dynamic(&self) -> bool {
114        self.is_dynamic
115    }
116
117    /// Debug dump regions to stderr
118    #[cfg(test)]
119    pub fn debug_dump(&self) {
120        eprintln!("ValidRegions (is_dynamic={}):", self.is_dynamic);
121        for (i, region) in self.regions.iter().enumerate() {
122            eprintln!("  Region {}: 0x{:x} - 0x{:x}", i, region.start, region.end);
123        }
124    }
125}
126
/// Global cached valid regions.
///
/// `None` until first use; populated exactly once by `get_valid_regions`
/// and cloned on every subsequent read. An `RwLock` lets concurrent readers
/// proceed without contention after initialization.
static VALID_REGIONS: RwLock<Option<ValidRegions>> = RwLock::new(None);
129
130/// Merge overlapping or adjacent memory regions.
131#[cfg(target_os = "linux")]
132fn merge_regions(regions: Vec<MemoryRegion>) -> Vec<MemoryRegion> {
133    if regions.is_empty() {
134        return regions;
135    }
136
137    let mut merged: Vec<MemoryRegion> = Vec::with_capacity(regions.len());
138    let mut current = regions[0].clone();
139
140    for region in regions.into_iter().skip(1) {
141        if region.start < current.end {
142            current.end = current.end.max(region.end);
143        } else {
144            merged.push(current);
145            current = region;
146        }
147    }
148    merged.push(current);
149
150    merged
151}
152
153/// Get valid memory regions for the current process.
154///
155/// Platform-specific implementation:
156/// - Linux: Reads `/proc/self/maps`
157/// - Other: Returns empty (uses static bounds)
158#[cfg(target_os = "linux")]
159fn get_valid_regions_impl() -> ValidRegions {
160    use std::fs;
161
162    let content = match fs::read_to_string("/proc/self/maps") {
163        Ok(c) => c,
164        Err(_) => {
165            // Fallback: use conservative estimates if /proc/self/maps is unavailable
166            return get_conservative_regions();
167        }
168    };
169
170    let mut regions: Vec<MemoryRegion> = content
171        .lines()
172        .filter_map(|line| {
173            let parts: Vec<&str> = line.split_whitespace().collect();
174            if parts.is_empty() {
175                return None;
176            }
177
178            let range: Vec<&str> = parts[0].split('-').collect();
179            if range.len() != 2 {
180                return None;
181            }
182
183            let start = usize::from_str_radix(range[0], 16).ok()?;
184            let end = usize::from_str_radix(range[1], 16).ok()?;
185
186            // Filter to readable regions only (r-- or r-x or rw-)
187            if parts.len() < 2 {
188                return None;
189            }
190            let perms = parts[1];
191            if !perms.starts_with('r') {
192                return None;
193            }
194
195            Some(MemoryRegion { start, end })
196        })
197        .collect();
198
199    // Sort by start address for binary search
200    regions.sort_by_key(|r| r.start);
201
202    // Merge overlapping/adjacent regions
203    regions = merge_regions(regions);
204
205    // If no regions found, use conservative estimates
206    if regions.is_empty() {
207        return get_conservative_regions();
208    }
209
210    // /proc/self/maps already includes stack, heap, and all mapped regions
211    // No need to add additional regions
212    ValidRegions::from_regions(regions)
213}
214
215/// Get conservative memory regions as fallback
216#[cfg(target_os = "linux")]
217fn get_conservative_regions() -> ValidRegions {
218    let regions = vec![MemoryRegion {
219        start: 0x10000,
220        end: 0x7FFF_FFFF_FFFF_FFFF, // x64 address space
221    }];
222
223    ValidRegions::from_regions(regions)
224}
225
/// Get valid memory regions for the current process (Windows).
///
/// Uses a conservative approach to detect valid memory regions:
/// - Single wide region covering entire user-space address range
#[cfg(target_os = "windows")]
fn get_valid_regions_impl() -> ValidRegions {
    // Windows allocators can place memory anywhere in the address space,
    // so one wide user-space region is used rather than separate
    // stack/heap ranges.
    let user_space = MemoryRegion {
        start: 0x10000,
        end: MAX_HEAP_END, // Platform-specific: 64-bit or 32-bit
    };

    ValidRegions::from_regions(vec![user_space])
}
243
/// Get valid memory regions for the current process (macOS).
///
/// Uses a single wide region covering user space: macOS allocators can
/// place memory anywhere in the address range, so separate stack/heap
/// regions would add no precision (the stack is covered by this range).
///
/// Uses `MIN_VALID_ADDR`/`MAX_USER_ADDR` instead of repeating literal
/// addresses, so the bounds stay consistent with the static fallback in
/// `ValidRegions::contains` (the previous `0x7FFF_FFFF_FFFF_FFFF` end
/// exceeded the macOS `MAX_USER_ADDR` ceiling).
#[cfg(target_os = "macos")]
fn get_valid_regions_impl() -> ValidRegions {
    let regions = vec![MemoryRegion {
        start: MIN_VALID_ADDR, // lowest address ever treated as valid
        end: MAX_USER_ADDR,    // user-space ceiling for this platform
    }];

    ValidRegions::from_regions(regions)
}
265
/// Get valid memory regions for the current process (non-Linux, non-Windows,
/// non-macOS). Uses conservative approach as fallback for unknown platforms.
///
/// Uses `MAX_USER_ADDR` rather than a hard-coded literal: the previous
/// `0x7FF_FFFFF_FFFF_FFFF` (oddly grouped, numerically
/// `0x7FFF_FFFF_FFFF_FFFF`) does not fit in `usize` on 32-bit targets,
/// which this catch-all cfg branch must also support — it would fail to
/// compile there.
#[cfg(all(
    not(target_os = "linux"),
    not(target_os = "windows"),
    not(target_os = "macos")
))]
fn get_valid_regions_impl() -> ValidRegions {
    // Single wide region covering the entire user-space range.
    let regions = vec![MemoryRegion {
        start: 0x10000,
        end: MAX_USER_ADDR,
    }];

    ValidRegions::from_regions(regions)
}
284
285/// Get cached valid regions, initializing if needed.
286pub fn get_valid_regions() -> ValidRegions {
287    // Fast path: check if already initialized
288    {
289        let read_guard = match VALID_REGIONS.read() {
290            Ok(guard) => guard,
291            Err(poisoned) => poisoned.into_inner(),
292        };
293        if read_guard.is_some() {
294            return read_guard
295                .as_ref()
296                .cloned()
297                .expect("VALID_REGIONS should be Some after is_some() check (read path)");
298        }
299    }
300
301    // Slow path: need to initialize
302    // Use write lock and double-check to prevent TOCTOU race
303    let mut write_guard = match VALID_REGIONS.write() {
304        Ok(guard) => guard,
305        Err(poisoned) => poisoned.into_inner(),
306    };
307
308    // Double-check after acquiring write lock
309    if write_guard.is_some() {
310        return write_guard
311            .as_ref()
312            .cloned()
313            .expect("VALID_REGIONS should be Some after is_some() check (write path)");
314    }
315
316    // Initialize while holding the write lock
317    let regions = get_valid_regions_impl();
318    *write_guard = Some(regions.clone());
319    regions
320}
321
/// Check if a pointer value is valid using dynamic regions with static fallback.
///
/// Delegates to the process-wide cached `ValidRegions` (see
/// `get_valid_regions`), so the first call pays the one-time
/// region-detection cost.
pub fn is_valid_ptr(p: usize) -> bool {
    get_valid_regions().contains(p)
}
326
327/// Check if a pointer value is valid using only static bounds.
328pub fn is_valid_ptr_static(p: usize) -> bool {
329    p > MIN_VALID_ADDR && p < MAX_USER_ADDR
330}
331
/// Memory view for safe memory access.
///
/// Borrowed, zero-copy counterpart of `OwnedMemoryView`: wraps an existing
/// byte slice and exposes only bounds-checked reads.
pub struct MemoryView<'a> {
    // Borrowed buffer; every read is bounds-checked against its length.
    data: &'a [u8],
}
336
/// Owned memory view that owns its data.
///
/// This is a non-reference version of `MemoryView` that owns the underlying
/// buffer. Useful when the memory view needs to outlive the original scope.
///
/// # When to Use OwnedMemoryView vs MemoryView
///
/// | Scenario | Use |
/// |----------|-----|
/// | Temporary analysis within a function | `MemoryView<'_>` |
/// | Storing memory data for later use | `OwnedMemoryView` |
/// | Returning memory data from a function | `OwnedMemoryView` |
/// | Zero-copy analysis | `MemoryView<'_>` |
///
/// # Lifetime Management
///
/// `OwnedMemoryView` owns its data via `Vec<u8>`, so it has no lifetime parameter.
/// This means:
/// - The data remains valid as long as the `OwnedMemoryView` exists
/// - No need to worry about the underlying data being dropped
/// - Slightly higher memory overhead due to ownership
///
/// # Example
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
///
/// // Create from a vector (takes ownership)
/// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
/// let view = OwnedMemoryView::new(data);
///
/// // Read values safely
/// if let Some(value) = view.read_usize(0) {
///     println!("First usize: {}", value);
/// }
///
/// // Check bounds
/// if let Some(byte) = view.read_u8(10) {
///     println!("Byte at offset 10: {}", byte);
/// } else {
///     println!("Offset 10 out of bounds");
/// }
///
/// // Access raw slice when needed
/// let slice = view.as_slice();
/// println!("Total bytes: {}", slice.len());
/// ```
///
/// # Memory Safety
///
/// All read methods perform bounds checking and return `Option` types.
/// This ensures safe access even with invalid offsets:
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
///
/// let view = OwnedMemoryView::new(vec![0u8; 4]);
///
/// // A usize read needs size_of::<usize>() bytes from the offset; a
/// // 4-byte buffer cannot provide that from offset 1 on any platform.
/// assert!(view.read_usize(1).is_none());
///
/// // Single-byte reads past the end also return None
/// assert!(view.read_u8(4).is_none());
/// ```
pub struct OwnedMemoryView {
    // Owned backing buffer; every read is bounds-checked against its length.
    data: Vec<u8>,
}

impl OwnedMemoryView {
    /// Create a new `OwnedMemoryView` from a `Vec<u8>`.
    ///
    /// This takes ownership of the vector, so no copying occurs.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4]);
    /// assert_eq!(view.len(), 4);
    /// ```
    pub fn new(data: Vec<u8>) -> Self {
        Self { data }
    }

    /// Returns the length of the underlying data.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3]);
    /// assert_eq!(view.len(), 3);
    /// ```
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if the underlying data is empty.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![]);
    /// assert!(view.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Read a `usize` value from the specified offset.
    ///
    /// Reads `std::mem::size_of::<usize>()` bytes starting at `offset`
    /// and interprets them as a little-endian `usize`.
    ///
    /// Returns `None` if the read would exceed the buffer bounds.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
    /// let view = OwnedMemoryView::new(data);
    ///
    /// if let Some(value) = view.read_usize(0) {
    ///     println!("Value: 0x{:x}", value);
    /// }
    /// ```
    pub fn read_usize(&self, offset: usize) -> Option<usize> {
        let size = std::mem::size_of::<usize>();
        // checked_add guards offsets near usize::MAX; slice::get rejects
        // any range that extends past the buffer.
        let end = offset.checked_add(size)?;
        let bytes = self.data.get(offset..end)?;
        // The slice is exactly `size` bytes, matching the array length
        // `from_le_bytes` requires on every pointer width. (The previous
        // fixed `[0u8; 8]` scratch buffer only compiled on 64-bit targets,
        // since `usize::from_le_bytes` takes a `size_of::<usize>()`-byte
        // array.)
        Some(usize::from_le_bytes(bytes.try_into().ok()?))
    }

    /// Read a single byte from the specified offset.
    ///
    /// Returns `None` if the offset is out of bounds.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![0x10, 0x20, 0x30]);
    ///
    /// assert_eq!(view.read_u8(0), Some(0x10));
    /// assert_eq!(view.read_u8(2), Some(0x30));
    /// assert_eq!(view.read_u8(3), None); // out of bounds
    /// ```
    pub fn read_u8(&self, offset: usize) -> Option<u8> {
        self.data.get(offset).copied()
    }

    /// Returns a slice of the underlying data.
    ///
    /// This provides direct access to the bytes without copying.
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5]);
    /// let slice = view.as_slice();
    /// assert_eq!(slice, &[1, 2, 3, 4, 5]);
    /// ```
    pub fn as_slice(&self) -> &[u8] {
        &self.data
    }

    /// Returns an iterator over chunks of the underlying data.
    ///
    /// Each chunk has at most `chunk_size` elements (the last chunk may be
    /// shorter).
    ///
    /// # Example
    ///
    /// ```rust
    /// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5, 6]);
    /// let chunks: Vec<_> = view.chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5, 6]]);
    /// ```
    pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &[u8]> {
        self.data.chunks(chunk_size)
    }
}
525
526impl<'a> MemoryView<'a> {
527    pub fn new(data: &'a [u8]) -> Self {
528        Self { data }
529    }
530
531    pub fn len(&self) -> usize {
532        self.data.len()
533    }
534
535    pub fn is_empty(&self) -> bool {
536        self.data.is_empty()
537    }
538
539    pub fn read_usize(&self, offset: usize) -> Option<usize> {
540        let size = std::mem::size_of::<usize>();
541        if offset.saturating_add(size) > self.data.len() {
542            return None;
543        }
544        let mut buf = [0u8; 8];
545        buf[..size].copy_from_slice(&self.data[offset..offset + size]);
546        Some(usize::from_le_bytes(buf))
547    }
548
549    pub fn read_u8(&self, offset: usize) -> Option<u8> {
550        self.data.get(offset).copied()
551    }
552
553    pub fn last_byte(&self) -> Option<u8> {
554        self.data.last().copied()
555    }
556
557    pub fn as_slice(&self) -> &'a [u8] {
558        self.data
559    }
560
561    pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &'a [u8]> {
562        self.data.chunks(chunk_size)
563    }
564}
565
566/// Count valid pointers in a memory view.
567pub fn count_valid_pointers(view: &MemoryView) -> usize {
568    let ptr_size = std::mem::size_of::<usize>();
569    let mut count = 0;
570    for chunk in view.chunks(ptr_size) {
571        if chunk.len() < ptr_size {
572            break;
573        }
574        // Use a buffer sized for the platform's pointer size
575        let mut buf = [0u8; 16]; // Max pointer size is 16 bytes (128-bit)
576        buf[..ptr_size].copy_from_slice(chunk);
577        let v = if ptr_size == 8 {
578            usize::from_le_bytes(buf[..8].try_into().unwrap())
579        } else {
580            usize::from_le_bytes({
581                let mut arr = [0u8; 8];
582                arr[..ptr_size].copy_from_slice(&buf[..ptr_size]);
583                arr
584            })
585        };
586        if is_valid_ptr(v) {
587            count += 1;
588        }
589    }
590    count
591}
592
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): the exact-value asserts in this test assume a 64-bit,
    // little-endian `usize`; they would need cfg-gating on 32-bit targets.
    #[test]
    fn test_memory_view_read_usize() {
        let data: [u8; 16] = [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
            0x0e, 0x0f,
        ];
        let view = MemoryView::new(&data);

        let val0 = view.read_usize(0).unwrap();
        let val8 = view.read_usize(8).unwrap();

        // Little-endian: the byte at the lowest offset is least significant.
        assert_eq!(val0, 0x0706050403020100);
        assert_eq!(val8, 0x0f0e0d0c0b0a0908);
    }

    // NOTE(review): `read_usize(1).is_none()` also assumes an 8-byte usize
    // (on 32-bit, offset 1 in an 8-byte buffer would still fit a 4-byte read).
    #[test]
    fn test_memory_view_bounds_check() {
        let data = [0u8; 8];
        let view = MemoryView::new(&data);

        assert!(view.read_usize(0).is_some());
        assert!(view.read_usize(1).is_none());
        assert!(view.read_usize(8).is_none());
    }

    // cfg(macos): the static MAX_USER_ADDR bound differs per OS, so the
    // boundary values below are only guaranteed on macOS.
    #[test]
    #[cfg(target_os = "macos")]
    fn test_is_valid_ptr_static() {
        assert!(!is_valid_ptr_static(0));
        assert!(!is_valid_ptr_static(0x1000));
        assert!(is_valid_ptr_static(0x10000));
        assert!(is_valid_ptr_static(0x7fff_ffff_0000));
        assert!(!is_valid_ptr_static(0xffff_ffff_ffff_ffff));
    }

    // cfg(macos): region detection is platform-specific; these values are
    // only asserted where the macOS implementation's bounds apply.
    #[test]
    #[cfg(target_os = "macos")]
    fn test_is_valid_ptr() {
        // Should work with either dynamic or static
        assert!(!is_valid_ptr(0));
        assert!(!is_valid_ptr(0x1000));
        // These should pass with static fallback
        assert!(is_valid_ptr(0x10000));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_count_valid_pointers() {
        // 24 bytes = three 8-byte words; only the first holds a valid pointer.
        let mut data = [0u8; 24];
        let valid_ptr: usize = 0x10000;
        data[..8].copy_from_slice(&valid_ptr.to_le_bytes());

        let view = MemoryView::new(&data);
        assert_eq!(count_valid_pointers(&view), 1);
    }

    #[test]
    fn test_valid_regions_contains() {
        let regions = ValidRegions::empty();
        // Empty regions should use static bounds
        assert!(regions.contains(0x10000));
        assert!(!regions.contains(0));
    }

    #[test]
    fn test_valid_regions_from_regions() {
        // Two disjoint regions: addresses inside either are valid,
        // addresses in the gap or past the end are not.
        let regions = ValidRegions::from_regions(vec![
            MemoryRegion {
                start: 0x1000,
                end: 0x2000,
            },
            MemoryRegion {
                start: 0x3000,
                end: 0x4000,
            },
        ]);

        assert!(regions.is_dynamic());
        assert!(regions.contains(0x1500));
        assert!(regions.contains(0x3500));
        assert!(!regions.contains(0x2500));
        assert!(!regions.contains(0x5000));
    }

    #[test]
    fn test_get_valid_regions() {
        let regions = get_valid_regions();
        // Should return something (dynamic or static)
        // Just verify it doesn't panic
        let _ = regions.contains(0x10000);
    }
}