Skip to main content

memscope_rs/analysis/unsafe_inference/
memory_view.rs

1//! Memory View - Safe Memory Access Layer
2//!
3//! Provides safe, bounds-checked access to memory content for type inference.
4//! All memory access goes through this layer - no raw pointer dereferencing.
5//!
6//! # ValidRegions
7//!
8//! Uses dynamic mmap region detection with static fallback:
9//! - Linux: Reads `/proc/self/maps` for precise regions
10//! - Other platforms: Uses static address range bounds
11
12use std::sync::RwLock;
13
// Static fallback bounds for different platforms.
//
// Upper bounds on plausible user-space addresses, used when no dynamic
// region map is available (see `ValidRegions::contains` and
// `is_valid_ptr_static`).

// Linux x86-64: user-space tops out just below the canonical-address gap.
#[cfg(all(target_pointer_width = "64", target_os = "linux"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;

// macOS 64-bit: same conservative bound as Linux.
#[cfg(all(target_pointer_width = "64", target_os = "macos"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_f000;

// Windows 64-bit: conservative bound excluding the top 64 KiB of the
// user-mode range.
#[cfg(all(target_pointer_width = "64", target_os = "windows"))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_0000;

// Other 64-bit platforms: permissive 47-bit bound.
#[cfg(all(
    target_pointer_width = "64",
    not(any(target_os = "linux", target_os = "macos", target_os = "windows"))
))]
const MAX_USER_ADDR: usize = 0x0000_7fff_ffff_ffff;

// 32-bit platforms: assume the classic 2 GiB user/kernel split.
#[cfg(target_pointer_width = "32")]
const MAX_USER_ADDR: usize = 0x7fff_ffff;

// Windows-specific heap end addresses for 32-bit vs 64-bit.
// NOTE(review): the 64-bit value spans the entire positive address range,
// wider than MAX_USER_ADDR above — presumably intentional for heap
// scanning; confirm.
#[cfg(all(target_os = "windows", target_pointer_width = "64"))]
const MAX_HEAP_END: usize = 0x7FFF_FFFF_FFFF_FFFF;

#[cfg(all(target_os = "windows", target_pointer_width = "32"))]
const MAX_HEAP_END: usize = 0x7FFF_FFFF;

// Addresses at or below this (the first page) are never considered valid;
// catches null and near-null pointers.
const MIN_VALID_ADDR: usize = 0x1000;
41
/// Represents a valid memory region from process memory map.
///
/// The range is half-open: `start` is inclusive, `end` is exclusive
/// (matching `/proc/self/maps` semantics and the `addr < end` check in
/// `ValidRegions::contains`).
#[derive(Clone, Debug)]
pub struct MemoryRegion {
    /// Inclusive start address of the region.
    pub start: usize,
    /// Exclusive end address of the region.
    pub end: usize,
}
48
/// Valid memory regions for pointer validation.
///
/// Uses dynamic detection on supported platforms with static fallback.
#[derive(Clone, Debug, Default)]
pub struct ValidRegions {
    // Kept sorted by `start` (see `from_regions`) so `contains` can
    // binary-search with `partition_point`.
    regions: Vec<MemoryRegion>,
    // True when `regions` came from real detection (e.g. /proc/self/maps);
    // false means `contains` falls back to the static MAX_USER_ADDR bound.
    is_dynamic: bool,
}
57
58impl ValidRegions {
59    /// Create empty regions (will use static bounds).
60    pub fn empty() -> Self {
61        Self {
62            regions: Vec::new(),
63            is_dynamic: false,
64        }
65    }
66
67    /// Create from a list of memory regions.
68    pub fn from_regions(mut regions: Vec<MemoryRegion>) -> Self {
69        // Sort regions by start address for partition_point to work correctly
70        regions.sort_by_key(|r| r.start);
71        Self {
72            regions,
73            is_dynamic: true,
74        }
75    }
76
77    /// Check if an address falls within valid regions.
78    ///
79    /// If dynamic regions are available, uses precise checking.
80    /// Otherwise, falls back to static bounds.
81    pub fn contains(&self, addr: usize) -> bool {
82        if addr <= MIN_VALID_ADDR {
83            return false;
84        }
85
86        if self.is_dynamic && !self.regions.is_empty() {
87            // Use partition_point to find the first region where start > addr
88            // Then check if the previous region contains addr
89            let idx = self.regions.partition_point(|region| region.start <= addr);
90
91            if idx > 0 {
92                let region = &self.regions[idx - 1];
93                return addr < region.end;
94            }
95            false
96        } else {
97            // Static fallback
98            addr < MAX_USER_ADDR
99        }
100    }
101
102    /// Get the number of regions.
103    pub fn len(&self) -> usize {
104        self.regions.len()
105    }
106
107    /// Check if regions are empty.
108    pub fn is_empty(&self) -> bool {
109        self.regions.is_empty()
110    }
111
112    /// Check if using dynamic detection.
113    pub fn is_dynamic(&self) -> bool {
114        self.is_dynamic
115    }
116
117    /// Debug dump regions to stderr
118    #[cfg(test)]
119    pub fn debug_dump(&self) {
120        eprintln!("ValidRegions (is_dynamic={}):", self.is_dynamic);
121        for (i, region) in self.regions.iter().enumerate() {
122            eprintln!("  Region {}: 0x{:x} - 0x{:x}", i, region.start, region.end);
123        }
124    }
125}
126
/// Global cached valid regions.
///
/// `None` until the first call to `get_valid_regions` populates it;
/// guarded by an `RwLock` so the common read path does not contend.
static VALID_REGIONS: RwLock<Option<ValidRegions>> = RwLock::new(None);
129
130/// Merge overlapping or adjacent memory regions.
131#[cfg(target_os = "linux")]
132fn merge_regions(regions: Vec<MemoryRegion>) -> Vec<MemoryRegion> {
133    if regions.is_empty() {
134        return regions;
135    }
136
137    let mut merged: Vec<MemoryRegion> = Vec::with_capacity(regions.len());
138    // Use first() for safer access pattern
139    let mut current = regions
140        .first()
141        .expect("regions should not be empty after is_empty check")
142        .clone();
143
144    for region in regions.into_iter().skip(1) {
145        if region.start < current.end {
146            current.end = current.end.max(region.end);
147        } else {
148            merged.push(current);
149            current = region;
150        }
151    }
152    merged.push(current);
153
154    merged
155}
156
157/// Get valid memory regions for the current process.
158///
159/// Platform-specific implementation:
160/// - Linux: Reads `/proc/self/maps`
161/// - Other: Returns empty (uses static bounds)
162#[cfg(target_os = "linux")]
163fn get_valid_regions_impl() -> ValidRegions {
164    use std::fs;
165
166    let content = match fs::read_to_string("/proc/self/maps") {
167        Ok(c) => c,
168        Err(_) => {
169            // Fallback: use conservative estimates if /proc/self/maps is unavailable
170            return get_conservative_regions();
171        }
172    };
173
174    let mut regions: Vec<MemoryRegion> = content
175        .lines()
176        .filter_map(|line| {
177            let parts: Vec<&str> = line.split_whitespace().collect();
178            if parts.is_empty() {
179                return None;
180            }
181
182            // Parse address range (e.g., "7f1234567000-7f1234568000")
183            let range: Vec<&str> = parts[0].split('-').collect();
184            let (start_str, end_str) = match (range.first(), range.get(1)) {
185                (Some(&s), Some(&e)) => (s, e),
186                _ => return None,
187            };
188
189            let start = usize::from_str_radix(start_str, 16).ok()?;
190            let end = usize::from_str_radix(end_str, 16).ok()?;
191
192            // Filter to readable regions only (r-- or r-x or rw-)
193            let perms = parts.get(1)?;
194            if !perms.starts_with('r') {
195                return None;
196            }
197
198            Some(MemoryRegion { start, end })
199        })
200        .collect();
201
202    // Sort by start address for binary search
203    regions.sort_by_key(|r| r.start);
204
205    // Merge overlapping/adjacent regions
206    regions = merge_regions(regions);
207
208    // If no regions found, use conservative estimates
209    if regions.is_empty() {
210        return get_conservative_regions();
211    }
212
213    // /proc/self/maps already includes stack, heap, and all mapped regions
214    // No need to add additional regions
215    ValidRegions::from_regions(regions)
216}
217
218/// Get conservative memory regions as fallback
219#[cfg(target_os = "linux")]
220fn get_conservative_regions() -> ValidRegions {
221    let regions = vec![MemoryRegion {
222        start: 0x10000,
223        end: 0x7FFF_FFFF_FFFF_FFFF, // x64 address space
224    }];
225
226    ValidRegions::from_regions(regions)
227}
228
/// Get valid memory regions for the current process (Windows).
///
/// Conservative: a single wide region covering the entire user-space
/// address range, since Windows allocators can place memory anywhere.
#[cfg(target_os = "windows")]
fn get_valid_regions_impl() -> ValidRegions {
    let user_space = MemoryRegion {
        start: 0x10000,
        end: MAX_HEAP_END, // Platform-specific: 64-bit or 32-bit
    };
    ValidRegions::from_regions(vec![user_space])
}
246
/// Get valid memory regions for the current process (macOS).
///
/// Conservative: a single wide region spanning the whole user-space
/// address range. macOS allocators can place memory in various ranges
/// depending on the allocator, and the stack is covered by the same
/// range, so separate stack/heap regions are unnecessary.
#[cfg(target_os = "macos")]
fn get_valid_regions_impl() -> ValidRegions {
    let whole_user_space = MemoryRegion {
        start: 0x1000,              // Start from a low address
        end: 0x7FFF_FFFF_FFFF_FFFF, // Up to max 64-bit address
    };
    ValidRegions::from_regions(vec![whole_user_space])
}
268
/// Get valid memory regions for the current process (non-Linux, non-Windows, non-macOS).
/// Uses conservative approach as fallback for unknown platforms.
#[cfg(all(
    not(target_os = "linux"),
    not(target_os = "windows"),
    not(target_os = "macos")
))]
fn get_valid_regions_impl() -> ValidRegions {
    // Single wide region covering the whole user-space range.
    //
    // Use the platform-aware MAX_USER_ADDR constant instead of a
    // hard-coded 64-bit literal: the previous value
    // (0x7FF_FFFFF_FFFF_FFFF, oddly grouped) does not fit in a 32-bit
    // `usize` and would fail to compile on 32-bit unknown targets.
    let regions = vec![MemoryRegion {
        start: 0x10000,
        end: MAX_USER_ADDR,
    }];

    ValidRegions::from_regions(regions)
}
287
288/// Get cached valid regions, initializing if needed.
289pub fn get_valid_regions() -> ValidRegions {
290    // Fast path: check if already initialized
291    {
292        let read_guard = match VALID_REGIONS.read() {
293            Ok(guard) => guard,
294            Err(poisoned) => poisoned.into_inner(),
295        };
296        if read_guard.is_some() {
297            return read_guard
298                .as_ref()
299                .cloned()
300                .expect("VALID_REGIONS should be Some after is_some() check (read path)");
301        }
302    }
303
304    // Slow path: need to initialize
305    // Use write lock and double-check to prevent TOCTOU race
306    let mut write_guard = match VALID_REGIONS.write() {
307        Ok(guard) => guard,
308        Err(poisoned) => poisoned.into_inner(),
309    };
310
311    // Double-check after acquiring write lock
312    if write_guard.is_some() {
313        return write_guard
314            .as_ref()
315            .cloned()
316            .expect("VALID_REGIONS should be Some after is_some() check (write path)");
317    }
318
319    // Initialize while holding the write lock
320    let regions = get_valid_regions_impl();
321    *write_guard = Some(regions.clone());
322    regions
323}
324
325/// Check if a pointer value is valid using dynamic regions with static fallback.
326pub fn is_valid_ptr(p: usize) -> bool {
327    get_valid_regions().contains(p)
328}
329
330/// Check if a pointer value is valid using only static bounds.
331pub fn is_valid_ptr_static(p: usize) -> bool {
332    p > MIN_VALID_ADDR && p < MAX_USER_ADDR
333}
334
/// Memory view for safe memory access.
///
/// Borrowing, zero-copy counterpart of `OwnedMemoryView`: wraps a byte
/// slice and exposes only bounds-checked reads.
pub struct MemoryView<'a> {
    // Borrowed backing bytes; all reads are bounds-checked against its length.
    data: &'a [u8],
}
339
/// Owned memory view that owns its data.
///
/// This is a non-reference version of `MemoryView` that owns the underlying
/// buffer. Useful when the memory view needs to outlive the original scope.
///
/// # When to Use OwnedMemoryView vs MemoryView
///
/// | Scenario | Use |
/// |----------|-----|
/// | Temporary analysis within a function | `MemoryView<'_>` |
/// | Storing memory data for later use | `OwnedMemoryView` |
/// | Returning memory data from a function | `OwnedMemoryView` |
/// | Zero-copy analysis | `MemoryView<'_>` |
///
/// # Lifetime Management
///
/// `OwnedMemoryView` owns its data via `Vec<u8>`, so it has no lifetime parameter.
/// This means:
/// - The data remains valid as long as the `OwnedMemoryView` exists
/// - No need to worry about the underlying data being dropped
/// - Slightly higher memory overhead due to ownership
///
/// # Example
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
///
/// // Create from a vector (takes ownership)
/// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
/// let view = OwnedMemoryView::new(data);
///
/// // Read values safely
/// if let Some(value) = view.read_usize(0) {
///     println!("First usize: {}", value);
/// }
///
/// // Check bounds
/// if let Some(byte) = view.read_u8(10) {
///     println!("Byte at offset 10: {}", byte);
/// } else {
///     println!("Offset 10 out of bounds");
/// }
///
/// // Access raw slice when needed
/// let slice = view.as_slice();
/// println!("Total bytes: {}", slice.len());
/// ```
///
/// # Memory Safety
///
/// All read methods perform bounds checking and return `Option` types.
/// This ensures safe access even with invalid offsets:
///
/// ```rust
/// use memscope_rs::analysis::unsafe_inference::OwnedMemoryView;
///
/// // Buffer shorter than any pointer width, so every usize read fails.
/// let view = OwnedMemoryView::new(vec![0u8; 3]);
///
/// // Not enough bytes for a usize starting at offset 0.
/// assert!(view.read_usize(0).is_none());
///
/// // Still out of bounds further in (offset + size > len).
/// assert!(view.read_usize(1).is_none());
/// ```
pub struct OwnedMemoryView {
    // Owned backing buffer; all reads are bounds-checked against its length.
    data: Vec<u8>,
}
407
408impl OwnedMemoryView {
409    /// Create a new `OwnedMemoryView` from a `Vec<u8>`.
410    ///
411    /// This takes ownership of the vector, so no copying occurs.
412    ///
413    /// # Example
414    ///
415    /// ```rust
416    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
417    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4]);
418    /// assert_eq!(view.len(), 4);
419    /// ```
420    pub fn new(data: Vec<u8>) -> Self {
421        Self { data }
422    }
423
424    /// Returns the length of the underlying data.
425    ///
426    /// # Example
427    ///
428    /// ```rust
429    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
430    /// let view = OwnedMemoryView::new(vec![1, 2, 3]);
431    /// assert_eq!(view.len(), 3);
432    /// ```
433    pub fn len(&self) -> usize {
434        self.data.len()
435    }
436
437    /// Returns `true` if the underlying data is empty.
438    ///
439    /// # Example
440    ///
441    /// ```rust
442    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
443    /// let view = OwnedMemoryView::new(vec![]);
444    /// assert!(view.is_empty());
445    /// ```
446    pub fn is_empty(&self) -> bool {
447        self.data.is_empty()
448    }
449
450    /// Read a `usize` value from the specified offset.
451    ///
452    /// Reads `std::mem::size_of::<usize>()` bytes starting at `offset`
453    /// and interprets them as a little-endian `usize`.
454    ///
455    /// Returns `None` if the read would exceed the buffer bounds.
456    ///
457    /// # Example
458    ///
459    /// ```rust
460    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
461    /// let data = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
462    /// let view = OwnedMemoryView::new(data);
463    ///
464    /// if let Some(value) = view.read_usize(0) {
465    ///     println!("Value: 0x{:x}", value);
466    /// }
467    /// ```
468    pub fn read_usize(&self, offset: usize) -> Option<usize> {
469        let size = std::mem::size_of::<usize>();
470        if offset.saturating_add(size) > self.data.len() {
471            return None;
472        }
473        let mut buf = [0u8; 8];
474        buf[..size].copy_from_slice(&self.data[offset..offset + size]);
475        Some(usize::from_le_bytes(buf))
476    }
477
478    /// Read a single byte from the specified offset.
479    ///
480    /// Returns `None` if the offset is out of bounds.
481    ///
482    /// # Example
483    ///
484    /// ```rust
485    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
486    /// let view = OwnedMemoryView::new(vec![0x10, 0x20, 0x30]);
487    ///
488    /// assert_eq!(view.read_u8(0), Some(0x10));
489    /// assert_eq!(view.read_u8(2), Some(0x30));
490    /// assert_eq!(view.read_u8(3), None); // out of bounds
491    /// ```
492    pub fn read_u8(&self, offset: usize) -> Option<u8> {
493        self.data.get(offset).copied()
494    }
495
496    /// Returns a slice of the underlying data.
497    ///
498    /// This provides direct access to the bytes without copying.
499    ///
500    /// # Example
501    ///
502    /// ```rust
503    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
504    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5]);
505    /// let slice = view.as_slice();
506    /// assert_eq!(slice, &[1, 2, 3, 4, 5]);
507    /// ```
508    pub fn as_slice(&self) -> &[u8] {
509        &self.data
510    }
511
512    /// Returns an iterator over chunks of the underlying data.
513    ///
514    /// Each chunk has at most `chunk_size` elements.
515    ///
516    /// # Example
517    ///
518    /// ```rust
519    /// use memscope_rs::unsafe_inference::OwnedMemoryView;
520    /// let view = OwnedMemoryView::new(vec![1, 2, 3, 4, 5, 6]);
521    /// let chunks: Vec<_> = view.chunks(2).collect();
522    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5, 6]]);
523    /// ```
524    pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &[u8]> {
525        self.data.chunks(chunk_size)
526    }
527}
528
529impl<'a> MemoryView<'a> {
530    pub fn new(data: &'a [u8]) -> Self {
531        Self { data }
532    }
533
534    pub fn len(&self) -> usize {
535        self.data.len()
536    }
537
538    pub fn is_empty(&self) -> bool {
539        self.data.is_empty()
540    }
541
542    pub fn read_usize(&self, offset: usize) -> Option<usize> {
543        let size = std::mem::size_of::<usize>();
544        if offset.saturating_add(size) > self.data.len() {
545            return None;
546        }
547        let mut buf = [0u8; 8];
548        buf[..size].copy_from_slice(&self.data[offset..offset + size]);
549        Some(usize::from_le_bytes(buf))
550    }
551
552    pub fn read_u8(&self, offset: usize) -> Option<u8> {
553        self.data.get(offset).copied()
554    }
555
556    pub fn last_byte(&self) -> Option<u8> {
557        self.data.last().copied()
558    }
559
560    pub fn as_slice(&self) -> &'a [u8] {
561        self.data
562    }
563
564    pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &'a [u8]> {
565        self.data.chunks(chunk_size)
566    }
567}
568
569/// Count valid pointers in a memory view.
570pub fn count_valid_pointers(view: &MemoryView) -> usize {
571    let ptr_size = std::mem::size_of::<usize>();
572    let mut count = 0;
573    for chunk in view.chunks(ptr_size) {
574        if chunk.len() < ptr_size {
575            break;
576        }
577        // Use a buffer sized for the platform's pointer size
578        let mut buf = [0u8; 16]; // Max pointer size is 16 bytes (128-bit)
579        buf[..ptr_size].copy_from_slice(chunk);
580        let v = if ptr_size == 8 {
581            usize::from_le_bytes(buf[..8].try_into().unwrap())
582        } else {
583            usize::from_le_bytes({
584                let mut arr = [0u8; 8];
585                arr[..ptr_size].copy_from_slice(&buf[..ptr_size]);
586                arr
587            })
588        };
589        if is_valid_ptr(v) {
590            count += 1;
591        }
592    }
593    count
594}
595
#[cfg(test)]
mod tests {
    use super::*;

    // Little-endian decoding of two consecutive words from a known pattern.
    // NOTE(review): the expected constants assume an 8-byte `usize`;
    // this test would fail on 32-bit targets — confirm intended coverage.
    #[test]
    fn test_memory_view_read_usize() {
        let data: [u8; 16] = [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
            0x0e, 0x0f,
        ];
        let view = MemoryView::new(&data);

        let val0 = view.read_usize(0).unwrap();
        let val8 = view.read_usize(8).unwrap();

        assert_eq!(val0, 0x0706050403020100);
        assert_eq!(val8, 0x0f0e0d0c0b0a0908);
    }

    // Reads that would run past the end of the buffer must return None.
    #[test]
    fn test_memory_view_bounds_check() {
        let data = [0u8; 8];
        let view = MemoryView::new(&data);

        assert!(view.read_usize(0).is_some());
        assert!(view.read_usize(1).is_none());
        assert!(view.read_usize(8).is_none());
    }

    // Static-bound validation. NOTE(review): gated to macOS — presumably
    // because the expected values match the macOS 64-bit constants;
    // confirm whether other platforms should get equivalent tests.
    #[test]
    #[cfg(target_os = "macos")]
    fn test_is_valid_ptr_static() {
        assert!(!is_valid_ptr_static(0));
        assert!(!is_valid_ptr_static(0x1000)); // == MIN_VALID_ADDR (exclusive)
        assert!(is_valid_ptr_static(0x10000));
        assert!(is_valid_ptr_static(0x7fff_ffff_0000));
        assert!(!is_valid_ptr_static(0xffff_ffff_ffff_ffff));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_is_valid_ptr() {
        // Should work with either dynamic or static
        assert!(!is_valid_ptr(0));
        assert!(!is_valid_ptr(0x1000));
        // These should pass with static fallback
        assert!(is_valid_ptr(0x10000));
    }

    // One valid pointer embedded at the start; the rest is zeroes
    // (never valid since 0 < MIN_VALID_ADDR).
    #[test]
    #[cfg(target_os = "macos")]
    fn test_count_valid_pointers() {
        let mut data = [0u8; 24];
        let valid_ptr: usize = 0x10000;
        data[..8].copy_from_slice(&valid_ptr.to_le_bytes());

        let view = MemoryView::new(&data);
        assert_eq!(count_valid_pointers(&view), 1);
    }

    #[test]
    fn test_valid_regions_contains() {
        let regions = ValidRegions::empty();
        // Empty regions should use static bounds
        assert!(regions.contains(0x10000));
        assert!(!regions.contains(0));
    }

    // Dynamic regions: addresses inside either region are valid,
    // addresses in the gap or past the end are not.
    #[test]
    fn test_valid_regions_from_regions() {
        let regions = ValidRegions::from_regions(vec![
            MemoryRegion {
                start: 0x1000,
                end: 0x2000,
            },
            MemoryRegion {
                start: 0x3000,
                end: 0x4000,
            },
        ]);

        assert!(regions.is_dynamic());
        assert!(regions.contains(0x1500));
        assert!(regions.contains(0x3500));
        assert!(!regions.contains(0x2500));
        assert!(!regions.contains(0x5000));
    }

    #[test]
    fn test_get_valid_regions() {
        let regions = get_valid_regions();
        // Should return something (dynamic or static)
        // Just verify it doesn't panic
        let _ = regions.contains(0x10000);
    }
}