Skip to main content

memscope_rs/analysis/heap_scanner/
reader.rs

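//! Safe heap memory reader with page-wise validation.
//!
//! Provides the `HeapScanner`, which copies the contents of tracked
//! allocations. Segfaults are avoided by checking every page of the read
//! range against the valid regions (`ValidRegions`) before it is touched
//! and, on Linux, by copying through a system call that reports a bad
//! address as an error instead of faulting.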
6
7use crate::analysis::unsafe_inference::is_valid_ptr;
8use crate::snapshot::types::ActiveAllocation;
9
/// Upper bound on the number of bytes copied out of a single allocation.
/// Metadata headers sit within the first few dozen bytes, so 4 KiB is
/// more than enough.
const MAX_READ_BYTES: usize = 4 * 1024;

/// Granularity, in bytes, at which address ranges are validated.
const PAGE_SIZE: usize = 1 << 12;
16
/// Outcome of scanning one allocation.
///
/// `Clone`, `PartialEq` and `Eq` are derived so results can be duplicated
/// and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanResult {
    /// Address of the allocation that was scanned.
    pub ptr: usize,
    /// Size of the allocation as recorded by the tracker, in bytes.
    pub size: usize,
    /// Bytes copied from the allocation (at most `MAX_READ_BYTES`), or
    /// `None` when the read was skipped or failed.
    pub memory: Option<Vec<u8>>,
}
27
28/// HeapScanner reads heap memory for active allocations during snapshot analysis.
29///
30/// All memory reads are validated through `ValidRegions` to prevent segfaults.
31/// This module only operates during offline analysis and has zero runtime overhead.
32pub struct HeapScanner;
33
34impl HeapScanner {
35    /// Scan a list of active allocations, reading their memory content.
36    ///
37    /// Returns a `ScanResult` for each allocation. Allocations whose pointers
38    /// fall outside valid regions will have `memory: None`.
39    ///
40    /// # Arguments
41    ///
42    /// * `allocations` - List of active allocations to scan.
43    ///
44    /// # Example
45    ///
46    /// ```ignore
47    /// let results = HeapScanner::scan(&allocations);
48    /// for result in results {
49    ///     if let Some(ref memory) = result.memory {
50    ///         let view = MemoryView::new(memory);
51    ///         // ... pass to UTI Engine
52    ///     }
53    /// }
54    /// ```
55    pub fn scan(allocations: &[ActiveAllocation]) -> Vec<ScanResult> {
56        allocations
57            .iter()
58            .map(|alloc| {
59                let memory = safe_read_memory(alloc.ptr, alloc.size);
60                ScanResult {
61                    ptr: alloc.ptr,
62                    size: alloc.size,
63                    memory,
64                }
65            })
66            .collect()
67    }
68}
69
70/// Safely read memory at `ptr` for up to `size` bytes.
71///
72/// Returns `None` if the address is not in valid regions, or if any
73/// page within the read range is unmapped.
74///
75/// On Linux, uses `process_vm_readv` which is an atomic syscall that cannot
76/// be interrupted by signals mid-read, eliminating TOCTOU issues.
77///
78/// On other platforms, falls back to volatile byte-by-byte reads with
79/// pre-validation of all pages before reading begins.
80fn safe_read_memory(ptr: usize, size: usize) -> Option<Vec<u8>> {
81    if size == 0 || ptr == 0 {
82        return None;
83    }
84
85    if !is_valid_ptr(ptr) {
86        return None;
87    }
88
89    let read_size = size.min(MAX_READ_BYTES);
90    if !are_pages_valid(ptr, read_size) {
91        return None;
92    }
93
94    let mut buf = vec![0u8; read_size];
95
96    #[cfg(target_os = "linux")]
97    {
98        if safe_read_linux(ptr, &mut buf) {
99            Some(buf)
100        } else {
101            None
102        }
103    }
104
105    #[cfg(not(target_os = "linux"))]
106    {
107        if read_bytes_volatile(ptr, &mut buf) {
108            Some(buf)
109        } else {
110            None
111        }
112    }
113}
114
115#[cfg(target_os = "linux")]
116mod linux_read {
117    use libc::{iovec, process_vm_readv};
118
119    /// Read memory from the current process using process_vm_readv.
120    ///
121    /// This uses pid=0 which refers to the calling process itself.
122    /// According to Linux man page: "The caller must have the CAP_SYS_PTRACE
123    /// capability, OR the real, effective, and saved-set user ID of the caller
124    /// must match the real user ID of the target process."
125    ///
126    /// For pid=0 (current process), the user IDs always match, so no special
127    /// privileges are required. This works in most environments including
128    /// containers (unless seccomp filters explicitly block process_vm_readv).
129    ///
130    /// The function is named `_local` to clarify it's for self-reading,
131    /// not for reading remote processes.
132    pub fn safe_read_linux_local(
133        remote_ptr: *const libc::c_void,
134        local_ptr: *mut libc::c_void,
135        len: usize,
136    ) -> isize {
137        let local_iov = iovec {
138            iov_base: local_ptr,
139            iov_len: len,
140        };
141        let remote_iov = iovec {
142            iov_base: remote_ptr as *mut libc::c_void,
143            iov_len: len,
144        };
145
146        // pid=0 means the calling process reads its own memory
147        // No CAP_SYS_PTRACE required for self-reading
148        unsafe { process_vm_readv(0, &local_iov, 1, &remote_iov, 1, 0) }
149    }
150}
151
152#[cfg(target_os = "linux")]
153fn safe_read_linux(ptr: usize, buf: &mut [u8]) -> bool {
154    use linux_read::safe_read_linux_local;
155
156    let len = buf.len();
157
158    let result = safe_read_linux_local(
159        ptr as *const libc::c_void,
160        buf.as_mut_ptr() as *mut libc::c_void,
161        len,
162    );
163
164    result == len as isize
165}
166
167#[cfg(not(target_os = "linux"))]
168#[allow(dead_code)] // Stub for non-Linux platforms; used when building on macOS/Windows
169fn safe_read_linux(_ptr: usize, _buf: &mut [u8]) -> bool {
170    false
171}
172
/// Copies `buf.len()` bytes starting at `ptr` into `buf`, one volatile
/// byte read at a time.
///
/// The whole range is re-checked with `are_pages_valid` first; if that
/// check fails nothing is read and `false` is returned. Otherwise the
/// copy is performed and `true` is returned.
#[cfg(not(target_os = "linux"))]
fn read_bytes_volatile(ptr: usize, buf: &mut [u8]) -> bool {
    if are_pages_valid(ptr, buf.len()) {
        let base = ptr as *const u8;
        for (offset, slot) in buf.iter_mut().enumerate() {
            // SAFETY: this relies entirely on the page pre-check above.
            // There is no fault recovery here, so an address that became
            // unmapped after the check would still crash the process.
            *slot = unsafe { base.add(offset).read_volatile() };
        }
        true
    } else {
        false
    }
}
191
192/// Check that every page in [ptr, ptr + size) is in a valid region.
193fn are_pages_valid(ptr: usize, size: usize) -> bool {
194    let page_start = ptr & !(PAGE_SIZE - 1);
195    let page_end = (ptr + size + PAGE_SIZE - 1) & !(PAGE_SIZE - 1);
196
197    let mut p = page_start;
198    while p < page_end {
199        if !is_valid_ptr(p) {
200            return false;
201        }
202        p += PAGE_SIZE;
203    }
204    true
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `ActiveAllocation` for the given address and size.
    ///
    /// Scanning only looks at `ptr` and `size`, so every other field gets
    /// a fixed placeholder value.
    fn alloc_at(ptr: usize, size: usize) -> ActiveAllocation {
        ActiveAllocation {
            ptr,
            size,
            allocated_at: 1000,
            var_name: None,
            type_name: None,
            thread_id: 0,
            call_stack_hash: None,
        }
    }

    #[test]
    fn test_safe_read_memory_zero_size() {
        assert!(safe_read_memory(0x10000, 0).is_none());
    }

    #[test]
    fn test_safe_read_memory_null_ptr() {
        assert!(safe_read_memory(0, 100).is_none());
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_are_pages_valid_single_page() {
        assert!(are_pages_valid(0x10000, 100));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn test_are_pages_valid_cross_page() {
        // Heap-like address expected to be accepted on macOS.
        let ptr = 0x10000;
        let size = 200;
        assert!(are_pages_valid(ptr, size));
    }

    #[test]
    fn test_scan_result_creation() {
        let result = ScanResult {
            ptr: 0x1000,
            size: 64,
            memory: None,
        };
        assert_eq!(result.ptr, 0x1000);
        assert_eq!(result.size, 64);
        assert!(result.memory.is_none());
    }

    #[test]
    #[cfg(not(target_os = "linux"))]
    fn test_heap_scanner_scan_real_allocations() {
        let data1 = vec![42u8; 64];
        let data2 = vec![99u8; 128];
        let ptr1 = data1.as_ptr() as usize;
        let ptr2 = data2.as_ptr() as usize;

        let allocations = vec![alloc_at(ptr1, 64), alloc_at(ptr2, 128)];

        let results = HeapScanner::scan(&allocations);
        assert_eq!(results.len(), 2);

        assert!(results[0].memory.is_some(), "Should read memory at ptr1");
        assert!(results[1].memory.is_some(), "Should read memory at ptr2");

        // The bytes read back must match the live buffers.
        assert_eq!(results[0].memory.as_deref(), Some(data1.as_slice()));
        assert_eq!(results[1].memory.as_deref(), Some(data2.as_slice()));

        // Keep the buffers alive until after the scan.
        drop(data1);
        drop(data2);
    }

    #[test]
    fn test_heap_scanner_scan_empty_allocations() {
        let allocations: Vec<ActiveAllocation> = vec![];
        let results = HeapScanner::scan(&allocations);
        assert!(results.is_empty());
    }

    #[test]
    fn test_heap_scanner_scan_zero_size_allocation() {
        let allocations = vec![alloc_at(0x10000, 0)];

        let results = HeapScanner::scan(&allocations);
        assert_eq!(results.len(), 1);
        // Zero-size allocation should return None for memory (nothing to read).
        assert!(results[0].memory.is_none());
    }

    #[test]
    #[cfg(not(target_os = "linux"))]
    fn test_heap_scanner_content_preserved_after_scan() {
        // Typed as bytes on purpose: an untyped literal vector would be
        // inferred as `Vec<i32>`, and the scan would then read the first
        // 8 bytes of 32-bit integers rather than this byte sequence.
        let data: Vec<u8> = vec![0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE, 0xBA, 0xBE];
        let ptr = data.as_ptr() as usize;
        let size = data.len();

        let results = HeapScanner::scan(&[alloc_at(ptr, size)]);
        assert_eq!(results.len(), 1);

        let mem = results[0]
            .memory
            .as_ref()
            .expect("Should read memory at allocated address");
        assert_eq!(mem.len(), size, "Should read expected number of bytes");
        assert_eq!(
            mem.as_slice(),
            data.as_slice(),
            "Scanned bytes should match the allocation's content"
        );

        // Keep the buffer alive until after the scan.
        drop(data);
    }
}