// openentropy_core/sources/io/nvme_raw_device.rs

//! NVMe raw block device reads — bypass filesystem for closer NAND timing.
//!
//! Reads directly from `/dev/rdiskN` (macOS) or whole-device nodes such as
//! `/dev/nvmeXnY` and `/dev/sdX` (Linux) using `libc` FFI with aligned buffers
//! and `F_NOCACHE` (macOS) / `O_DIRECT` (Linux) to bypass the OS buffer cache.
//! This removes the filesystem and buffer cache layers from the timing path,
//! getting closer to NVMe controller + NAND flash timing.
//!
//! ## Entropy mechanism
//!
//! - **NVMe controller timing**: Command submission, FTL lookup, wear leveling
//! - **NAND page read latency**: Charge sensing through quantum tunneling
//!   (Fowler-Nordheim for writes, threshold voltage sensing for reads)
//! - **Cross-die variation**: Reads at widely-spaced offsets (1 MB apart) are
//!   intended to hit different NAND dies/planes with independent timing
//!   characteristics
//!
//! ## Entropy quality
//!
//! With the filesystem layer removed, a larger fraction of the measured timing
//! comes from NVMe controller firmware (FTL, GC, wear leveling) and NAND access
//! latency. The NAND charge sensing physics has quantum-mechanical underpinnings
//! (electron tunneling), but the dominant variance is classical firmware timing.
//! There is also no guarantee that reads are served from NAND rather than the
//! controller's DRAM cache.
25
26use std::sync::OnceLock;
27
28use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
29use crate::sources::helpers::extract_timing_entropy;
30
/// Static descriptor for the raw-block-device source; this is the value handed
/// out by the `EntropySource::info` implementation for `NvmeRawDeviceSource`.
///
/// `platform` is `Platform::Any`, but `try_open_raw_device` only probes device
/// nodes on macOS and Linux. On any other target it returns `None`, so the
/// source reports itself as unavailable there.
static NVME_RAW_DEVICE_INFO: SourceInfo = SourceInfo {
    name: "nvme_raw_device",
    description: "Direct raw block device reads bypassing filesystem with page-aligned I/O",
    physics: "Reads directly from raw block devices (/dev/rdiskN on macOS, /dev/nvmeXnYpZ on \
              Linux) with page-aligned buffers and cache bypass (F_NOCACHE/O_DIRECT). This \
              eliminates the filesystem, buffer cache, and VFS layers from the timing path. \
              The remaining timing variance comes from NVMe controller firmware (FTL lookup, \
              wear leveling, garbage collection) and NAND flash page read latency. NAND reads \
              involve charge sensing where threshold voltage depends on trapped electron count \
              from Fowler-Nordheim tunneling. Note: the dominant timing variance is classical \
              (firmware scheduling, DRAM cache hits) — the quantum-mechanical contribution \
              from charge sensing cannot be isolated without specialized metrology equipment.",
    category: SourceCategory::IO,
    platform: Platform::Any,
    // Opening the device nodes normally needs elevated privileges; the open
    // simply fails (and the source is unavailable) when they are missing.
    requirements: &[Requirement::RawBlockDevice],
    // NOTE(review): the unit of this estimate is defined by `SourceInfo`, which
    // is not visible in this file — confirm before relying on the number.
    entropy_rate_estimate: 2.0,
    composite: false,
    // Every sample is a synchronous device read, hence not a "fast" source.
    is_fast: false,
};
50
/// Number of distinct read offsets the sampling loop cycles through; sample
/// `i` uses offset index `i % N_OFFSETS`. The spread is intended to reach
/// different NAND dies, but the physical mapping is not observable from here.
const N_OFFSETS: usize = 8;
/// Size in bytes of every read, and also the alignment requested for the read
/// buffer (aligned I/O is required when the page cache is bypassed).
const BLOCK_SIZE: usize = 4096;
/// Byte distance between consecutive read offsets, chosen so that reads are
/// likely to land on different NAND dies/planes.
const OFFSET_STRIDE: u64 = 1024 * 1024; // 1 MiB
57
/// NVMe raw block device entropy source.
///
/// A stateless unit struct: each `collect` call opens its own descriptor via
/// `try_open_raw_device` and closes it again before returning.
pub struct NvmeRawDeviceSource;
60
61/// Try to find and open a readable raw block device.
62/// Returns the fd on success.
63fn try_open_raw_device() -> Option<i32> {
64    #[cfg(target_os = "macos")]
65    {
66        let devices = ["/dev/rdisk0", "/dev/rdisk1", "/dev/rdisk2"];
67        for dev in &devices {
68            let c_path = match std::ffi::CString::new(*dev) {
69                Ok(s) => s,
70                Err(_) => continue,
71            };
72            // SAFETY: open() with O_RDONLY on a device path. May fail with EACCES
73            // if not root, which is handled by checking the return value.
74            let fd = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY) };
75            if fd >= 0 {
76                // Disable buffer cache (macOS-specific).
77                // SAFETY: fcntl F_NOCACHE is a valid operation on an open fd.
78                unsafe { libc::fcntl(fd, libc::F_NOCACHE, 1) };
79                return Some(fd);
80            }
81        }
82    }
83
84    #[cfg(target_os = "linux")]
85    {
86        let devices = ["/dev/nvme0n1", "/dev/nvme1n1", "/dev/sda", "/dev/sdb"];
87        for dev in &devices {
88            let c_path = match std::ffi::CString::new(*dev) {
89                Ok(s) => s,
90                Err(_) => continue,
91            };
92            // SAFETY: open() with O_RDONLY | O_DIRECT on a device path.
93            // O_DIRECT bypasses the page cache on Linux.
94            let fd = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY | libc::O_DIRECT) };
95            if fd >= 0 {
96                return Some(fd);
97            }
98        }
99    }
100
101    None
102}
103
104/// Check if raw block device access is available (without keeping the fd).
105fn can_open_raw_device() -> bool {
106    if let Some(fd) = try_open_raw_device() {
107        // SAFETY: close() on a valid fd.
108        unsafe { libc::close(fd) };
109        true
110    } else {
111        false
112    }
113}
114
115/// Perform timed reads on a raw block device fd.
116/// Returns a vec of timing values (CNTVCT ticks on macOS, nanos on Linux).
117fn timed_raw_reads(fd: i32, count: usize) -> Vec<u64> {
118    use crate::sources::helpers::read_cntvct;
119
120    // Allocate a page-aligned buffer.
121    let mut aligned_buf: *mut libc::c_void = std::ptr::null_mut();
122    // SAFETY: posix_memalign allocates an aligned buffer. We check the return value.
123    let ret = unsafe { libc::posix_memalign(&mut aligned_buf, BLOCK_SIZE, BLOCK_SIZE) };
124    if ret != 0 || aligned_buf.is_null() {
125        return Vec::new();
126    }
127
128    // Pre-compute offsets (widely spaced to hit different NAND dies/planes).
129    let offsets: Vec<i64> = (0..N_OFFSETS)
130        .map(|i| (i as u64 * OFFSET_STRIDE) as i64)
131        .collect();
132
133    let mut timings = Vec::with_capacity(count);
134
135    for i in 0..count {
136        let offset = offsets[i % N_OFFSETS];
137
138        // Seek to the target offset.
139        // SAFETY: lseek on a valid fd with a valid offset.
140        let seek_result = unsafe { libc::lseek(fd, offset, libc::SEEK_SET) };
141        if seek_result < 0 {
142            // If seek fails (offset beyond device), wrap around to offset 0.
143            unsafe { libc::lseek(fd, 0, libc::SEEK_SET) };
144        }
145
146        let t_before = read_cntvct();
147
148        // SAFETY: read() into a valid aligned buffer of BLOCK_SIZE.
149        let _bytes_read = unsafe { libc::read(fd, aligned_buf, BLOCK_SIZE) };
150
151        let t_after = read_cntvct();
152        timings.push(t_after.wrapping_sub(t_before));
153    }
154
155    // SAFETY: free() on a buffer allocated by posix_memalign.
156    unsafe { libc::free(aligned_buf) };
157
158    timings
159}
160
161impl EntropySource for NvmeRawDeviceSource {
162    fn info(&self) -> &SourceInfo {
163        &NVME_RAW_DEVICE_INFO
164    }
165
166    fn is_available(&self) -> bool {
167        static RAW_DEVICE_AVAILABLE: OnceLock<bool> = OnceLock::new();
168        *RAW_DEVICE_AVAILABLE.get_or_init(can_open_raw_device)
169    }
170
171    fn collect(&self, n_samples: usize) -> Vec<u8> {
172        let fd = match try_open_raw_device() {
173            Some(fd) => fd,
174            None => return Vec::new(),
175        };
176
177        // Over-sample: ~4x raw readings per output byte.
178        let raw_count = n_samples * 4 + 64;
179        let timings = timed_raw_reads(fd, raw_count);
180
181        // SAFETY: close() on a valid fd.
182        unsafe { libc::close(fd) };
183
184        extract_timing_entropy(&timings, n_samples)
185    }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// The metadata exposed by the source carries the declared values.
    #[test]
    fn info() {
        let source = NvmeRawDeviceSource;
        let meta = source.info();

        assert_eq!(source.name(), "nvme_raw_device");
        assert_eq!(meta.category, SourceCategory::IO);
        assert!(!meta.composite);
        assert_eq!(meta.platform, Platform::Any);
    }

    /// The physics description names the mechanisms the source relies on.
    #[test]
    fn physics_mentions_nand() {
        let physics = NvmeRawDeviceSource.info().physics;

        assert!(physics.contains("NAND"));
        assert!(physics.contains("Fowler-Nordheim"));
        assert!(physics.contains("raw block"));
    }

    /// End-to-end collection; a no-op when no raw device can be opened.
    #[test]
    #[ignore] // Requires root or disk group membership for raw device access
    fn collects_bytes() {
        let source = NvmeRawDeviceSource;
        if !source.is_available() {
            return;
        }

        let data = source.collect(64);
        assert!(!data.is_empty());
        assert!(data.len() <= 64);
    }
}