Skip to main content

openentropy_core/sources/io/
nvme_passthrough_linux.rs

1//! NVMe admin passthrough — raw NVMe commands on Linux via ioctl.
2//!
3//! Submits raw NVMe admin commands (Get Log Page for SMART/Health) via
4//! `ioctl(NVME_IOCTL_ADMIN_CMD)` on `/dev/nvme0`. This bypasses the filesystem,
5//! block layer, and I/O scheduler entirely — the timing path is:
6//! userspace → NVMe kernel driver → NVMe controller → NAND.
7//!
8//! ## Entropy mechanism
9//!
10//! - **NVMe command round-trip timing**: Minimal host overhead, dominated by
11//!   controller firmware processing and NAND access
12//! - **SMART temperature ADC noise**: On-die temperature sensor quantization noise
13//! - **Controller internal state**: FTL state, GC scheduling, wear leveling
14//!   all affect command latency nondeterministically
15//!
16//! ## Entropy quality
17//!
18//! This is the closest to NVMe hardware achievable from userspace on Linux.
19//! The filesystem and block layers are completely eliminated. The dominant
20//! timing variance comes from NVMe driver submission/completion overhead (~4us)
21//! and controller firmware processing (FTL, GC). The NAND charge sensing
22//! physics has quantum-mechanical underpinnings, but quantifying the fraction
23//! of timing variance attributable to quantum effects is not possible without
24//! specialized metrology equipment.
25
26use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
27#[cfg(target_os = "linux")]
28use crate::sources::helpers::extract_timing_entropy;
29
30static NVME_PASSTHROUGH_INFO: SourceInfo = SourceInfo {
31    name: "nvme_passthrough_linux",
32    description: "Raw NVMe admin commands via ioctl passthrough on Linux (closest to NAND hardware)",
33    physics: "Submits NVMe admin commands (Get Log Page for SMART/Health Information, Log ID 02h) \
34              via ioctl(NVME_IOCTL_ADMIN_CMD) on /dev/nvme0. This bypasses the filesystem, block \
35              layer, and I/O scheduler entirely. The timing path is: userspace \u{2192} NVMe kernel \
36              driver \u{2192} NVMe controller \u{2192} NAND flash. Command round-trip timing is \
37              dominated by NVMe controller firmware processing (FTL lookup, wear leveling, garbage \
38              collection scheduling) and NAND flash page access. NAND charge sensing has quantum-\
39              mechanical underpinnings (Fowler-Nordheim tunneling), but the dominant timing variance \
40              is classical (driver overhead, firmware scheduling). SMART temperature values provide \
41              additional ADC quantization noise.",
42    category: SourceCategory::IO,
43    platform: Platform::Linux,
44    requirements: &[Requirement::RawBlockDevice],
45    entropy_rate_estimate: 2.0,
46    composite: false,
47    is_fast: true,
48};
49
/// NVMe admin passthrough entropy source (Linux only).
///
/// This is a stateless marker type: it holds no device handle. The NVMe
/// device is opened and closed inside each call that needs it, so values of
/// this type are free to copy, clone, default-construct and print.
#[derive(Debug, Clone, Copy, Default)]
pub struct NvmePassthroughLinuxSource;
52
53/// NVMe passthrough implementation for Linux.
54#[cfg(target_os = "linux")]
55mod passthrough {
56    use std::time::Instant;
57
58    /// NVMe passthrough command struct matching `struct nvme_passthru_cmd`
59    /// from `linux/nvme_ioctl.h`.
60    #[repr(C)]
61    #[derive(Default)]
62    struct NvmePassthruCmd {
63        opcode: u8,
64        flags: u8,
65        rsvd1: u16,
66        nsid: u32,
67        cdw2: u32,
68        cdw3: u32,
69        metadata: u64,
70        addr: u64,
71        metadata_len: u32,
72        data_len: u32,
73        cdw10: u32,
74        cdw11: u32,
75        cdw12: u32,
76        cdw13: u32,
77        cdw14: u32,
78        cdw15: u32,
79        timeout_ms: u32,
80        result: u32,
81    }
82
83    // Compile-time check: NvmePassthruCmd must match the kernel's struct layout (72 bytes).
84    const _: () = assert!(std::mem::size_of::<NvmePassthruCmd>() == 72);
85
86    /// NVME_IOCTL_ADMIN_CMD = _IOWR('N', 0x41, struct nvme_passthru_cmd)
87    /// On Linux: direction = _IOWR = 0xC0000000, size = sizeof(nvme_passthru_cmd) = 72 = 0x48
88    /// type = 'N' = 0x4E, nr = 0x41
89    /// ioctl number = 0xC0484E41
90    const NVME_IOCTL_ADMIN_CMD: libc::c_ulong = 0xC048_4E41;
91
92    /// NVMe Admin command opcode: Get Log Page
93    const NVME_ADMIN_GET_LOG_PAGE: u8 = 0x02;
94    /// SMART / Health Information log (Log ID 02h)
95    const NVME_LOG_SMART: u32 = 0x02;
96    /// Size of SMART/Health Information log page
97    const SMART_LOG_SIZE: u32 = 512;
98
99    /// Try to open the NVMe character device.
100    pub fn try_open_nvme() -> Option<i32> {
101        let devices = ["/dev/nvme0", "/dev/nvme1", "/dev/nvme0n1"];
102        for dev in &devices {
103            let c_path = match std::ffi::CString::new(*dev) {
104                Ok(s) => s,
105                Err(_) => continue,
106            };
107            // SAFETY: open() with O_RDONLY on the NVMe character device.
108            // Requires CAP_SYS_ADMIN or being in the nvme/disk group.
109            let fd = unsafe { libc::open(c_path.as_ptr(), libc::O_RDONLY) };
110            if fd >= 0 {
111                return Some(fd);
112            }
113        }
114        None
115    }
116
117    /// Check if NVMe passthrough is available.
118    pub fn has_nvme_passthrough() -> bool {
119        if let Some(fd) = try_open_nvme() {
120            // SAFETY: close() on a valid fd.
121            unsafe { libc::close(fd) };
122            true
123        } else {
124            false
125        }
126    }
127
128    /// Submit a Get Log Page (SMART/Health) command and return the timing.
129    /// Returns (timing_nanos, smart_temperature) or None on failure.
130    fn submit_smart_log_page(fd: i32) -> Option<(u64, u16)> {
131        let mut log_buf = [0u8; SMART_LOG_SIZE as usize];
132
133        // Number of dwords to return (0-based): (512/4 - 1) = 127
134        let numd = (SMART_LOG_SIZE / 4) - 1;
135
136        let mut cmd = NvmePassthruCmd {
137            opcode: NVME_ADMIN_GET_LOG_PAGE,
138            nsid: 0xFFFF_FFFF, // Global log page
139            addr: log_buf.as_mut_ptr() as u64,
140            data_len: SMART_LOG_SIZE,
141            cdw10: (numd << 16) | NVME_LOG_SMART, // NUMDL[15:0] | LID
142            timeout_ms: 1000,
143            ..Default::default()
144        };
145
146        let t_before = Instant::now();
147
148        // SAFETY: ioctl with NVME_IOCTL_ADMIN_CMD on a valid NVMe character device fd.
149        // The cmd struct matches the kernel's expected layout. The log_buf is stack-allocated
150        // and large enough for the SMART log page (512 bytes).
151        let ret =
152            unsafe { libc::ioctl(fd, NVME_IOCTL_ADMIN_CMD, &mut cmd as *mut NvmePassthruCmd) };
153
154        let elapsed_nanos = t_before.elapsed().as_nanos() as u64;
155
156        if ret < 0 {
157            return None;
158        }
159
160        // Extract composite temperature from SMART log (bytes 1-2, Kelvin).
161        let temp_kelvin = u16::from_le_bytes([log_buf[1], log_buf[2]]);
162
163        Some((elapsed_nanos, temp_kelvin))
164    }
165
166    /// Perform multiple SMART log page reads and return timings and temperatures.
167    pub fn timed_smart_reads(fd: i32, count: usize) -> (Vec<u64>, Vec<u16>) {
168        let mut timings = Vec::with_capacity(count);
169        let mut temps = Vec::with_capacity(count);
170
171        for _ in 0..count {
172            match submit_smart_log_page(fd) {
173                Some((timing, temp)) => {
174                    timings.push(timing);
175                    temps.push(temp);
176                }
177                None => {
178                    // Command failed, skip this sample.
179                }
180            }
181        }
182
183        (timings, temps)
184    }
185}
186
187impl EntropySource for NvmePassthroughLinuxSource {
188    fn info(&self) -> &SourceInfo {
189        &NVME_PASSTHROUGH_INFO
190    }
191
192    fn is_available(&self) -> bool {
193        #[cfg(target_os = "linux")]
194        {
195            passthrough::has_nvme_passthrough()
196        }
197        #[cfg(not(target_os = "linux"))]
198        {
199            false
200        }
201    }
202
203    fn collect(&self, n_samples: usize) -> Vec<u8> {
204        #[cfg(not(target_os = "linux"))]
205        {
206            let _ = n_samples;
207            Vec::new()
208        }
209
210        #[cfg(target_os = "linux")]
211        {
212            use crate::sources::helpers::xor_fold_u64;
213
214            let fd = match passthrough::try_open_nvme() {
215                Some(fd) => fd,
216                None => return Vec::new(),
217            };
218
219            // Over-sample for the extraction pipeline.
220            let raw_count = n_samples * 4 + 64;
221            let (timings, temps) = passthrough::timed_smart_reads(fd, raw_count);
222
223            // SAFETY: close() on a valid fd.
224            unsafe { libc::close(fd) };
225
226            if timings.len() < 4 {
227                return Vec::new();
228            }
229
230            // Primary entropy: command round-trip timing.
231            let timing_bytes = extract_timing_entropy(&timings, n_samples);
232
233            // Secondary entropy: temperature ADC LSB noise.
234            let temp_deltas: Vec<u64> = temps
235                .windows(2)
236                .map(|w| (w[1] as u64).wrapping_sub(w[0] as u64))
237                .collect();
238            let temp_xored: Vec<u64> = temp_deltas.windows(2).map(|w| w[0] ^ w[1]).collect();
239            let temp_bytes: Vec<u8> = temp_xored
240                .iter()
241                .map(|&x| xor_fold_u64(x))
242                .take(n_samples)
243                .collect();
244
245            // XOR both streams together.
246            let mut output = Vec::with_capacity(n_samples);
247            for i in 0..timing_bytes.len().max(temp_bytes.len()).min(n_samples) {
248                let tb = timing_bytes.get(i).copied().unwrap_or(0);
249                let tempb = temp_bytes.get(i).copied().unwrap_or(0);
250                output.push(tb ^ tempb);
251            }
252            output.truncate(n_samples);
253            output
254        }
255    }
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// The static metadata identifies a non-composite Linux I/O source.
    #[test]
    fn info() {
        let source = NvmePassthroughLinuxSource;
        assert_eq!(source.name(), "nvme_passthrough_linux");

        let meta = source.info();
        assert_eq!(meta.category, SourceCategory::IO);
        assert_eq!(meta.platform, Platform::Linux);
        assert!(!meta.composite);
    }

    /// The physics text must keep naming the mechanism it describes.
    #[test]
    fn physics_mentions_ioctl() {
        let physics = NvmePassthroughLinuxSource.info().physics;
        for needle in &["ioctl", "SMART", "Fowler-Nordheim"] {
            assert!(physics.contains(*needle));
        }
    }

    /// Off Linux the source must report itself unavailable. On Linux the
    /// answer depends on /dev/nvme0 access, so nothing is probed there.
    #[test]
    fn not_available_on_non_linux() {
        let source = NvmePassthroughLinuxSource;
        if cfg!(not(target_os = "linux")) {
            assert!(!source.is_available());
        }
    }

    /// Collection yields a non-empty result no longer than requested.
    #[test]
    #[ignore] // Requires Linux with /dev/nvme0 access (CAP_SYS_ADMIN)
    fn collects_bytes() {
        let source = NvmePassthroughLinuxSource;
        if !source.is_available() {
            return;
        }

        let bytes = source.collect(64);
        assert!(!bytes.is_empty());
        assert!(bytes.len() <= 64);
    }
}
298}