supermachine 0.7.70

Run any OCI/Docker image as a hardware-isolated microVM on macOS HVF (Linux KVM and Windows WHP in progress). Single library API, zero flags for the common case, sub-100 ms cold-restore from snapshot.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
// Read-only and read-write block device backed by an mmap'd file.
// RO mode is used for OCI image layers (squashfs); RW is used for
// user-supplied volumes (`--volume HOST:GUEST`). Supports
// VIRTIO_BLK_T_IN (read), VIRTIO_BLK_T_OUT (write, RW only),
// VIRTIO_BLK_T_FLUSH (msync, RW only), and VIRTIO_BLK_T_GET_ID.

use std::fs::{File, OpenOptions};
use std::os::unix::io::AsRawFd;
use std::sync::{Arc, Mutex};

use super::queue::Queue;
use super::{VirtioDevice, VIRTIO_ID_BLOCK};

// Request types: value of the `type` field in the 16-byte request header
// that `drain_q` reads from the first descriptor of each chain.
/// Read: copy bytes from the backing file into the guest's data buffers.
const VIRTIO_BLK_T_IN: u32 = 0;
/// Write: copy the guest's data buffers into the backing file (RW only).
const VIRTIO_BLK_T_OUT: u32 = 1;
/// Flush: `msync` the mapping on a writable device; no-op on a RO device.
const VIRTIO_BLK_T_FLUSH: u32 = 4;
/// Get ID: return the device name as an identification string.
const VIRTIO_BLK_T_GET_ID: u32 = 8;

// Status codes: the single byte written into the last descriptor of a
// request chain once the request has been processed.
/// Request completed.
const VIRTIO_BLK_S_OK: u8 = 0;
/// Request failed (e.g. the sector span overflowed or ran past the backing).
const VIRTIO_BLK_S_IOERR: u8 = 1;
/// Unknown request type, or a write sent to a read-only device.
const VIRTIO_BLK_S_UNSUPP: u8 = 2;

/// virtio-blk feature bits.
// NOTE(review): SIZE_MAX, SEG_MAX and BLK_SIZE are declared here but are not
// referenced anywhere else in this file; `features()` never advertises them.
const VIRTIO_BLK_F_SIZE_MAX: u64 = 1 << 1;
const VIRTIO_BLK_F_SEG_MAX: u64 = 1 << 2;
/// Advertised by `features()` when the device was opened read-only.
const VIRTIO_BLK_F_RO: u64 = 1 << 5;
const VIRTIO_BLK_F_BLK_SIZE: u64 = 1 << 6;
/// FLUSH support — set this bit on a writable device so the Linux
/// guest issues `VIRTIO_BLK_T_FLUSH` on `sync`/`fsync` (which the
/// host's FLUSH handler in `drain_q` services with `msync(MS_SYNC)`;
/// an earlier cut also issued F_FULLFSYNC, which that handler's
/// comment records as removed in 0.7.31). Without it, the guest
/// driver assumes the device has no flush primitive and skips the
/// barrier entirely: `sync(2)` returns from the guest with the
/// ack'd journal commit still buffered in the worker's mmap'd
/// region, and a SIGKILL of the worker before the macOS UBC flush
/// loses the metadata. Next mount surfaces `Bad message`
/// (EBADMSG) on the data blocks the on-disk file actually
/// contains. (Field-report bug fixed 0.7.30.)
const VIRTIO_BLK_F_FLUSH: u64 = 1 << 9;
/// Always advertised by `features()`, for both RO and RW devices.
const VIRTIO_F_VERSION_1: u64 = 1 << 32;

/// Bytes per sector: request `sector` numbers are multiplied by this to get
/// a byte offset into the backing, and the capacity reported by `config()`
/// is the backing length divided by it.
const SECTOR_SIZE: u64 = 512;

/// A virtio block device whose storage is an mmap'd host file.
///
/// Instances are built by `open_ro` (read-only mapping) or `open_rw`
/// (shared read-write mapping). Requests are serviced in `drain_q`, which
/// copies bytes between guest memory and the mapping.
pub struct VirtioBlk {
    /// Device label: used in the `[virtio-blk:{name}]` log prefix and as
    /// the string returned for `VIRTIO_BLK_T_GET_ID`.
    name: String,
    /// mmap pointer to the backing file. PROT_READ in RO mode,
    /// PROT_READ|PROT_WRITE + MAP_SHARED in RW mode.
    backing_ptr: *mut u8,
    /// Length of the mapping in bytes (the file length at open time). Every
    /// guest-supplied sector span is bounds-checked against this value.
    backing_len: usize,
    /// True for `open_rw`; controls whether `VIRTIO_BLK_T_OUT` /
    /// `VIRTIO_BLK_T_FLUSH` are honored and whether the
    /// `VIRTIO_BLK_F_RO` feature bit is advertised.
    writable: bool,
    /// Virtqueues handed over by `activate`; empty until then. Only index 0
    /// is serviced. The lock is held for the whole of request processing.
    queues: Mutex<Vec<Queue>>,
    /// Set to true by `activate`. It is stored but not read anywhere in
    /// this file.
    activated: std::sync::atomic::AtomicBool,
    /// Callback installed via `set_irq_raise`; invoked once after a drain
    /// pass that completed at least one request. `None` until installed.
    irq_raise: Mutex<Option<Arc<dyn Fn() + Send + Sync>>>,
}

// SAFETY (reviewer note): the raw `backing_ptr` field (a `*mut u8`) is, at
// minimum, what stops these traits from being derived automatically. Within
// this file the pointer is assigned once at construction and is only
// dereferenced inside `drain_q`, which holds the `queues` mutex for the whole
// of request processing, so accesses to the mapping through this type are
// serialized. The remaining mutable state sits behind `Mutex` / `AtomicBool`.
// NOTE(review): whether `Queue` itself is safe to move/share across threads
// is not visible from this file — confirm against its definition.
unsafe impl Send for VirtioBlk {}
unsafe impl Sync for VirtioBlk {}

impl VirtioBlk {
    /// Mount `path` as a read-only block device (mmap MAP_PRIVATE +
    /// PROT_READ; OS handles paging, we never write).
    ///
    /// `name` labels the device in log lines and is the string returned for
    /// `VIRTIO_BLK_T_GET_ID`.
    ///
    /// # Errors
    /// Returns the underlying I/O error if the file cannot be opened, its
    /// metadata cannot be read, or `mmap` fails.
    pub fn open_ro(name: &str, path: &str) -> std::io::Result<Self> {
        let f = File::open(path)?;
        let len = f.metadata()?.len() as usize;
        // SAFETY: plain mmap of an open fd with a kernel-chosen address; the
        // result is checked against MAP_FAILED before it is ever used.
        let p = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                len,
                libc::PROT_READ,
                libc::MAP_PRIVATE,
                f.as_raw_fd(),
                0,
            )
        };
        if p == libc::MAP_FAILED {
            return Err(std::io::Error::last_os_error());
        }
        // Hint kernel: we'll read sequentially. Advisory only, so the
        // return value is deliberately ignored.
        // SAFETY: `p`/`len` describe the mapping created just above.
        unsafe {
            libc::madvise(p, len, libc::MADV_SEQUENTIAL);
        }
        eprintln!("[virtio-blk:{name}] mmap ro {} bytes from {path}", len);
        // The mmap keeps the underlying inode reference alive
        // after `f` is dropped; we don't need the fd for any
        // fsync path. Match the pre-flush-handler-change lifecycle
        // exactly.
        drop(f);
        Ok(Self {
            name: name.to_string(),
            backing_ptr: p as *mut u8,
            backing_len: len,
            writable: false,
            queues: Mutex::new(Vec::new()),
            activated: std::sync::atomic::AtomicBool::new(false),
            irq_raise: Mutex::new(None),
        })
    }

    /// Mount `path` as a read-write block device (mmap MAP_SHARED +
    /// PROT_READ|PROT_WRITE). Used for `--volume` persistent
    /// volumes — the host file is the canonical store; guest writes
    /// land directly in it.
    ///
    /// The file is created if missing. If it is smaller than `size_bytes`
    /// it is grown to `size_bytes` before mapping; it is never shrunk, so a
    /// file that is already larger is mapped at its current length.
    /// Subsequent runs reuse the same file at the same size; growing or
    /// shrinking would invalidate any filesystem the guest formatted.
    ///
    /// # Errors
    /// Returns the underlying I/O error if the file cannot be
    /// opened/created, resized, or mapped.
    pub fn open_rw(name: &str, path: &str, size_bytes: u64) -> std::io::Result<Self> {
        let f = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            // MUST NOT truncate: this is the persistent volume backing
            // file; truncating would destroy the guest's formatted
            // filesystem on every open. set_len below right-sizes it.
            .truncate(false)
            .open(path)?;
        let cur_len = f.metadata()?.len();
        if cur_len < size_bytes {
            f.set_len(size_bytes)?;
        }
        let len = f.metadata()?.len() as usize;
        // SAFETY: mmap with shared mapping for the file's length; the result
        // is checked against MAP_FAILED before it is ever used.
        let p = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                len,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_SHARED,
                f.as_raw_fd(),
                0,
            )
        };
        if p == libc::MAP_FAILED {
            return Err(std::io::Error::last_os_error());
        }
        // Random access pattern: filesystems poke all over. Advisory only,
        // so the return value is deliberately ignored.
        // SAFETY: `p`/`len` describe the mapping created just above.
        unsafe {
            libc::madvise(p, len, libc::MADV_RANDOM);
        }
        eprintln!("[virtio-blk:{name}] mmap rw {} bytes from {path}", len);
        // mmap keeps the inode reference alive; the fd isn't
        // needed past this point (FLUSH does msync, not fsync).
        drop(f);
        Ok(Self {
            name: name.to_string(),
            backing_ptr: p as *mut u8,
            backing_len: len,
            writable: true,
            queues: Mutex::new(Vec::new()),
            activated: std::sync::atomic::AtomicBool::new(false),
            irq_raise: Mutex::new(None),
        })
    }

    /// Install the callback that `drain_q` invokes after completing at
    /// least one request. Replaces any previously installed callback.
    pub fn set_irq_raise(&self, f: Arc<dyn Fn() + Send + Sync>) {
        *self.irq_raise.lock().unwrap() = Some(f);
    }

    /// Bounds-check a guest-supplied byte span against the backing.
    ///
    /// Returns the exclusive end offset `start + len` when the addition does
    /// not overflow and the span lies entirely inside
    /// `[0, backing_len]`; returns `None` otherwise.
    fn span_end(&self, start: u64, len: u64) -> Option<u64> {
        start
            .checked_add(len)
            .filter(|end| *end <= self.backing_len as u64)
    }

    /// Service every pending request on queue 0, then raise the interrupt
    /// callback once if anything was completed.
    ///
    /// Does nothing if the device has no queue 0 yet or the queue is not
    /// marked ready. The `queues` lock is held while requests are processed
    /// and released before the interrupt callback runs.
    fn drain_q(&self) {
        let mut qs = self.queues.lock().unwrap();
        let q = match qs.get_mut(0) {
            Some(q) => q,
            None => return,
        };
        if !q.ready {
            return;
        }
        let mut any_used = false;
        while let Some((head, chain)) = q.pop_chain() {
            // Request layout (virtio-blk):
            //   desc[0] (read-only): struct virtio_blk_req {
            //     u32 type, u32 reserved, u64 sector }  — 16 bytes
            //   desc[1..n-1]: data buffers
            //   desc[n-1]  (write-only): u8 status
            if chain.len() < 2 {
                // No room for both a header and a status byte: complete the
                // chain with zero bytes written and move on.
                q.add_used(head, 0);
                any_used = true;
                continue;
            }
            let hdr = chain[0];
            let status_desc = chain[chain.len() - 1];
            // Everything strictly between the header and the status byte.
            // May be empty (e.g. FLUSH, or a malformed 2-descriptor chain).
            let data = &chain[1..chain.len() - 1];
            // Read header.
            let req_type = q.mem.read_u32(hdr.addr);
            let _reserved = q.mem.read_u32(hdr.addr + 4);
            let sector = q.mem.read_u64(hdr.addr + 8);

            let mut status = VIRTIO_BLK_S_OK;
            let mut bytes_written: u32 = 1; // status byte
            match req_type {
                VIRTIO_BLK_T_IN => {
                    // Copy from backing[sector*512..] into each data
                    // descriptor. `sector` and the descriptor lengths are
                    // guest-controlled, so the running byte offset is
                    // tracked in u64 with CHECKED arithmetic: a crafted
                    // sector (e.g. near u64::MAX) must not wrap into a
                    // small value that slips past the bounds check and
                    // becomes a guest→host OOB read. Any overflow or
                    // out-of-range span fails the request with IOERR.
                    let mut off = sector.checked_mul(SECTOR_SIZE);
                    for d in data {
                        let want = d.len as u64;
                        let Some((start, end)) =
                            off.and_then(|s| self.span_end(s, want).map(|e| (s, e)))
                        else {
                            status = VIRTIO_BLK_S_IOERR;
                            break;
                        };
                        // SAFETY: backing_ptr is mmap'd for backing_len
                        // bytes; `end <= backing_len` proves [start, end)
                        // is in bounds, so start/want fit in usize.
                        unsafe {
                            let src = self.backing_ptr.add(start as usize);
                            let slice = std::slice::from_raw_parts(src, want as usize);
                            q.mem.write_slice(d.addr, slice);
                        }
                        bytes_written = bytes_written.saturating_add(want as u32);
                        off = Some(end);
                    }
                }
                VIRTIO_BLK_T_FLUSH => {
                    // RO: no-op. RW: `msync(MS_SYNC)` pushes dirty
                    // mmap pages from the worker's address space
                    // into the macOS unified buffer cache. The UBC
                    // is process-independent and survives clean
                    // process exit, so this is sufficient for the
                    // ext4-journal-correctness story under normal
                    // shutdown.
                    //
                    // Why not F_FULLFSYNC: we tried that in an
                    // early 0.7.30 cut to ALSO cover hard SIGKILL
                    // of the worker (durability past UBC into the
                    // device). It's semantically correct but
                    // PROHIBITIVELY slow under write-heavy
                    // workloads — Apple's F_FULLFSYNC blocks until
                    // the SSD acks every outstanding write, and an
                    // `npm install` of 2k packages issues 10k+
                    // FLUSHes. Field-report: 30× slower bake
                    // (~5 s → ~150 s) PLUS in-guest stat() hangs
                    // post-bake when a relatime update queues
                    // behind in-flight F_FULLFSYNCs — host hits
                    // exec timeout, SIGKILLs the exec → `exit=137`
                    // with empty stdout/stderr → integrator's
                    // script reports `mount failed:`. F_FULLFSYNC
                    // removed in 0.7.31. SIGKILL durability is now
                    // best-effort; workloads that genuinely need
                    // it should run a guest-side `sync` then a
                    // graceful pool shutdown (which lets the
                    // worker exit cleanly, and macOS flushes the
                    // UBC to the device).
                    if self.writable {
                        // SAFETY: backing_ptr/backing_len describe the
                        // live mapping created in `open_rw`.
                        let rc = unsafe {
                            libc::msync(
                                self.backing_ptr as *mut libc::c_void,
                                self.backing_len,
                                libc::MS_SYNC,
                            )
                        };
                        // A failed flush must not be acknowledged as OK:
                        // the guest treats a successful FLUSH as a barrier
                        // its journal can rely on.
                        if rc != 0 {
                            eprintln!(
                                "[virtio-blk:{}] msync failed: {}",
                                self.name,
                                std::io::Error::last_os_error()
                            );
                            status = VIRTIO_BLK_S_IOERR;
                        }
                    }
                }
                VIRTIO_BLK_T_GET_ID => {
                    // The ID is the device name, right-aligned in a field
                    // at least 20 characters wide, truncated to whatever
                    // the guest's buffer can hold. Only a real data
                    // descriptor is used: in a 2-descriptor chain the
                    // second entry is the status byte, which must not
                    // receive ID bytes.
                    let id = format!("{:>20}", self.name);
                    let bytes = id.as_bytes();
                    if let Some(d) = data.first() {
                        let take = (d.len as usize).min(bytes.len());
                        q.mem.write_slice(d.addr, &bytes[..take]);
                        bytes_written += take as u32;
                    }
                }
                VIRTIO_BLK_T_OUT => {
                    if !self.writable {
                        status = VIRTIO_BLK_S_UNSUPP;
                    } else {
                        // Copy each data desc into backing[sector*512..].
                        // Same guest-controlled overflow hazard as T_IN
                        // above (here an OOB *write* into host memory):
                        // track the offset in u64 with checked math and
                        // reject any overflow / out-of-range span.
                        let mut off = sector.checked_mul(SECTOR_SIZE);
                        for d in data {
                            let n = d.len as u64;
                            let Some((start, end)) =
                                off.and_then(|s| self.span_end(s, n).map(|e| (s, e)))
                            else {
                                status = VIRTIO_BLK_S_IOERR;
                                break;
                            };
                            // Stage the guest bytes in a host buffer first,
                            // then copy them into the mapping.
                            let mut tmp = vec![0u8; n as usize];
                            q.mem.read_slice(d.addr, &mut tmp);
                            // SAFETY: backing_ptr is mmap'd RW for
                            // backing_len bytes; [start, end) was
                            // bounds-checked above, and `tmp` is a separate
                            // heap allocation of exactly `n` bytes.
                            unsafe {
                                let dst = self.backing_ptr.add(start as usize);
                                std::ptr::copy_nonoverlapping(tmp.as_ptr(), dst, n as usize);
                            }
                            off = Some(end);
                        }
                    }
                }
                _ => {
                    status = VIRTIO_BLK_S_UNSUPP;
                }
            }
            // Write status byte.
            q.mem.write_slice(status_desc.addr, &[status]);
            q.add_used(head, bytes_written);
            any_used = true;
        }
        if any_used {
            // Clone the callback out and release the queue lock before
            // calling it, so the callback never runs under our locks.
            let f_opt = self.irq_raise.lock().unwrap().clone();
            drop(qs);
            if let Some(f) = f_opt {
                f();
            }
        }
    }
}

impl Drop for VirtioBlk {
    /// Release the file mapping so a dropped device does not leak address
    /// space.
    fn drop(&mut self) {
        // SAFETY: a `VirtioBlk` is only ever constructed by `open_ro` /
        // `open_rw` after mmap returned something other than MAP_FAILED, so
        // backing_ptr/backing_len always describe a live mapping owned
        // exclusively by this value; `&mut self` guarantees no request is
        // in flight. The return value is ignored: there is nothing useful
        // to do about an unmap failure during drop.
        unsafe {
            libc::munmap(self.backing_ptr as *mut libc::c_void, self.backing_len);
        }
    }
}

impl VirtioDevice for VirtioBlk {
    /// Reports the virtio device type: block.
    fn device_id(&self) -> u32 {
        VIRTIO_ID_BLOCK
    }

    /// The device exposes exactly one request queue.
    fn num_queues(&self) -> usize {
        1
    }

    /// Device config space: only the leading `capacity` field is emitted —
    /// the backing length in whole 512-byte sectors, as a little-endian
    /// u64 (8 bytes). Per the original author's note, Linux's virtio-blk
    /// driver tolerates a short config space.
    fn config(&self) -> Vec<u8> {
        let capacity_sectors = self.backing_len as u64 / SECTOR_SIZE;
        Vec::from(capacity_sectors.to_le_bytes())
    }

    /// Feature bits offered to the guest: VERSION_1 always, plus exactly
    /// one of FLUSH (writable device) or RO (read-only device).
    fn features(&self) -> u64 {
        // FLUSH is only meaningful on a writable device — RO mappings have
        // no dirty pages and the FLUSH handler short-circuits when
        // `!writable`, so advertising it there would just cost a no-op
        // virtio round-trip per sync.
        let mode_bit = if self.writable {
            VIRTIO_BLK_F_FLUSH
        } else {
            VIRTIO_BLK_F_RO
        };
        VIRTIO_F_VERSION_1 | mode_bit
    }

    /// Guest kicked a queue: service whatever is pending. The queue index
    /// is ignored because there is only one queue.
    fn notify(&self, _q: u16) {
        self.drain_q();
    }

    /// Take ownership of the negotiated queues, mark the device active and
    /// log its capacity.
    fn activate(&self, queues: Vec<Queue>) {
        use std::sync::atomic::Ordering;

        {
            let mut slot = self.queues.lock().unwrap();
            *slot = queues;
        }
        self.activated.store(true, Ordering::Release);

        let sectors = self.backing_len as u64 / SECTOR_SIZE;
        eprintln!(
            "[virtio-blk:{}] activated, {} sectors",
            self.name, sectors
        );
    }

    /// Return a copy of the current queue state.
    fn snapshot_queues(&self) -> Vec<Queue> {
        let guard = self.queues.lock().unwrap();
        guard.clone()
    }
}

#[cfg(test)]
mod tests {
    //! Drives the block device over a real virtio descriptor chain in
    //! GuestMem — the path a guest actually hits. The headline cases are
    //! the guest-controlled `sector` overflow: a crafted sector must NOT
    //! drive `backing_ptr.add()` out of bounds (host OOB read/write) but
    //! fail cleanly with VIRTIO_BLK_S_IOERR.
    use super::*;
    use crate::devices::virtio::queue::{GuestMem, VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
    use std::io::Write;

    /// Guest address at which the fake guest-memory window starts (the base
    /// passed to `GuestMem::new`).
    const BASE: u64 = 0x10_0000;
    /// Size in bytes of the fake guest-memory window.
    const WIN: usize = 256 * 1024;
    // Offsets from BASE of each region the harness lays out in the window.
    /// Descriptor table (16 bytes per descriptor).
    const O_DESC: u64 = 0x0000;
    /// Available ring.
    const O_AVAIL: u64 = 0x0800;
    /// Used ring.
    const O_USED: u64 = 0x1000;
    /// 16-byte request header.
    const O_HDR: u64 = 0x2000;
    /// Data buffer (source for writes, destination for reads).
    const O_DATA: u64 = 0x3000;
    /// One-byte status written by the device.
    const O_STATUS: u64 = 0x4000;

    /// Build a path in the system temp dir made unique by `tag`, the
    /// process id and the current time in nanoseconds.
    fn temp_path(tag: &str) -> std::path::PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!("sm-blk-{tag}-{}-{nanos}.img", std::process::id()))
    }

    /// Create a writable device of `size` bytes on a fresh temp file.
    /// Returns the device and the file's path so the test can delete it.
    fn make_rw(size: u64) -> (VirtioBlk, std::path::PathBuf) {
        let path = temp_path("rw");
        File::create(&path).unwrap(); // empty; open_rw set_len-grows it
        let dev = VirtioBlk::open_rw("testvol", path.to_str().unwrap(), size).unwrap();
        (dev, path)
    }

    /// Create a read-only device over a temp file pre-filled with `size`
    /// zero bytes. Returns the device and the file's path.
    fn make_ro(size: u64) -> (VirtioBlk, std::path::PathBuf) {
        let path = temp_path("ro");
        let mut f = File::create(&path).unwrap();
        f.write_all(&vec![0u8; size as usize]).unwrap();
        drop(f);
        let dev = VirtioBlk::open_ro("testro", path.to_str().unwrap()).unwrap();
        (dev, path)
    }

    /// What the harness reads back from guest memory after a request.
    struct Resp {
        /// Status byte found at the status descriptor's address.
        status: u8,
        /// Contents of the data buffer after the request (same length as
        /// the `data` argument given to `run`).
        data: Vec<u8>,
    }

    /// Issue one request through the full device path. `data` is the data
    /// descriptor's contents (for T_OUT, the bytes the guest sends; for
    /// T_IN, a zeroed receive buffer whose length is the read size).
    ///
    /// `data_writable` sets VRING_DESC_F_WRITE on the data descriptor. The
    /// chain is always header → data → status (three descriptors).
    fn run(dev: &VirtioBlk, req_type: u32, sector: u64, data: &[u8], data_writable: bool) -> Resp {
        // Fresh zeroed guest-memory window for every request.
        let mut backing = vec![0u8; WIN];
        let mem = GuestMem::new(backing.as_mut_ptr(), BASE, WIN);

        // virtio_blk_req header: type(u32) reserved(u32) sector(u64).
        mem.write_u32(BASE + O_HDR, req_type);
        mem.write_u32(BASE + O_HDR + 4, 0);
        mem.write_u64(BASE + O_HDR + 8, sector);
        mem.write_slice(BASE + O_DATA, data);

        // Address of descriptor `i`. Fields as written below:
        // +0 addr (u64), +8 len (u32), +12 flags (u16), +14 next (u16).
        let d = |i: u64| BASE + O_DESC + i * 16;
        // desc[0] header (RO) → desc[1].
        mem.write_u64(d(0), BASE + O_HDR);
        mem.write_u32(d(0) + 8, 16);
        mem.write_u16(d(0) + 12, VRING_DESC_F_NEXT);
        mem.write_u16(d(0) + 14, 1);
        // desc[1] data → desc[2]. Writable for reads (device → guest).
        let data_flags = VRING_DESC_F_NEXT | if data_writable { VRING_DESC_F_WRITE } else { 0 };
        mem.write_u64(d(1), BASE + O_DATA);
        mem.write_u32(d(1) + 8, data.len() as u32);
        mem.write_u16(d(1) + 12, data_flags);
        mem.write_u16(d(1) + 14, 2);
        // desc[2] status byte (WO), end of chain.
        mem.write_u64(d(2), BASE + O_STATUS);
        mem.write_u32(d(2) + 8, 1);
        mem.write_u16(d(2) + 12, VRING_DESC_F_WRITE);
        mem.write_u16(d(2) + 14, 0);
        // avail: ring[0] = head 0; idx = 1.
        mem.write_u16(BASE + O_AVAIL + 4, 0);
        mem.write_u16(BASE + O_AVAIL + 2, 1);

        // Hand-build a ready queue pointing at the three regions above.
        let mut q = Queue::new(mem.clone());
        q.size = 8;
        q.ready = true;
        q.desc_table = BASE + O_DESC;
        q.avail_ring = BASE + O_AVAIL;
        q.used_ring = BASE + O_USED;

        // Installing the queue and kicking it runs the request
        // synchronously (`notify` calls `drain_q` directly).
        // NOTE(review): `dev` keeps this queue after `run` returns, while
        // `backing` is freed here; presumably the queue's GuestMem then
        // points at freed memory. Every call re-activates with a fresh
        // queue before notifying, so these tests never use the stale one.
        dev.activate(vec![q]);
        dev.notify(0);

        // Collect the status byte and whatever is now in the data buffer.
        let mut sb = [0u8; 1];
        mem.read_slice(BASE + O_STATUS, &mut sb);
        let mut out = vec![0u8; data.len()];
        mem.read_slice(BASE + O_DATA, &mut out);
        Resp {
            status: sb[0],
            data: out,
        }
    }

    /// A sector written with T_OUT reads back unchanged with T_IN.
    #[test]
    fn write_then_read_round_trips() {
        let (dev, path) = make_rw(64 * 1024);
        // One full sector: marker text padded with zeros to 512 bytes.
        let mut payload = b"SUPERMACHINE-BLK-ROUNDTRIP".to_vec();
        payload.resize(512, 0);

        let w = run(&dev, VIRTIO_BLK_T_OUT, 1, &payload, false);
        assert_eq!(w.status, VIRTIO_BLK_S_OK, "write should succeed");

        let r = run(&dev, VIRTIO_BLK_T_IN, 1, &vec![0u8; 512], true);
        assert_eq!(r.status, VIRTIO_BLK_S_OK, "read should succeed");
        assert_eq!(r.data, payload, "read-back must match written bytes");

        std::fs::remove_file(path).ok();
    }

    #[test]
    fn huge_sector_read_is_ioerr_not_oob() {
        // sector * 512 overflows u64. Before the fix this either panicked
        // (debug overflow) or wrapped small and drove backing_ptr.add()
        // far out of bounds (host OOB read). Must be a clean IOERR.
        let (dev, path) = make_rw(64 * 1024);
        let r = run(&dev, VIRTIO_BLK_T_IN, u64::MAX, &vec![0u8; 512], true);
        assert_eq!(r.status, VIRTIO_BLK_S_IOERR, "overflowing read must IOERR");
        std::fs::remove_file(path).ok();
    }

    #[test]
    fn huge_sector_write_is_ioerr_not_oob() {
        // Same overflow, but an OOB *write* into host memory before the fix.
        let (dev, path) = make_rw(64 * 1024);
        let r = run(&dev, VIRTIO_BLK_T_OUT, u64::MAX, &vec![0xABu8; 512], false);
        assert_eq!(r.status, VIRTIO_BLK_S_IOERR, "overflowing write must IOERR");
        std::fs::remove_file(path).ok();
    }

    #[test]
    fn sector_past_end_is_ioerr() {
        // In-range multiply, but the span runs off the end of the backing.
        let (dev, path) = make_rw(64 * 1024); // 128 sectors
        let r = run(&dev, VIRTIO_BLK_T_IN, 200, &vec![0u8; 512], true);
        assert_eq!(r.status, VIRTIO_BLK_S_IOERR);
        std::fs::remove_file(path).ok();
    }

    #[test]
    fn last_sector_in_bounds_is_ok() {
        // sector 127 * 512 = 65024; +512 = 65536 == backing_len → valid.
        let (dev, path) = make_rw(64 * 1024);
        let r = run(&dev, VIRTIO_BLK_T_IN, 127, &vec![0u8; 512], true);
        assert_eq!(
            r.status, VIRTIO_BLK_S_OK,
            "last full sector must be readable"
        );
        // One past the last sector must fail.
        let r2 = run(&dev, VIRTIO_BLK_T_IN, 128, &vec![0u8; 512], true);
        assert_eq!(r2.status, VIRTIO_BLK_S_IOERR);
        std::fs::remove_file(path).ok();
    }

    /// A device opened with `open_ro` answers T_OUT with UNSUPP rather than
    /// touching the backing.
    #[test]
    fn readonly_device_rejects_writes() {
        let (dev, path) = make_ro(64 * 1024);
        let r = run(&dev, VIRTIO_BLK_T_OUT, 0, &vec![0xCDu8; 512], false);
        assert_eq!(
            r.status, VIRTIO_BLK_S_UNSUPP,
            "RO device must reject T_OUT with UNSUPP"
        );
        std::fs::remove_file(path).ok();
    }
}