Skip to main content

qcow2/
lib.rs

1//! Pure-Rust read-only QCOW2 disk image reader.
2//!
//! Supports QCOW2 v2 and v3 images without a backing file or encryption;
//! deflate-compressed clusters are decompressed transparently.
4//! Uses a two-level L1→L2 cluster lookup matching QEMU's own design.
5
6// Production code is panic-free (no unwrap/expect, enforced by the workspace
7// lints); tests legitimately use them.
8#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
9
10use std::fs::File;
11use std::io::{self, Read, Seek, SeekFrom};
12use std::path::Path;
13
14mod error;
15mod header;
16mod refcount;
17mod snapshots;
18
19pub use error::Qcow2Error;
20pub use header::Qcow2Info;
21pub use refcount::{refcount_report, Qcow2RefcountReport};
22pub use snapshots::{snapshots, Qcow2Snapshot};
23
24use header::Qcow2Header;
25
26/// Inspect a QCOW2 image's header for forensic facts (version, backing file,
27/// encryption, snapshots, incompatible-feature bits) **without** decoding it —
28/// works on images the reader rejects (encrypted, backing-file, etc.).
29pub fn inspect(path: &Path) -> Result<Qcow2Info, Qcow2Error> {
30    let mut file = File::open(path)?;
31    // Read a generous window so the parser can also reach the header-extension
32    // area and the backing filename, which qemu stores immediately after the
33    // fixed header (well within the first cluster). 8 KiB covers real images;
34    // a short file simply yields a shorter slice (parse is bounds-checked).
35    let mut hdr_buf = [0u8; 8192];
36    let n = read_window(&mut file, &mut hdr_buf)?;
37    Qcow2Info::parse(&hdr_buf[..n])
38}
39
/// Fill `buf` from the current position of `file`, returning the number of
/// bytes read.
///
/// Keeps reading until `buf` is full or the source reports EOF, so short reads
/// (small files, or a source that hands data back in pieces) yield every
/// available byte. The returned count is smaller than `buf.len()` only when
/// EOF was reached; bytes of `buf` past that count are left untouched.
///
/// No seek is performed: callers that want the start of a file must pass a
/// freshly opened handle or seek first.
///
/// # Errors
///
/// Propagates any I/O error from the source except
/// [`io::ErrorKind::Interrupted`], which is retried as the [`Read`] contract
/// recommends.
fn read_window<R: Read + ?Sized>(file: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match file.read(&mut buf[filled..]) {
            Ok(0) => break, // EOF
            Ok(n) => filled += n,
            // A signal interrupted the call before any data moved; try again.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}
52
/// Read-only QCOW2 container reader.
///
/// Implements `Read + Seek` over the virtual sector stream. The L1 table is
/// held in memory; L2 entries are looked up in the image file on demand.
#[derive(Debug)]
pub struct Qcow2Reader {
    /// Open handle to the image file on the host.
    file: File,
    /// Virtual disk size in bytes, as recorded in the header.
    virtual_disk_size: u64,
    /// Cluster size in bytes (`1 << cluster_bits`, so always a power of two).
    cluster_size: u64,
    /// L1 entries as stored in the image (decoded from big-endian). Flag bits
    /// are masked off at lookup time, not here.
    l1_table: Vec<u64>,
    /// log2(entries per L2 table).
    l2_bits: u32,
    /// `entries per L2 table - 1`; selects the L2 index from a cluster index.
    l2_mask: u64,
    /// Current position in the virtual disk, in bytes.
    pos: u64,
}
65
66impl Qcow2Reader {
67    /// Open a QCOW2 disk image (v2 or v3, uncompressed, no backing file).
68    pub fn open(path: &Path) -> Result<Self, Qcow2Error> {
69        // 8 MiB max L1 table — prevents OOM on crafted images.
70        const MAX_L1_ENTRIES: u32 = 1 << 20;
71
72        let mut file = File::open(path)?;
73
74        // Read enough bytes to cover both v2 (72 bytes) and v3 (104 bytes) headers.
75        let mut hdr_buf = [0u8; 104];
76        let hdr_read = file.read(&mut hdr_buf)?;
77        let hdr = Qcow2Header::parse(&hdr_buf[..hdr_read])?;
78
79        let cluster_size = 1u64 << hdr.cluster_bits;
80        // Each L2 table occupies one cluster; each entry is 8 bytes.
81        let l2_entries = cluster_size / 8;
82        let l2_bits = hdr.cluster_bits - 3; // log2(l2_entries)
83        let l2_mask = l2_entries - 1;
84
85        // Load L1 table into memory.
86        if hdr.l1_size > MAX_L1_ENTRIES {
87            return Err(Qcow2Error::L1TableTooLarge(hdr.l1_size));
88        }
89        file.seek(SeekFrom::Start(hdr.l1_table_offset))?;
90        let l1_bytes = u64::from(hdr.l1_size) * 8;
91        let mut l1_buf = vec![0u8; l1_bytes as usize];
92        file.read_exact(&mut l1_buf)?;
93        let l1_table: Vec<u64> = l1_buf
94            .chunks_exact(8)
95            .map(|c| {
96                let mut a = [0u8; 8];
97                a.copy_from_slice(c); // chunks_exact(8) guarantees len == 8
98                u64::from_be_bytes(a)
99            })
100            .collect();
101
102        Ok(Qcow2Reader {
103            file,
104            virtual_disk_size: hdr.disk_size,
105            cluster_size,
106            l1_table,
107            l2_bits,
108            l2_mask,
109            pos: 0,
110        })
111    }
112
113    /// Virtual disk size in bytes as recorded in the QCOW2 header.
114    pub fn virtual_disk_size(&self) -> u64 {
115        self.virtual_disk_size
116    }
117
118    /// Resolve `virtual_offset` to a cluster reference.
119    fn cluster_ref_for(&mut self, virtual_offset: u64) -> io::Result<ClusterRef> {
120        let cluster_idx = virtual_offset >> self.cluster_size.trailing_zeros();
121
122        let l1_idx = (cluster_idx >> self.l2_bits) as usize;
123        let l2_idx = cluster_idx & self.l2_mask;
124
125        let l1_entry = self.l1_table.get(l1_idx).copied().unwrap_or(0);
126        let l2_table_offset = l1_entry & 0x7FFF_FFFF_FFFF_FFFF; // mask COPIED bit
127        if l2_table_offset == 0 {
128            return Ok(ClusterRef::Unallocated);
129        }
130
131        let l2_entry_pos = l2_table_offset + l2_idx * 8;
132        self.file.seek(SeekFrom::Start(l2_entry_pos))?;
133        let mut l2_bytes = [0u8; 8];
134        self.file.read_exact(&mut l2_bytes)?;
135        let l2_entry = u64::from_be_bytes(l2_bytes);
136
137        if l2_entry & (1 << 62) != 0 {
138            // Compressed cluster. QCOW2 spec (QEMU implementation):
139            //   csize_shift = 40 - cluster_bits
140            //   lower csize_shift bits = file BYTE offset (already bytes, no ×512)
141            //   next (cluster_bits - 8) bits = compressed_sectors - 1
142            // QCOW2 spec: lower (63 - cluster_bits) bits = file byte offset;
143            // next (cluster_bits - 1) bits = compressed_sectors - 1.
144            // The offset is already in bytes — no sector-to-byte conversion.
145            let cluster_bits = self.cluster_size.trailing_zeros(); // u32, in [9, 20]
146            let split = 63u32 - cluster_bits; // bits in offset field
147            let count_mask = (1u64 << (cluster_bits - 1)) - 1; // cluster_bits-1 count bits
148            let file_offset = l2_entry & ((1u64 << split) - 1);
149            let nb_sectors = ((l2_entry >> split) & count_mask) + 1;
150            let compressed_bytes = (nb_sectors * 512) as usize;
151            return Ok(ClusterRef::Compressed { file_offset, compressed_bytes });
152        }
153
154        // QCOW_OFLAG_ZERO (bit 0): guest must see zeros regardless of cluster offset.
155        // Covers ZERO_PLAIN (l2_entry=1, no backing cluster) and ZERO_ALLOC (cluster
156        // allocated but zeroed out), both mandated by the QCOW2 spec.
157        if l2_entry & 1 != 0 {
158            return Ok(ClusterRef::ZeroCluster);
159        }
160
161        let cluster_offset = l2_entry & 0x3FFF_FFFF_FFFF_FFFF;
162        if cluster_offset == 0 {
163            return Ok(ClusterRef::Unallocated);
164        }
165        Ok(ClusterRef::Normal(cluster_offset))
166    }
167
168    /// Read and raw-deflate-decompress a compressed cluster; return the
169    /// full `cluster_size` bytes of decompressed data.
170    ///
171    /// `compressed_bytes` is an upper bound (`nb_sectors` × 512); the actual
172    /// compressed stream may be shorter, and near the end of the file the read
173    /// may hit EOF before reaching `compressed_bytes`. Both are normal.
174    fn decompress_cluster(&mut self, file_offset: u64, compressed_bytes: usize) -> io::Result<Vec<u8>> {
175        use flate2::read::DeflateDecoder;
176
177        self.file.seek(SeekFrom::Start(file_offset))?;
178        let mut raw = vec![0u8; compressed_bytes];
179        let mut filled = 0;
180        while filled < compressed_bytes {
181            match self.file.read(&mut raw[filled..])? {
182                0 => break, // EOF — normal for the last compressed cluster
183                n => filled += n,
184            }
185        }
186
187        let mut decoder = DeflateDecoder::new(&raw[..filled]);
188        let mut out = Vec::with_capacity(self.cluster_size as usize);
189        decoder.read_to_end(&mut out).map_err(|e| {
190            io::Error::new(io::ErrorKind::InvalidData, format!("qcow2 deflate: {e}"))
191        })?;
192        if out.len() < self.cluster_size as usize {
193            out.resize(self.cluster_size as usize, 0);
194        }
195        Ok(out)
196    }
197}
198
/// Where the data of one guest cluster lives, as resolved from its L2 entry.
enum ClusterRef {
    /// No host cluster is mapped.
    Unallocated,
    /// The cluster is flagged as reading back all zeros.
    ZeroCluster,
    /// Uncompressed data; the payload is the host file offset of the cluster.
    Normal(u64),
    /// Deflate-compressed data stored in the image file.
    Compressed {
        /// Host file byte offset where the compressed stream starts.
        file_offset: u64,
        /// Upper bound on the number of bytes to read from `file_offset`.
        compressed_bytes: usize,
    },
}
206
207impl Read for Qcow2Reader {
208    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
209        if self.pos >= self.virtual_disk_size || buf.is_empty() {
210            return Ok(0);
211        }
212
213        let remaining_virtual = (self.virtual_disk_size - self.pos) as usize;
214        let offset_in_cluster = (self.pos & (self.cluster_size - 1)) as usize;
215        let remaining_in_cluster = self.cluster_size as usize - offset_in_cluster;
216        let to_read = buf.len().min(remaining_virtual).min(remaining_in_cluster);
217
218        let n = match self.cluster_ref_for(self.pos)? {
219            ClusterRef::Normal(cluster_offset) => {
220                let file_off = cluster_offset + offset_in_cluster as u64;
221                self.file.seek(SeekFrom::Start(file_off))?;
222                self.file.read(&mut buf[..to_read])?
223            }
224            ClusterRef::Compressed { file_offset, compressed_bytes } => {
225                let decompressed = self.decompress_cluster(file_offset, compressed_bytes)?;
226                let src = &decompressed[offset_in_cluster..offset_in_cluster + to_read];
227                buf[..to_read].copy_from_slice(src);
228                to_read
229            }
230            ClusterRef::ZeroCluster | ClusterRef::Unallocated => {
231                buf[..to_read].fill(0);
232                to_read
233            }
234        };
235
236        self.pos += n as u64;
237        Ok(n)
238    }
239}
240
241impl Seek for Qcow2Reader {
242    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
243        let new_pos = match pos {
244            SeekFrom::Start(n) => n as i64,
245            SeekFrom::Current(n) => self.pos as i64 + n,
246            SeekFrom::End(n) => self.virtual_disk_size as i64 + n,
247        };
248        if new_pos < 0 {
249            return Err(io::Error::new(
250                io::ErrorKind::InvalidInput,
251                "seek before start",
252            ));
253        }
254        self.pos = new_pos as u64;
255        Ok(self.pos)
256    }
257}
258
259// ── Test helpers ──────────────────────────────────────────────────────────────
260
261#[cfg(feature = "test-helpers")]
262pub mod testutil;
263#[cfg(not(feature = "test-helpers"))]
264mod testutil;
265
266// ── Tests ─────────────────────────────────────────────────────────────────────
267
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use testutil::test_qcow2;

    /// QCOW2 magic (`QFI\xfb`) as a big-endian integer.
    const QCOW2_MAGIC: u32 = 0x5146_49fb;

    /// Last-resort `qemu-img` location (Homebrew on Apple Silicon).
    const HOMEBREW_QEMU_IMG: &str = "/opt/homebrew/bin/qemu-img";

    // ── Helpers ───────────────────────────────────────────────────────────────

    /// Write `data` to a fresh temporary file that is deleted on drop.
    fn write_tmp(data: &[u8]) -> tempfile::NamedTempFile {
        use std::io::Write;
        let mut f = tempfile::NamedTempFile::new().unwrap();
        f.write_all(data).unwrap();
        f
    }

    /// Locate a `qemu-img` binary for the differential tests.
    ///
    /// Candidates, in order: the `QEMU_IMG` environment variable, every
    /// directory on `PATH`, then the Homebrew default. Returns `None` when no
    /// candidate is an existing file, in which case the tests skip.
    fn find_qemu_img() -> Option<PathBuf> {
        let from_env = std::env::var_os("QEMU_IMG").map(PathBuf::from);
        let from_path = std::env::var_os("PATH")
            .into_iter()
            .flat_map(|paths| std::env::split_paths(&paths).collect::<Vec<_>>())
            .flat_map(|dir| vec![dir.join("qemu-img"), dir.join("qemu-img.exe")]);
        from_env
            .into_iter()
            .chain(from_path)
            .chain(std::iter::once(PathBuf::from(HOMEBREW_QEMU_IMG)))
            .find(|candidate| candidate.is_file())
    }

    /// Build a minimal valid QCOW2 v2 header (72 bytes) with arbitrary `cluster_bits`.
    fn qcow2_header_bytes(cluster_bits: u32) -> Vec<u8> {
        let mut h = vec![0u8; 72];
        h[0..4].copy_from_slice(&QCOW2_MAGIC.to_be_bytes()); // magic
        h[4..8].copy_from_slice(&2u32.to_be_bytes()); // version 2
        // bytes 8..16: backing_file_offset = 0
        // bytes 16..20: backing_file_size = 0
        h[20..24].copy_from_slice(&cluster_bits.to_be_bytes()); // cluster_bits
        h[24..32].copy_from_slice(&512u64.to_be_bytes()); // disk_size
        // bytes 32..36: encryption = 0
        h[36..40].copy_from_slice(&0u32.to_be_bytes()); // l1_size = 0
        h[40..48].copy_from_slice(&0u64.to_be_bytes()); // l1_table_offset
        h
    }

    // ── Panic regression tests: out-of-range cluster_bits must be rejected ───

    #[test]
    fn cluster_bits_too_large_rejected() {
        // cluster_bits=200 would trigger "attempt to shift left with overflow"
        // on `1u64 << hdr.cluster_bits` in `open` (debug builds) if accepted.
        let f = write_tmp(&qcow2_header_bytes(200));
        assert!(Qcow2Reader::open(f.path()).is_err());
    }

    #[test]
    fn cluster_bits_zero_rejected() {
        // cluster_bits=0 would underflow `cluster_bits - 3` in `open` if accepted.
        let f = write_tmp(&qcow2_header_bytes(0));
        assert!(Qcow2Reader::open(f.path()).is_err());
    }

    #[test]
    fn cluster_bits_below_minimum_rejected() {
        // cluster_bits=2 would hit the same underflow (2 - 3 wraps for u32).
        let f = write_tmp(&qcow2_header_bytes(2));
        assert!(Qcow2Reader::open(f.path()).is_err());
    }

    // ── Basic open/read/seek behaviour ───────────────────────────────────────

    #[test]
    fn open_nonexistent_returns_err() {
        // A path inside a fresh temp dir is guaranteed not to exist, unlike a
        // fixed name under /tmp.
        let dir = tempfile::tempdir().expect("tempdir");
        assert!(Qcow2Reader::open(&dir.path().join("no_such.qcow2")).is_err());
    }

    #[test]
    fn open_empty_file_returns_err() {
        let f = write_tmp(&[]);
        assert!(Qcow2Reader::open(f.path()).is_err());
    }

    #[test]
    fn open_non_qcow2_file_returns_err() {
        let f = write_tmp(b"this is not a qcow2 image at all");
        assert!(Qcow2Reader::open(f.path()).is_err());
    }

    #[test]
    fn qcow2_virtual_disk_size() {
        let img = test_qcow2(&[0u8; 512]);
        let f = write_tmp(&img);
        let reader = Qcow2Reader::open(f.path()).expect("open");
        assert_eq!(reader.virtual_disk_size(), testutil::CLUSTER_SIZE as u64);
    }

    #[test]
    fn qcow2_read_returns_cluster_data() {
        let mut data = vec![0u8; 512];
        data[42] = 0xDE;
        data[43] = 0xAD;
        let img = test_qcow2(&data);
        let f = write_tmp(&img);
        let mut reader = Qcow2Reader::open(f.path()).expect("open");
        let mut buf = vec![0u8; 512];
        reader.read_exact(&mut buf).expect("read");
        assert_eq!(buf[42], 0xDE);
        assert_eq!(buf[43], 0xAD);
    }

    #[test]
    fn seek_and_read_at_offset() {
        let mut data = vec![0u8; testutil::CLUSTER_SIZE];
        data[100] = 0xBE;
        data[101] = 0xEF;
        let img = test_qcow2(&data);
        let f = write_tmp(&img);
        let mut reader = Qcow2Reader::open(f.path()).expect("open");
        reader.seek(SeekFrom::Start(100)).unwrap();
        let mut buf = [0u8; 2];
        reader.read_exact(&mut buf).unwrap();
        assert_eq!(buf, [0xBE, 0xEF]);
    }

    #[test]
    fn qcow2_reader_is_send() {
        fn assert_send<T: Send>() {}
        assert_send::<Qcow2Reader>();
    }

    // ── Property tests: open() never panics on arbitrary input ────────────────

    proptest::proptest! {
        #[test]
        fn open_never_panics_on_arbitrary_bytes(
            bytes in proptest::collection::vec(proptest::prelude::any::<u8>(), 0..8192)
        ) {
            let f = write_tmp(&bytes);
            let _ = Qcow2Reader::open(f.path());
        }

        #[test]
        fn open_never_panics_on_valid_magic_plus_garbage(
            suffix in proptest::collection::vec(proptest::prelude::any::<u8>(), 0..8192)
        ) {
            // Correct magic + version 2 prefix ensures the parser gets past early
            // rejection and exercises field parsing with random data.
            let mut bytes = vec![0u8; 8];
            bytes[0..4].copy_from_slice(&QCOW2_MAGIC.to_be_bytes());
            bytes[4..8].copy_from_slice(&2u32.to_be_bytes());
            bytes.extend_from_slice(&suffix);
            let f = write_tmp(&bytes);
            let _ = Qcow2Reader::open(f.path());
        }
    }

    // ── QCOW_OFLAG_ZERO (bit 0): ZERO_PLAIN clusters must read as zeros ─────────
    // L2 entry = 1 (ZERO_PLAIN): bit 62=0 (not compressed), bit 0=1 (zero flag),
    // offset field = 0. Correct behaviour: reads return cluster_size zeros.
    // Regression guard: if the zero flag were treated as part of the offset,
    // the reader would seek to file byte 1 and return header bytes instead.
    #[test]
    fn zero_plain_cluster_reads_as_zeros() {
        // Start from a valid image, then patch L2[0] = 1 (ZERO_PLAIN) in place
        // of the data cluster offset. 1536 is L2_OFFSET from testutil.
        let l2_offset = 1536usize;
        let mut img = test_qcow2(&[0xABu8; 512]);
        img[l2_offset..l2_offset + 8].copy_from_slice(&1u64.to_be_bytes());

        let f = write_tmp(&img);
        let mut reader = Qcow2Reader::open(f.path()).expect("open");
        let mut buf = [0xFFu8; 512];
        reader.seek(SeekFrom::Start(0)).unwrap();
        reader.read_exact(&mut buf).expect("read");
        assert_eq!(
            buf,
            [0u8; 512],
            "ZERO_PLAIN cluster (L2 entry=1) must read as all zeros"
        );
    }

    // ── Differential test: bytes must match qemu-img convert -O raw output ────

    #[test]
    fn reads_match_qemu_raw_convert() {
        let qemu_img = match find_qemu_img() {
            Some(path) => path,
            None => return, // skip if qemu-img is not installed
        };
        let tmp = tempfile::tempdir().expect("tempdir");

        // 1 MiB source with a deterministic non-trivial pattern covering
        // sector boundaries and cluster boundaries (default cluster = 65536 B).
        let size: usize = 1 << 20;
        let raw_data: Vec<u8> = (0..size).map(|i| (i ^ (i >> 8)) as u8).collect();
        let raw_path = tmp.path().join("source.raw");
        std::fs::write(&raw_path, &raw_data).expect("write raw");

        let qcow2_path = tmp.path().join("test.qcow2");
        let status = std::process::Command::new(&qemu_img)
            .args(["convert", "-O", "qcow2"])
            .arg(&raw_path)
            .arg(&qcow2_path)
            .status()
            .expect("spawn qemu-img");
        assert!(status.success(), "qemu-img convert failed");

        let mut reader = Qcow2Reader::open(&qcow2_path).expect("open");
        assert_eq!(reader.virtual_disk_size(), size as u64);

        // Sample: start, mid-sector, cluster boundary, cluster+sector, near-end.
        let cluster = 65536usize;
        for &offset in &[0usize, 511, cluster, cluster + 512, size - 512] {
            let len = 512.min(size - offset);
            let mut buf = vec![0u8; len];
            reader.seek(SeekFrom::Start(offset as u64)).expect("seek");
            reader.read_exact(&mut buf).expect("read");
            assert_eq!(
                buf,
                raw_data[offset..offset + len],
                "byte mismatch at offset {offset:#x}",
            );
        }
    }

    // ── Corpus differential test: real CirrOS image vs qemu-img convert ───────

    #[test]
    fn corpus_cirros_reads_match_qemu_raw_convert() {
        let qemu_img = match find_qemu_img() {
            Some(path) => path,
            None => return, // skip if qemu-img is not installed
        };
        let corpus = Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/data/cirros-0.6.3-x86_64-disk.img");
        if !corpus.exists() {
            return; // skip if corpus not present
        }
        let tmp = tempfile::tempdir().expect("tempdir");
        let raw_path = tmp.path().join("cirros.raw");
        let ok = std::process::Command::new(&qemu_img)
            .args(["convert", "-O", "raw"])
            .arg(&corpus)
            .arg(&raw_path)
            .status()
            .expect("spawn qemu-img")
            .success();
        assert!(ok, "qemu-img convert failed");
        let ref_data = std::fs::read(&raw_path).expect("read raw");

        let mut reader = Qcow2Reader::open(&corpus).expect("open corpus");
        assert_eq!(
            reader.virtual_disk_size(),
            ref_data.len() as u64,
            "virtual_disk_size must match reference raw length"
        );

        // CirrOS is 112 MiB virtual. Sample across the full range:
        // MBR, partition table, multiple cluster boundaries, mid-image, near-end.
        let vsize = ref_data.len();
        let cluster = 65536usize;
        let samples = [
            0usize,              // MBR / boot sector
            446,                 // partition table entries
            510,                 // MBR boot signature (0x55 0xAA)
            cluster,             // second cluster
            cluster * 10,        // tenth cluster
            vsize / 2,           // mid-image
            vsize / 2 + cluster, // mid-image + one cluster
            vsize - 512,         // last sector
        ];
        for &offset in &samples {
            let len = 512.min(vsize - offset);
            let mut buf = vec![0u8; len];
            reader.seek(SeekFrom::Start(offset as u64)).expect("seek");
            reader.read_exact(&mut buf).expect("read");
            assert_eq!(
                buf,
                ref_data[offset..offset + len],
                "byte mismatch at offset {offset:#x}",
            );
        }
    }
}