// vmdk/chain.rs

1//! Snapshot/delta chain reader: layers a delta VMDK on top of its parent chain.
2//!
//! Given a delta VMDK (parentCID != `0xffff_ffff`), opens the parent referenced by
//! `parentFileNameHint` and presents a unified `Read + Seek` view where:
//! - allocated sectors in the delta are read from the delta
//! - sparse sectors in the delta are read from the parent (recursively)
//!
//! The parent's CID is not required to match the delta's parentCID: a mismatch only means
//! the parent was modified after the snapshot was taken, and (like QEMU) the chain is
//! still opened.
8//!
9//! A chain depth limit (64 levels) guards against circular references in crafted images.
10
11use std::io::{self, Read, Seek, SeekFrom};
12use std::path::Path;
13
14use crate::{VmdkError, VmdkFileReader};
15
/// Deepest layer index [`VmdkChainReader::open`] will visit while following parents.
///
/// The image passed to `open` is depth 0. If the layer reached at this depth still
/// names a parent, `open` gives up with a `chain_depth` error, which bounds the work
/// done on circular or absurdly long chains.
pub const MAX_CHAIN_DEPTH: usize = 64;
18
19/// A read-only view over a VMDK snapshot chain.
20///
21/// Implements `Read + Seek` over the merged virtual sector stream, walking from the
22/// most-recent delta down to the base image for each sparse grain.
23///
24/// Opened via [`VmdkChainReader::open`].
25pub struct VmdkChainReader {
26    /// Layers from newest (index 0 = delta) to oldest (last = base image).
27    layers: Vec<VmdkFileReader>,
28    virtual_disk_size: u64,
29    pos: u64,
30    /// Smallest non-zero grain size (bytes) across the layers; reads are clamped
31    /// to this boundary so a sparse grain can't zero-mask a following allocated
32    /// grain. `0` for an all-flat chain, where no clamping is needed.
33    clamp_bytes: u64,
34}
35
36impl VmdkChainReader {
37    /// Open a (potentially chained) VMDK from `path`, following `parentFileNameHint`
38    /// until a base image is reached or `MAX_CHAIN_DEPTH` is exceeded.
39    ///
40    /// If `path` is a base image (`parentCID == 0xffff_ffff`), this is equivalent to
41    /// `VmdkReader::open_path` wrapped in a single-layer chain.
42    pub fn open(path: &Path) -> Result<Self, VmdkError> {
43        let mut layers: Vec<VmdkFileReader> = Vec::new();
44        let mut current_path = path.to_path_buf();
45
46        for depth in 0..=MAX_CHAIN_DEPTH {
47            let reader = VmdkFileReader::open_path(&current_path)?;
48            let parent_cid = reader.parent_cid();
49            crate::diag::chain_layer(depth, reader.cid(), parent_cid);
50
51            // A CID mismatch between a child's parentCID and its parent's CID means the
52            // parent was modified after the snapshot was taken. QEMU warns but continues,
53            // and so do we — the chain is still structurally usable — so there is no
54            // separate branch to take here; every opened layer is simply pushed.
55            layers.push(reader);
56
57            if parent_cid == 0xffff_ffff {
58                break; // reached base image
59            }
60            if depth == MAX_CHAIN_DEPTH {
61                return Err(VmdkError::FieldOutOfRange {
62                    field: "chain_depth",
63                    value: MAX_CHAIN_DEPTH as u64,
64                    reason: "snapshot chain exceeds the maximum supported depth",
65                });
66            }
67
68            // Resolve the parent path relative to the current file's directory.
69            let desc_text = layers
70                .last()
71                .map(|r| r.descriptor_text().to_owned())
72                .unwrap_or_default();
73            let parent_hint = extract_parent_file_name(&desc_text);
74            if parent_hint.is_empty() {
75                break; // no hint available — treat as base
76            }
77            // Resolve the parent strictly within the current file's directory —
78            // an absolute or `..`-climbing parentFileNameHint is refused.
79            let parent_dir = current_path.parent().unwrap_or(Path::new("."));
80            current_path = crate::descriptor::resolve_extent_path(parent_dir, parent_hint)?;
81        }
82
83        let virtual_disk_size = layers
84            .first()
85            .map_or(0, super::VmdkReader::virtual_disk_size);
86        let clamp_bytes = layers
87            .iter()
88            .map(|l| l.info().grain_size_bytes)
89            .filter(|&g| g > 0)
90            .min()
91            .unwrap_or(0);
92        Ok(VmdkChainReader {
93            layers,
94            virtual_disk_size,
95            pos: 0,
96            clamp_bytes,
97        })
98    }
99
100    /// Total virtual disk size in bytes (from the delta/top layer).
101    pub fn virtual_disk_size(&self) -> u64 {
102        self.virtual_disk_size
103    }
104
105    /// Number of layers in the chain (1 = base image only, no parent).
106    pub fn depth(&self) -> usize {
107        self.layers.len()
108    }
109}
110
111impl Read for VmdkChainReader {
112    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
113        if buf.is_empty() || self.pos >= self.virtual_disk_size {
114            return Ok(0);
115        }
116        // Try each layer from newest to oldest. Read from the first layer that has
117        // data at this position. Sparse reads return zeros but we detect them by
118        // checking is_allocated; if a layer doesn't have data, try the next.
119        let mut to_read = buf.len().min((self.virtual_disk_size - self.pos) as usize);
120        if self.clamp_bytes > 0 {
121            // Clamp to a single grain so a sparse grain cannot zero-mask an
122            // allocated grain that falls within the same read.
123            let remaining_in_grain = (self.clamp_bytes - self.pos % self.clamp_bytes) as usize;
124            to_read = to_read.min(remaining_in_grain);
125        }
126        let lba = self.pos / 512;
127
128        for layer in &mut self.layers {
129            let allocated = layer.is_allocated(lba)?;
130            if allocated {
131                layer.seek(SeekFrom::Start(self.pos))?;
132                let n = layer.read(&mut buf[..to_read])?;
133                self.pos += n as u64;
134                return Ok(n);
135            }
136        }
137
138        // All layers are sparse at this position — return zeros.
139        buf[..to_read].fill(0);
140        self.pos += to_read as u64;
141        Ok(to_read)
142    }
143}
144
145impl Seek for VmdkChainReader {
146    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
147        let new_pos = match pos {
148            SeekFrom::Start(n) => n as i64,
149            SeekFrom::Current(n) => self.pos as i64 + n,
150            SeekFrom::End(n) => self.virtual_disk_size as i64 + n,
151        };
152        if new_pos < 0 {
153            return Err(io::Error::new(
154                io::ErrorKind::InvalidInput,
155                "seek before start",
156            ));
157        }
158        self.pos = new_pos as u64;
159        Ok(self.pos)
160    }
161}
162
/// Returns the value of the first `parentFileNameHint` entry in a raw descriptor text.
///
/// Each line is split at its first `=`; the key is compared after trimming whitespace,
/// so both `parentFileNameHint="x"` and `parentFileNameHint = "x"` are recognised. The
/// value is trimmed of surrounding whitespace and then of surrounding `"` characters.
///
/// Returns `""` when no such entry exists. Callers treat an empty hint as "no parent",
/// so failing to recognise a spaced entry would silently drop the parent layer.
fn extract_parent_file_name(text: &str) -> &str {
    text.lines()
        .filter_map(|line| line.split_once('='))
        .find(|(key, _)| key.trim() == "parentFileNameHint")
        .map_or("", |(_, value)| value.trim().trim_matches('"'))
}
172
#[cfg(test)]
mod tests {
    use std::io::{Read, Seek, SeekFrom};
    use std::path::PathBuf;

    use super::*;
    use crate::testutil::{write_chain_to_dir, GRAIN_SIZE_BYTES};

    /// Writes `bytes` to `dir/name` and hands back the full path of the new file.
    fn write_image(dir: &std::path::Path, name: &str, bytes: &[u8]) -> PathBuf {
        let path = dir.join(name);
        std::fs::write(&path, bytes).unwrap();
        path
    }

    #[test]
    fn extract_parent_file_name_parses_hint() {
        let descriptor = "# Disk DescriptorFile\nversion=1\nCID=00000001\nparentCID=ffffffff\nparentFileNameHint=\"../base.vmdk\"\ncreateType=\"monolithicSparse\"\n";
        let hint = extract_parent_file_name(descriptor);
        assert_eq!(hint, "../base.vmdk");
    }

    #[test]
    fn extract_parent_file_name_returns_empty_when_absent() {
        let descriptor = "# Disk DescriptorFile\nversion=1\nCID=ffffffff\nparentCID=ffffffff\ncreateType=\"monolithicSparse\"\n";
        let hint = extract_parent_file_name(descriptor);
        assert_eq!(hint, "");
    }

    #[test]
    fn chain_depth_one_for_base_image() {
        let tmp = tempfile::tempdir().unwrap();
        let (base_path, _) = write_chain_to_dir(tmp.path(), &[0x42u8; 512]);
        let chain = VmdkChainReader::open(&base_path).expect("open base image chain");
        assert_eq!(chain.depth(), 1, "base image has chain depth 1");
    }

    #[test]
    fn chain_depth_two_for_delta() {
        let tmp = tempfile::tempdir().unwrap();
        let (_, delta_path) = write_chain_to_dir(tmp.path(), &[0x42u8; 512]);
        let chain = VmdkChainReader::open(&delta_path).expect("open delta chain");
        assert_eq!(chain.depth(), 2, "delta over base has chain depth 2");
    }

    #[test]
    fn chain_reads_base_data_through_sparse_delta() {
        // Base grain starts with a recognisable marker; the delta on top is sparse.
        let mut base_grain = vec![0u8; GRAIN_SIZE_BYTES];
        base_grain[..2].copy_from_slice(&[0xDE, 0xAD]);

        let tmp = tempfile::tempdir().unwrap();
        let (_, delta_path) = write_chain_to_dir(tmp.path(), &base_grain);
        let mut chain = VmdkChainReader::open(&delta_path).expect("open chain");
        chain.seek(SeekFrom::Start(0)).expect("seek");

        let mut marker = [0u8; 2];
        chain.read_exact(&mut marker).expect("read");
        assert_eq!(
            marker,
            [0xDE, 0xAD],
            "chain must fall through to base data for sparse delta grain"
        );
    }

    #[test]
    fn chain_virtual_disk_size_from_delta() {
        let tmp = tempfile::tempdir().unwrap();
        let (_, delta_path) = write_chain_to_dir(tmp.path(), &[0u8; 512]);
        let chain = VmdkChainReader::open(&delta_path).expect("open");
        assert_eq!(chain.virtual_disk_size(), GRAIN_SIZE_BYTES as u64);
    }

    #[test]
    fn chain_seek_variants_and_read_edges() {
        let tmp = tempfile::tempdir().unwrap();
        let (_, delta_path) = write_chain_to_dir(tmp.path(), &[0u8; GRAIN_SIZE_BYTES]);
        let mut chain = VmdkChainReader::open(&delta_path).unwrap();
        let disk_size = chain.virtual_disk_size();

        // Each SeekFrom variant lands where expected.
        assert_eq!(chain.seek(SeekFrom::Start(8)).unwrap(), 8);
        assert_eq!(chain.seek(SeekFrom::Current(-4)).unwrap(), 4);
        assert_eq!(chain.seek(SeekFrom::End(-2)).unwrap(), disk_size - 2);
        // One byte before the start is rejected.
        assert!(chain.seek(SeekFrom::End(-(disk_size as i64) - 1)).is_err());

        // Reading at end-of-disk yields nothing.
        chain.seek(SeekFrom::Start(disk_size)).unwrap();
        assert_eq!(chain.read(&mut [0u8; 4]).unwrap(), 0);
        // So does reading into an empty buffer.
        chain.seek(SeekFrom::Start(0)).unwrap();
        assert_eq!(chain.read(&mut []).unwrap(), 0);
    }

    #[test]
    fn chain_all_sparse_reads_zeros() {
        // One all-sparse layer: nothing reports allocated, so the zero-fill path runs.
        let tmp = tempfile::tempdir().unwrap();
        let image = crate::testutil::gd_at_end_stream_opt_vmdk();
        let image_path = write_image(tmp.path(), "sparse.vmdk", &image);
        let mut chain = VmdkChainReader::open(&image_path).unwrap();

        let mut sector = [0xFFu8; 512];
        chain.read_exact(&mut sector).unwrap();
        assert_eq!(sector, [0u8; 512]);
    }

    #[test]
    fn chain_breaks_when_parent_hint_missing() {
        // parentCID is set but there is no parentFileNameHint: the walk stops and the
        // image is treated as a base.
        let descriptor = "# Disk DescriptorFile\nversion=1\nCID=00000002\nparentCID=00000001\ncreateType=\"monolithicSparse\"\n";
        let image = crate::testutil::test_sparse_vmdk_with_descriptor(&[0u8; 512], descriptor);

        let tmp = tempfile::tempdir().unwrap();
        let image_path = write_image(tmp.path(), "d.vmdk", &image);
        let chain = VmdkChainReader::open(&image_path).unwrap();
        assert_eq!(chain.depth(), 1, "missing hint → no parent layer");
    }

    #[test]
    fn chain_read_does_not_zero_mask_an_allocated_grain_after_a_sparse_one() {
        // Two-grain image: grain 0 is sparse, grain 1 is filled with 0xBB. One read
        // covering both must not let the sparse grain 0 zero-mask the allocated
        // grain 1 (the un-clamped zero-fill bug).
        let image =
            crate::testutil::test_sparse_vmdk_sparse_then_allocated(&vec![0xBBu8; GRAIN_SIZE_BYTES]);

        let tmp = tempfile::tempdir().unwrap();
        let image_path = write_image(tmp.path(), "base.vmdk", &image);
        let mut chain = VmdkChainReader::open(&image_path).unwrap();

        let mut both_grains = vec![0u8; 2 * GRAIN_SIZE_BYTES];
        chain.read_exact(&mut both_grains).unwrap();
        let (sparse_grain, allocated_grain) = both_grains.split_at(GRAIN_SIZE_BYTES);
        assert!(sparse_grain.iter().all(|&b| b == 0), "grain 0 is sparse -> zeros");
        assert!(
            allocated_grain.iter().all(|&b| b == 0xBB),
            "grain 1 (allocated) must not be zero-masked by the sparse grain 0"
        );
    }

    #[test]
    fn chain_refuses_absolute_parent_hint() {
        let tmp = tempfile::tempdir().unwrap();

        let base_image = crate::testutil::test_sparse_vmdk_with_descriptor(
            &[0x55u8; 512],
            "# Disk DescriptorFile\nversion=1\nCID=00000001\nparentCID=ffffffff\ncreateType=\"monolithicSparse\"\n",
        );
        let base_path = write_image(tmp.path(), "base.vmdk", &base_image);

        // The delta names its parent by the base image's full path.
        let delta_descriptor = format!(
            "# Disk DescriptorFile\nversion=1\nCID=00000002\nparentCID=00000001\nparentFileNameHint=\"{}\"\ncreateType=\"monolithicSparse\"\n",
            base_path.display()
        );
        let delta_image =
            crate::testutil::test_sparse_vmdk_with_descriptor(&[0u8; 512], &delta_descriptor);
        let delta_path = write_image(tmp.path(), "delta.vmdk", &delta_image);

        // An absolute parentFileNameHint is how a crafted image would try to read an
        // arbitrary host file; the chain reader must refuse it (secure by default).
        assert!(
            VmdkChainReader::open(&delta_path).is_err(),
            "an absolute parentFileNameHint must be refused"
        );
    }

    #[test]
    fn chain_depth_limit_on_self_reference() {
        // A delta naming itself as parent keeps looping until MAX_CHAIN_DEPTH trips.
        let descriptor = "# Disk DescriptorFile\nversion=1\nCID=00000001\nparentCID=00000001\nparentFileNameHint=\"self.vmdk\"\ncreateType=\"monolithicSparse\"\n";
        let image = crate::testutil::test_sparse_vmdk_with_descriptor(&[0u8; 512], descriptor);

        let tmp = tempfile::tempdir().unwrap();
        let image_path = write_image(tmp.path(), "self.vmdk", &image);
        let outcome = VmdkChainReader::open(&image_path);
        assert!(matches!(
            outcome,
            Err(VmdkError::FieldOutOfRange {
                field: "chain_depth",
                ..
            })
        ));
    }
}