Skip to main content

vmdk/
read.rs

1//! The read path — resolving a virtual offset to physical bytes and implementing
2//! `Read`/`Seek` over the decoded virtual sector stream. Sparse grains read as zeros,
3//! streamOptimized grains are zlib-decompressed, and (in recovery mode) damaged
4//! pointers resolve through the redundant grain directory (see `recovery.rs`).
5
6use std::io::{self, Read, Seek, SeekFrom};
7
8use crate::header::SECTOR_SIZE;
9use crate::{bytes, diag, sesparse, FormatState, VmdkReader};
10
/// Where the bytes backing a virtual offset live, as resolved by the grain lookup.
///
/// Every payload is a plain integer, so the type is cheap to copy and can be
/// compared and debug-printed (handy for diagnostics and `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum GrainLookup {
    /// Grain is not allocated — fill output with zeros.
    Sparse,
    /// Grain is stored uncompressed; the payload is the absolute file byte offset
    /// of the requested virtual offset (grain start plus the offset inside the grain).
    FileOffset(u64),
    /// Grain is zlib-compressed (streamOptimized).
    Compressed {
        /// File byte offset of the first compressed payload byte, i.e. just past
        /// the 12-byte `GrainMarker` header (u64 LBA + u32 data size).
        data_offset: u64,
        /// Length in bytes of the compressed payload.
        data_size: u32,
        /// Where to start reading within the *decompressed* grain.
        offset_in_grain: u64,
    },
}
27
28impl<R: Read + Seek> VmdkReader<R> {
29    /// Grain size in bytes for the sparse/seSparse read path (0 for flat, which is
30    /// handled before this is reached on the read path).
31    pub(crate) fn sparse_grain_size_bytes(&self) -> u64 {
32        match &self.fmt {
33            FormatState::Sparse {
34                grain_size_bytes, ..
35            }
36            | FormatState::SeSparse {
37                grain_size_bytes, ..
38            } => *grain_size_bytes,
39            FormatState::Flat => 0,
40        }
41    }
42
43    /// Resolve `virtual_offset` to a [`GrainLookup`] describing where to find the data.
44    pub(crate) fn grain_location(&mut self, virtual_offset: u64) -> io::Result<GrainLookup> {
45        // seSparse uses nibble-typed, bit-rotated 8-byte grain entries — resolved separately.
46        if matches!(self.fmt, FormatState::SeSparse { .. }) {
47            return self.grain_location_sesparse(virtual_offset);
48        }
49        self.grain_location_sparse(virtual_offset)
50    }
51
52    /// Resolve a virtual offset for a seSparse (VMFS6) extent.
53    fn grain_location_sesparse(&mut self, virtual_offset: u64) -> io::Result<GrainLookup> {
54        let FormatState::SeSparse {
55            grain_dir,
56            grain_size_bytes,
57            gt_offset_sectors,
58            grains_offset_sectors,
59        } = &self.fmt
60        else {
61            return Ok(GrainLookup::Sparse); // dispatched only for seSparse
62        };
63        {
64            let grain_size_bytes = *grain_size_bytes;
65            let grain_sectors = grain_size_bytes / SECTOR_SIZE;
66            let grains_offset = *grains_offset_sectors;
67            let gt_off = *gt_offset_sectors;
68            let grain_idx = virtual_offset / grain_size_bytes;
69            let offset_in_grain = virtual_offset % grain_size_bytes;
70            let gd_idx = (grain_idx / sesparse::SE_GTES_PER_GT) as usize;
71            let gte_idx = grain_idx % sesparse::SE_GTES_PER_GT;
72            let gd_entry = grain_dir.get(gd_idx).copied().unwrap_or(0);
73
74            let Some(gte) = self.se_read_gte(gd_entry, gt_off, gte_idx)? else {
75                return Ok(GrainLookup::Sparse);
76            };
77            match gte & sesparse::SE_GTE_TYPE_MASK {
78                // Unallocated: the whole entry must be zero (already handled by se_read_gte
79                // for the GD level; a zero GTE here means a sparse grain within an allocated GT).
80                0 if gte == 0 => Ok(GrainLookup::Sparse),
81                sesparse::SE_GTE_TYPE_UNMAPPED | sesparse::SE_GTE_TYPE_ZERO => {
82                    Ok(GrainLookup::Sparse)
83                }
84                sesparse::SE_GTE_TYPE_ALLOCATED => {
85                    let grain_index = sesparse::se_gte_grain_index(gte);
86                    let cluster_sector = grains_offset + grain_index * grain_sectors;
87                    Ok(GrainLookup::FileOffset(
88                        cluster_sector * SECTOR_SIZE + offset_in_grain,
89                    ))
90                }
91                _ => Err(io::Error::new(
92                    io::ErrorKind::InvalidData,
93                    "seSparse grain entry has unsupported type nibble",
94                )),
95            }
96        }
97    }
98
99    /// Resolve a virtual offset for a VMDK4 sparse / streamOptimized extent.
100    fn grain_location_sparse(&mut self, virtual_offset: u64) -> io::Result<GrainLookup> {
101        let (
102            gd_idx,
103            gt_sector,
104            gte_idx,
105            offset_in_grain,
106            compressed,
107            grain_size_bytes,
108            num_gtes_per_gt,
109        ) = {
110            let FormatState::Sparse {
111                grain_dir,
112                grain_size_bytes,
113                num_gtes_per_gt,
114                compressed,
115            } = &self.fmt
116            else {
117                return Ok(GrainLookup::Sparse); // Flat — not reached from Read::read
118            };
119            let grain_idx = virtual_offset / grain_size_bytes;
120            let offset_in_grain = virtual_offset % grain_size_bytes;
121            let gd_idx = (grain_idx / num_gtes_per_gt) as usize;
122            let gte_idx = grain_idx % num_gtes_per_gt;
123            let gt_sector = grain_dir.get(gd_idx).copied().unwrap_or(0);
124            (
125                gd_idx,
126                gt_sector,
127                gte_idx,
128                offset_in_grain,
129                *compressed,
130                *grain_size_bytes,
131                *num_gtes_per_gt,
132            )
133        };
134        // Recovery mode: if the primary grain-table pointer is unusable, resolve it
135        // through the redundant grain directory instead.
136        let primary_gt_sector = gt_sector;
137        let gt_sector = if self.rgd_fallback {
138            self.resilient_gt_sector(gd_idx, gt_sector, num_gtes_per_gt)?
139        } else {
140            gt_sector
141        };
142        // The grain table was recovered when fallback swapped in a different (RGD) pointer.
143        let mut from_rgd = self.rgd_fallback && gt_sector != primary_gt_sector && gt_sector != 0;
144        if gt_sector == 0 {
145            return Ok(GrainLookup::Sparse);
146        }
147        // Use cached GT if available; otherwise read from file and cache it.
148        let gte = if let Some(gt) = self.gt_cache.get(&gt_sector) {
149            gt.get(gte_idx as usize).copied().unwrap_or(0)
150        } else {
151            // Read the full GT (num_gtes_per_gt entries × 4 bytes) into the cache.
152            let gt_byte_offset = u64::from(gt_sector) * SECTOR_SIZE;
153            self.inner.seek(SeekFrom::Start(gt_byte_offset))?;
154            let gt_size = num_gtes_per_gt as usize * 4;
155            let mut gt_bytes = vec![0u8; gt_size];
156            self.inner.read_exact(&mut gt_bytes)?;
157            let gt: Vec<u32> = bytes::le_u32_table(&gt_bytes);
158            let gte = gt.get(gte_idx as usize).copied().unwrap_or(0);
159            self.gt_cache.insert(gt_sector, gt);
160            gte
161        };
162        // Content-level recovery: the primary grain-table pointer was usable but this
163        // entry is sparse — if the redundant grain table still holds the grain pointer,
164        // the primary entry was lost to corruption, so recover it.
165        let gte = if self.rgd_fallback && gte <= 1 {
166            let rgd_gte = self.rgd_gte(gd_idx, gte_idx, num_gtes_per_gt)?;
167            if rgd_gte > 1 {
168                diag::entry_recovered(gd_idx, gte_idx);
169                from_rgd = true;
170                rgd_gte
171            } else {
172                gte
173            }
174        } else {
175            gte
176        };
177        if gte <= 1 {
178            diag::grain_resolved(virtual_offset, "sparse");
179            return Ok(GrainLookup::Sparse); // sparse or explicitly-zeroed grain
180        }
181        if from_rgd {
182            self.rgd_recovery_count += 1;
183        }
184        if compressed {
185            // GrainMarker layout: u64 LBA (8 bytes) + u32 dataSize (4 bytes) + data.
186            let marker_offset = u64::from(gte) * SECTOR_SIZE;
187            let mut marker_hdr = [0u8; 12];
188            self.read_exact_at(marker_offset, &mut marker_hdr)?;
189            let data_size = u32::from_le_bytes(marker_hdr[8..12].try_into().expect("4 bytes"));
190            // Cap data_size to prevent allocation amplification from crafted markers.
191            // A legitimate compressed grain cannot expand to more than 64 KiB past the
192            // raw grain size; 65536 bytes of headroom absorbs any real compressor overhead.
193            let max_data = grain_size_bytes.saturating_add(65536);
194            if u64::from(data_size) > max_data {
195                return Err(io::Error::new(
196                    io::ErrorKind::InvalidData,
197                    format!(
198                        "compressed grain data_size {data_size} exceeds limit {max_data}: \
199                         likely a crafted or corrupt VMDK"
200                    ),
201                ));
202            }
203            diag::grain_resolved(virtual_offset, "compressed");
204            return Ok(GrainLookup::Compressed {
205                data_offset: marker_offset + 12,
206                data_size,
207                offset_in_grain,
208            });
209        }
210        diag::grain_resolved(virtual_offset, "file");
211        Ok(GrainLookup::FileOffset(
212            u64::from(gte) * SECTOR_SIZE + offset_in_grain,
213        ))
214    }
215
216    /// Decompress a zlib-wrapped grain and copy the requested slice into `buf`.
217    fn read_compressed_grain(
218        &mut self,
219        buf: &mut [u8],
220        data_offset: u64,
221        data_size: u32,
222        offset_in_grain: u64,
223    ) -> io::Result<usize> {
224        use flate2::read::ZlibDecoder;
225
226        let grain_size_bytes = self.sparse_grain_size_bytes();
227        let mut compressed = vec![0u8; data_size as usize];
228        self.read_exact_at(data_offset, &mut compressed)?;
229
230        // Bound the decode to one grain (+1 sentinel byte). A legitimate grain
231        // decompresses to exactly grain_size_bytes; anything larger is a crafted
232        // decompression bomb and is refused before the expansion is materialised.
233        let mut decoder = ZlibDecoder::new(compressed.as_slice()).take(grain_size_bytes + 1);
234        let mut grain_data = Vec::new();
235        decoder.read_to_end(&mut grain_data)?;
236        if grain_data.len() as u64 > grain_size_bytes {
237            return Err(io::Error::new(
238                io::ErrorKind::InvalidData,
239                "compressed grain decompresses beyond its grain size (possible decompression bomb)",
240            ));
241        }
242
243        let start = offset_in_grain as usize;
244        let end = (start + buf.len()).min(grain_data.len());
245        let n = end.saturating_sub(start);
246        if n > 0 {
247            buf[..n].copy_from_slice(&grain_data[start..end]);
248        }
249        Ok(n)
250    }
251}
252
253impl<R: Read + Seek> Read for VmdkReader<R> {
254    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
255        if self.pos >= self.virtual_disk_size || buf.is_empty() {
256            return Ok(0);
257        }
258        let remaining_virtual = (self.virtual_disk_size - self.pos) as usize;
259
260        // Flat: direct pass-through to the inner reader at the current position.
261        if matches!(self.fmt, FormatState::Flat) {
262            let to_read = buf.len().min(remaining_virtual);
263            self.inner.seek(SeekFrom::Start(self.pos))?;
264            let n = self.inner.read(&mut buf[..to_read])?;
265            self.pos += n as u64;
266            return Ok(n);
267        }
268
269        // Sparse / StreamOptimized / SeSparse: clamp at grain boundary then do GTE lookup.
270        let grain_size_bytes = self.sparse_grain_size_bytes();
271        let remaining_in_grain = (grain_size_bytes - (self.pos % grain_size_bytes)) as usize;
272        let to_read = buf.len().min(remaining_virtual).min(remaining_in_grain);
273
274        let location = self.grain_location(self.pos)?;
275        let n = match location {
276            GrainLookup::Sparse => {
277                buf[..to_read].fill(0);
278                to_read
279            }
280            GrainLookup::FileOffset(file_off) => {
281                self.inner.seek(SeekFrom::Start(file_off))?;
282                self.inner.read(&mut buf[..to_read])?
283            }
284            GrainLookup::Compressed {
285                data_offset,
286                data_size,
287                offset_in_grain,
288            } => self.read_compressed_grain(
289                &mut buf[..to_read],
290                data_offset,
291                data_size,
292                offset_in_grain,
293            )?,
294        };
295
296        self.pos += n as u64;
297        Ok(n)
298    }
299}
300
301impl<R: Read + Seek> Seek for VmdkReader<R> {
302    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
303        let new_pos = match pos {
304            SeekFrom::Start(n) => n as i64,
305            SeekFrom::Current(n) => self.pos as i64 + n,
306            SeekFrom::End(n) => self.virtual_disk_size as i64 + n,
307        };
308        if new_pos < 0 {
309            return Err(io::Error::new(
310                io::ErrorKind::InvalidInput,
311                "seek before start",
312            ));
313        }
314        self.pos = new_pos as u64;
315        Ok(self.pos)
316    }
317}