vmdk 0.2.0

Pure-Rust read-only VMware VMDK disk image reader (monolithicSparse, streamOptimized, twoGbMaxExtentFlat/Sparse, monolithicFlat)
Documentation
//! Multi-file sparse extent reader (twoGbMaxExtentSparse).
//!
//! Each SPARSE extent is an independent binary VMDK file with its own
//! `SparseExtentHeader`, grain directory, and grain tables.

use std::fs::File;
use std::io::{self, BufReader, Read, Seek, SeekFrom};
use std::path::Path;

use crate::descriptor::SparseEntry;
use crate::header::{SparseExtentHeader, SECTOR_SIZE};

/// One opened sparse extent file together with the slice of the virtual
/// disk that it backs.
struct SparseChunk {
    /// First virtual byte offset served by this extent (inclusive).
    byte_start: u64,
    /// Virtual byte offset one past the last byte served by this extent
    /// (exclusive).
    byte_end: u64,
    /// Grain directory loaded from the extent at open time. Each entry is
    /// the sector offset of a grain table inside `file`; an entry of `0` is
    /// treated as "no table" and the range it covers reads as zeros.
    grain_dir: Vec<u32>,
    /// Grain size in bytes (the header's grain size in sectors multiplied by
    /// `SECTOR_SIZE`).
    grain_size_bytes: u64,
    /// Number of grain table entries per grain table, widened from the
    /// header field.
    num_gtes_per_gt: u64,
    /// Buffered handle on the extent file; used for grain table lookups and
    /// grain data reads.
    file: BufReader<File>,
}

/// `Read + Seek` view that presents a sequence of sparse extent files as one
/// contiguous virtual disk.
pub(crate) struct MultiSparseReader {
    /// Extents in the order of the entries passed to `open`; their
    /// `[byte_start, byte_end)` ranges are laid out back to back from 0.
    chunks: Vec<SparseChunk>,
    /// Current virtual byte position. A seek may place it at or beyond
    /// `total_bytes`, in which case reads return `Ok(0)`.
    pos: u64,
    /// Size of the virtual disk in bytes (sum of all extent sizes).
    total_bytes: u64,
}

impl MultiSparseReader {
    /// Opens every sparse extent listed in `entries` (file names resolved
    /// relative to `dir`) and lays them out back to back as one virtual disk.
    ///
    /// For each extent the 512-byte header is parsed and the grain directory
    /// is loaded into memory. Grain tables and grain data stay on disk and are
    /// consulted lazily by `read`.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from opening, seeking or reading an extent
    /// file. Returns `InvalidData` when a header fails to parse, declares a
    /// zero grain size or a zero grain-table width, describes a grain
    /// directory larger than 16 MiB, or when any size computation overflows
    /// `u64`.
    pub(crate) fn open(dir: &Path, entries: &[SparseEntry]) -> io::Result<Self> {
        // Upper bound on the in-memory grain directory of a single extent;
        // protects against allocating on the say-so of a corrupt header.
        const MAX_GD: u64 = 16 * 1024 * 1024;

        fn invalid(msg: &'static str) -> io::Error {
            io::Error::new(io::ErrorKind::InvalidData, msg)
        }

        let mut chunks = Vec::with_capacity(entries.len());
        let mut byte_offset = 0u64;

        for entry in entries {
            let path = dir.join(entry.filename.as_ref());
            let mut file = BufReader::new(File::open(&path)?);

            let mut hdr_bytes = [0u8; 512];
            file.read_exact(&mut hdr_bytes)?;
            let hdr = SparseExtentHeader::parse(&hdr_bytes)
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;

            // Both values are used as divisors below (and by `read`), so a
            // zero must be rejected here rather than panic later.
            let num_gtes_per_gt = u64::from(hdr.num_gtes_per_gt);
            if hdr.grain_size == 0 {
                return Err(invalid("grain_size is zero"));
            }
            if num_gtes_per_gt == 0 {
                return Err(invalid("num_gtes_per_gt is zero"));
            }

            let grain_size_bytes = hdr
                .grain_size
                .checked_mul(SECTOR_SIZE)
                .ok_or_else(|| invalid("grain_size overflow"))?;

            // Ceiling divisions: grains needed for the capacity, then grain
            // tables needed for those grains.
            let num_grains = hdr
                .capacity
                .checked_add(hdr.grain_size - 1)
                .ok_or_else(|| invalid("capacity overflow"))?
                / hdr.grain_size;
            let num_gts = num_grains
                .checked_add(num_gtes_per_gt - 1)
                .ok_or_else(|| invalid("num_gts overflow"))?
                / num_gtes_per_gt;

            // Four bytes per grain directory entry.
            let gd_byte_len = num_gts
                .checked_mul(4)
                .filter(|&len| len <= MAX_GD)
                .ok_or_else(|| invalid("GD too large"))?;

            let gd_byte_offset = hdr
                .gd_offset
                .checked_mul(SECTOR_SIZE)
                .ok_or_else(|| invalid("gd_offset overflow"))?;
            file.seek(SeekFrom::Start(gd_byte_offset))?;
            // `gd_byte_len` is at most MAX_GD (16 MiB), so the cast is lossless.
            let mut gd_bytes = vec![0u8; gd_byte_len as usize];
            file.read_exact(&mut gd_bytes)?;

            let grain_dir: Vec<u32> = gd_bytes
                .chunks_exact(4)
                .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
                .collect();

            let size_bytes = entry
                .size_sectors
                .checked_mul(SECTOR_SIZE)
                .ok_or_else(|| invalid("extent size overflow"))?;
            let byte_end = byte_offset
                .checked_add(size_bytes)
                .ok_or_else(|| invalid("virtual disk size overflow"))?;

            chunks.push(SparseChunk {
                byte_start: byte_offset,
                byte_end,
                grain_dir,
                grain_size_bytes,
                num_gtes_per_gt,
                file,
            });
            byte_offset = byte_end;
        }

        Ok(MultiSparseReader {
            chunks,
            pos: 0,
            total_bytes: byte_offset,
        })
    }

    /// Returns the index of the extent whose `[byte_start, byte_end)` range
    /// contains the virtual offset `pos`, or `None` when no extent covers it.
    fn chunk_for(&self, pos: u64) -> Option<usize> {
        self.chunks
            .iter()
            .position(|c| pos >= c.byte_start && pos < c.byte_end)
    }
}

impl Read for MultiSparseReader {
    /// Reads from the current virtual position into `buf`.
    ///
    /// A single call never crosses a grain or extent boundary, so it may
    /// return fewer bytes than requested; callers that need a full buffer
    /// should use `read_exact`. Ranges with no grain table (directory entry
    /// `0`) and grains whose table entry is `0` or `1` read as zeros.
    ///
    /// Returns `Ok(0)` only when `buf` is empty or the position is at or past
    /// the end of the virtual disk.
    ///
    /// # Errors
    ///
    /// * `InvalidInput` if no extent covers the current position.
    /// * `InvalidData` if the extent has a zero grain size or grain-table
    ///   width.
    /// * `UnexpectedEof` if a grain table entry or the grain data it points
    ///   at lies past the end of the extent file.
    /// * Any I/O error from the underlying extent file.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.pos >= self.total_bytes || buf.is_empty() {
            return Ok(0);
        }

        let pos = self.pos;
        let chunk_idx = self.chunk_for(pos).ok_or_else(|| {
            io::Error::new(io::ErrorKind::InvalidInput, "position out of virtual range")
        })?;
        let until_disk_end = self.total_bytes - pos;
        let chunk = &mut self.chunks[chunk_idx];

        // Both values are divisors below; report a corrupt extent instead of
        // panicking on a division by zero.
        let grain_size = chunk.grain_size_bytes;
        let num_gtes_per_gt = chunk.num_gtes_per_gt;
        if grain_size == 0 || num_gtes_per_gt == 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "sparse extent has zero grain size or grain table width",
            ));
        }

        let local_pos = pos - chunk.byte_start;
        let grain_idx = local_pos / grain_size;
        let offset_in_grain = local_pos % grain_size;

        // Clamp in u64 before narrowing: casting each remaining size to
        // `usize` first would truncate on 32-bit targets. The result is never
        // larger than `buf.len()`, so the final cast is lossless.
        let limit = until_disk_end
            .min(chunk.byte_end - pos)
            .min(grain_size - offset_in_grain);
        let to_read = (buf.len() as u64).min(limit) as usize;
        let out = &mut buf[..to_read];

        // Compare in u64 so an oversized index cannot alias a valid one after
        // narrowing. Indices beyond the directory are treated as sparse.
        let gd_idx = grain_idx / num_gtes_per_gt;
        let gt_sector = if gd_idx < chunk.grain_dir.len() as u64 {
            chunk.grain_dir[gd_idx as usize]
        } else {
            0
        };

        // No grain table means every grain in the range is unallocated,
        // which is the same outcome as a grain table entry of 0.
        let gte = if gt_sector == 0 {
            0
        } else {
            let gte_local_idx = grain_idx % num_gtes_per_gt;
            let gte_file_pos = u64::from(gt_sector) * SECTOR_SIZE + gte_local_idx * 4;
            chunk.file.seek(SeekFrom::Start(gte_file_pos))?;
            let mut gte_bytes = [0u8; 4];
            chunk.file.read_exact(&mut gte_bytes)?;
            u32::from_le_bytes(gte_bytes)
        };

        let n = if gte <= 1 {
            // 0 = unallocated grain, 1 = explicitly zeroed grain.
            out.fill(0);
            to_read
        } else {
            let file_offset = u64::from(gte) * SECTOR_SIZE + offset_in_grain;
            chunk.file.seek(SeekFrom::Start(file_offset))?;
            let n = chunk.file.read(out)?;
            if n == 0 {
                // `out` is non-empty here, so a zero-length read means the
                // extent file ends before the grain. Returning Ok(0) would
                // look like a clean end of the virtual disk to callers.
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "grain data lies past end of extent file",
                ));
            }
            n
        };

        self.pos += n as u64;
        Ok(n)
    }
}

impl Seek for MultiSparseReader {
    /// Moves the virtual read position and returns the new position.
    ///
    /// Seeking to or beyond the end of the virtual disk is allowed; the
    /// position is stored as given.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput`, leaving the position unchanged, when the
    /// target would be negative or would not fit in a `u64`.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        // i128 holds every `u64` base plus every `i64` delta exactly, so the
        // sum cannot overflow and positions above `i64::MAX` stay valid.
        let target = match pos {
            SeekFrom::Start(n) => i128::from(n),
            SeekFrom::Current(delta) => i128::from(self.pos) + i128::from(delta),
            SeekFrom::End(delta) => i128::from(self.total_bytes) + i128::from(delta),
        };

        if target < 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "seek before start",
            ));
        }
        if target > i128::from(u64::MAX) {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "seek position overflows u64",
            ));
        }

        // Range-checked above, so the narrowing cast is lossless.
        self.pos = target as u64;
        Ok(self.pos)
    }
}