use std::fs::File;
use std::io::{self, BufReader, Read, Seek, SeekFrom};
use std::path::Path;
use crate::cowd::{self, COWD_GTES_PER_GT};
use crate::descriptor::SparseEntry;
use crate::header::{SparseExtentHeader, SECTOR_SIZE};
/// One opened extent file together with the slice of the virtual disk it backs.
struct SparseChunk {
/// First virtual byte offset served by this extent (inclusive).
byte_start: u64,
/// Virtual byte offset one past the last byte served by this extent (exclusive).
byte_end: u64,
/// Grain directory loaded at open time. Each entry is used as the sector
/// number of a grain table inside the extent file; `0` (or an index past the
/// end of this vector) makes the reader return zeros for that range.
grain_dir: Vec<u32>,
/// Size of a single grain, in bytes.
grain_size_bytes: u64,
/// Number of grain-table entries in each grain table.
num_gtes_per_gt: u64,
/// Buffered handle on the extent file; grain tables and grain data are read
/// through it on demand.
file: BufReader<File>,
}
/// Presents a sequence of sparse extents as one contiguous, seekable byte stream.
pub(crate) struct MultiSparseReader {
/// Extents in descriptor order; their `[byte_start, byte_end)` ranges are
/// laid out back to back starting at virtual offset 0.
chunks: Vec<SparseChunk>,
/// Current virtual read position in bytes.
pos: u64,
/// Total virtual size in bytes (the `byte_end` of the last extent, or 0 when
/// there are no extents).
total_bytes: u64,
}
impl MultiSparseReader {
    /// Opens every extent named in `entries` (resolved relative to `dir`) and
    /// builds a reader that exposes them as one concatenated virtual disk.
    ///
    /// Each extent is sniffed by the first four bytes of its 512-byte header:
    /// a big-endian match against `cowd::COWD_MAGIC` selects the COWD path,
    /// anything else is parsed as a hosted sparse extent header. The grain
    /// directory of every extent is loaded eagerly; grain tables and grain
    /// data are read lazily by `read`.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if an extent cannot be resolved,
    /// opened or read, and `InvalidData` when a header fails to parse, carries
    /// a zero grain size or zero grain-table length, describes a grain
    /// directory larger than 16 MiB, or when any size computation would
    /// overflow `u64`.
    pub(crate) fn open(dir: &Path, entries: &[SparseEntry]) -> io::Result<Self> {
        /// Shorthand for an `InvalidData` error carrying a static message.
        fn invalid(msg: &'static str) -> io::Error {
            io::Error::new(io::ErrorKind::InvalidData, msg)
        }

        /// Upper bound on the in-memory grain directory, in bytes.
        const MAX_GD: u64 = 16 * 1024 * 1024;

        let mut chunks = Vec::with_capacity(entries.len());
        // Virtual offset at which the next extent starts.
        let mut byte_offset = 0u64;

        for entry in entries {
            let path = crate::descriptor::resolve_extent_path(dir, entry.filename.as_ref())?;
            let mut file = BufReader::new(File::open(&path)?);

            let mut hdr_bytes = [0u8; 512];
            file.read_exact(&mut hdr_bytes)?;
            let magic_be =
                u32::from_be_bytes([hdr_bytes[0], hdr_bytes[1], hdr_bytes[2], hdr_bytes[3]]);

            if magic_be == cowd::COWD_MAGIC {
                // The COWD loader expects to start at the beginning of the file.
                file.seek(SeekFrom::Start(0))?;
                let (grain_dir, grain_size_bytes) = cowd::open_cowd(&mut file)
                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
                // `read` divides by the grain size, so a zero here must be
                // rejected up front instead of panicking later.
                if grain_size_bytes == 0 {
                    return Err(invalid("grain_size is zero"));
                }
                let capacity = cowd::CowdHeader::parse(&hdr_bytes)
                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?
                    .capacity;
                // For COWD extents the size comes from the header capacity,
                // not from the descriptor entry.
                let size_bytes = u64::from(capacity)
                    .checked_mul(SECTOR_SIZE)
                    .ok_or_else(|| invalid("capacity overflow"))?;
                let byte_end = byte_offset
                    .checked_add(size_bytes)
                    .ok_or_else(|| invalid("virtual disk size overflow"))?;
                chunks.push(SparseChunk {
                    byte_start: byte_offset,
                    byte_end,
                    grain_dir,
                    grain_size_bytes,
                    num_gtes_per_gt: COWD_GTES_PER_GT as u64,
                    file,
                });
                byte_offset = byte_end;
                continue;
            }

            let hdr = SparseExtentHeader::parse(&hdr_bytes)
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;

            // Both values are used as divisors below and in `read`.
            if hdr.grain_size == 0 {
                return Err(invalid("grain_size is zero"));
            }
            let num_gtes_per_gt = u64::from(hdr.num_gtes_per_gt);
            if num_gtes_per_gt == 0 {
                return Err(invalid("num_gtes_per_gt is zero"));
            }

            let grain_size_bytes = hdr
                .grain_size
                .checked_mul(SECTOR_SIZE)
                .ok_or_else(|| invalid("grain_size overflow"))?;

            // Ceiling divisions: grains needed for the capacity, then grain
            // tables needed for those grains.
            let num_grains = hdr
                .capacity
                .checked_add(hdr.grain_size - 1)
                .ok_or_else(|| invalid("capacity overflow"))?
                / hdr.grain_size;
            let num_gts = num_grains
                .checked_add(num_gtes_per_gt - 1)
                .ok_or_else(|| invalid("num_gts overflow"))?
                / num_gtes_per_gt;

            // One little-endian u32 per grain table; an overflowing product is
            // by definition larger than the cap.
            let gd_byte_len = match num_gts.checked_mul(4) {
                Some(len) if len <= MAX_GD => len,
                _ => return Err(invalid("GD too large")),
            };
            let gd_byte_offset = hdr
                .gd_offset
                .checked_mul(SECTOR_SIZE)
                .ok_or_else(|| invalid("gd_offset overflow"))?;

            file.seek(SeekFrom::Start(gd_byte_offset))?;
            // Bounded by MAX_GD, so the cast cannot truncate.
            let mut gd_bytes = vec![0u8; gd_byte_len as usize];
            file.read_exact(&mut gd_bytes)?;
            let grain_dir: Vec<u32> = gd_bytes
                .chunks_exact(4)
                .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
                .collect();

            // For hosted sparse extents the virtual size comes from the
            // descriptor entry.
            let size_bytes = entry
                .size_sectors
                .checked_mul(SECTOR_SIZE)
                .ok_or_else(|| invalid("extent size overflow"))?;
            let byte_end = byte_offset
                .checked_add(size_bytes)
                .ok_or_else(|| invalid("virtual disk size overflow"))?;
            chunks.push(SparseChunk {
                byte_start: byte_offset,
                byte_end,
                grain_dir,
                grain_size_bytes,
                num_gtes_per_gt,
                file,
            });
            byte_offset = byte_end;
        }

        Ok(MultiSparseReader {
            chunks,
            pos: 0,
            total_bytes: byte_offset,
        })
    }

    /// Returns the index of the extent whose `[byte_start, byte_end)` range
    /// contains the virtual offset `pos`, or `None` if no extent covers it.
    fn chunk_for(&self, pos: u64) -> Option<usize> {
        self.chunks
            .iter()
            .position(|c| pos >= c.byte_start && pos < c.byte_end)
    }
}
impl Read for MultiSparseReader {
    /// Reads from the current virtual position into `buf`.
    ///
    /// A single call never crosses a grain or extent boundary, so it may
    /// return fewer bytes than `buf.len()`; callers wanting a full buffer
    /// should use `read_exact`. Ranges whose grain-directory entry is `0` or
    /// missing, or whose grain-table entry is `0` or `1`, are returned as
    /// zeros without touching grain data.
    ///
    /// Returns `Ok(0)` only when `buf` is empty or the position is at or past
    /// the end of the virtual disk.
    ///
    /// # Errors
    ///
    /// * `InvalidInput` if no extent covers the current position.
    /// * `InvalidData` if the extent has a zero grain size or zero grain-table
    ///   length, or a grain offset overflows `u64`.
    /// * `UnexpectedEof` if the extent file ends before the referenced grain
    ///   table entry or grain data.
    /// * Any other error reported by the underlying file.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.pos >= self.total_bytes || buf.is_empty() {
            return Ok(0);
        }
        let chunk_idx = self.chunk_for(self.pos).ok_or_else(|| {
            io::Error::new(io::ErrorKind::InvalidInput, "position out of virtual range")
        })?;
        let chunk = &mut self.chunks[chunk_idx];

        let grain_size = chunk.grain_size_bytes;
        let num_gtes_per_gt = chunk.num_gtes_per_gt;
        // Both are divisors below; fail cleanly instead of panicking.
        if grain_size == 0 || num_gtes_per_gt == 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "extent has zero grain size or zero grain table length",
            ));
        }

        let local_pos = self.pos - chunk.byte_start;
        let grain_idx = local_pos / grain_size;
        let offset_in_grain = local_pos % grain_size;

        // Clamp in u64 first: casting the individual limits to usize would
        // truncate on 32-bit targets and could yield a bogus zero-length read.
        let limit = (self.total_bytes - self.pos)
            .min(grain_size - offset_in_grain)
            .min(chunk.byte_end - self.pos);
        // The result is <= buf.len(), so the cast back to usize is lossless.
        let to_read = (buf.len() as u64).min(limit) as usize;
        let out = &mut buf[..to_read];

        // Compare the directory index in u64 so an oversized index can never
        // wrap into range; out-of-range entries read as unallocated.
        let gd_idx = grain_idx / num_gtes_per_gt;
        let gt_sector = if gd_idx < chunk.grain_dir.len() as u64 {
            chunk.grain_dir[gd_idx as usize]
        } else {
            0
        };
        if gt_sector == 0 {
            out.fill(0);
            self.pos += to_read as u64;
            return Ok(to_read);
        }

        // Fetch the 4-byte little-endian grain table entry for this grain.
        let gte_file_pos = u64::from(gt_sector) * SECTOR_SIZE + (grain_idx % num_gtes_per_gt) * 4;
        chunk.file.seek(SeekFrom::Start(gte_file_pos))?;
        let mut gte_bytes = [0u8; 4];
        chunk.file.read_exact(&mut gte_bytes)?;
        let gte = u32::from_le_bytes(gte_bytes);

        let n = if gte <= 1 {
            // Entries 0 and 1 carry no grain data; the range reads as zeros.
            out.fill(0);
            to_read
        } else {
            let file_offset = (u64::from(gte) * SECTOR_SIZE)
                .checked_add(offset_in_grain)
                .ok_or_else(|| {
                    io::Error::new(io::ErrorKind::InvalidData, "grain offset overflow")
                })?;
            chunk.file.seek(SeekFrom::Start(file_offset))?;
            let got = chunk.file.read(out)?;
            // `out` is non-empty here, so zero bytes means the extent file is
            // shorter than its grain table claims. Reporting that as Ok(0)
            // would look like a clean end of disk to the caller.
            if got == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "extent file truncated: grain data missing",
                ));
            }
            got
        };
        self.pos += n as u64;
        Ok(n)
    }
}
impl Seek for MultiSparseReader {
    /// Moves the virtual read position and returns the new absolute offset.
    ///
    /// Seeking past the end of the virtual disk is allowed; a subsequent
    /// `read` then returns `Ok(0)`. No file I/O happens here.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if the target would lie before offset 0 or would
    /// not fit in a `u64`. The position is left unchanged on error.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let (base, delta) = match pos {
            // An absolute offset is already a valid u64; routing it through
            // i64 would reject every offset above i64::MAX.
            SeekFrom::Start(n) => {
                self.pos = n;
                return Ok(n);
            }
            SeekFrom::Current(d) => (self.pos, d),
            SeekFrom::End(d) => (self.total_bytes, d),
        };

        // Apply the signed delta in unsigned arithmetic so neither direction
        // can wrap silently.
        let target = if delta < 0 {
            base.checked_sub(delta.unsigned_abs()).ok_or_else(|| {
                io::Error::new(io::ErrorKind::InvalidInput, "seek before start")
            })?
        } else {
            base.checked_add(delta.unsigned_abs()).ok_or_else(|| {
                io::Error::new(io::ErrorKind::InvalidInput, "seek position overflow")
            })?
        };

        self.pos = target;
        Ok(target)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::descriptor::SparseEntry;
    use std::io::Write as _;

    /// Stores `value` little-endian at byte offset `at` of `image`.
    fn put_u32(image: &mut [u8], at: usize, value: u32) {
        image[at..at + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Stores `value` little-endian at byte offset `at` of `image`.
    fn put_u64(image: &mut [u8], at: usize, value: u64) {
        image[at..at + 8].copy_from_slice(&value.to_le_bytes());
    }

    /// Creates `name` inside `dir` and fills it with `bytes`.
    fn write_extent(dir: &std::path::Path, name: &str, bytes: &[u8]) {
        let mut out = std::fs::File::create(dir.join(name)).unwrap();
        out.write_all(bytes).unwrap();
    }

    /// Builds a 1 KiB extent image: a 512-byte header followed by one
    /// all-zero sector.
    ///
    /// NOTE(review): the field names below follow the VMDK hosted sparse
    /// header layout; the parser is not in this file, so confirm against
    /// `SparseExtentHeader::parse`.
    fn all_sparse_extent() -> Vec<u8> {
        let mut image = vec![0u8; 1024];
        put_u32(&mut image, 0, 0x564D_444B); // magic
        put_u32(&mut image, 4, 1); // version
        put_u64(&mut image, 12, 8); // capacity (sectors)
        put_u64(&mut image, 20, 8); // grain size (sectors)
        put_u32(&mut image, 44, 512); // grain table entries per table
        put_u64(&mut image, 56, 1); // grain directory offset (sectors)
        image
    }

    /// Writes `bytes` as `s001.vmdk` in `dir` and opens it as a single-extent
    /// reader of `sectors` sectors.
    fn open_one(dir: &std::path::Path, bytes: &[u8], sectors: u64) -> MultiSparseReader {
        write_extent(dir, "s001.vmdk", bytes);
        let entry = SparseEntry {
            size_sectors: sectors,
            filename: Box::from("s001.vmdk"),
        };
        MultiSparseReader::open(dir, &[entry]).unwrap()
    }

    #[test]
    fn all_sparse_reads_zeros_and_seeks() {
        let tmp = tempfile::tempdir().unwrap();
        let mut reader = open_one(tmp.path(), &all_sparse_extent(), 8);

        // An unallocated range must overwrite the buffer with zeros.
        let mut sector = [0xFFu8; 512];
        reader.read_exact(&mut sector).unwrap();
        assert_eq!(sector, [0u8; 512]);

        // Each seek origin reports the resulting absolute position.
        let after_start = reader.seek(SeekFrom::Start(1024)).unwrap();
        assert_eq!(after_start, 1024);
        let after_current = reader.seek(SeekFrom::Current(-512)).unwrap();
        assert_eq!(after_current, 512);
        let after_end = reader.seek(SeekFrom::End(-256)).unwrap();
        assert_eq!(after_end, 8 * 512 - 256);

        // Reading at the very end yields EOF.
        reader.seek(SeekFrom::Start(8 * 512)).unwrap();
        assert_eq!(reader.read(&mut [0u8; 4]).unwrap(), 0);

        // An empty buffer always yields zero bytes.
        reader.seek(SeekFrom::Start(0)).unwrap();
        assert_eq!(reader.read(&mut []).unwrap(), 0);
    }

    #[test]
    fn seek_before_start_errors() {
        let tmp = tempfile::tempdir().unwrap();
        let mut reader = open_one(tmp.path(), &all_sparse_extent(), 8);
        let outcome = reader.seek(SeekFrom::End(-99999));
        assert!(outcome.is_err());
    }

    #[test]
    fn grain_directory_too_large_rejected() {
        // Overwrite the 8 bytes at offset 12 with a huge value so the derived
        // grain directory is expected to exceed the size cap in `open`.
        let mut image = all_sparse_extent();
        put_u64(&mut image, 12, 100_000_000_000);

        let tmp = tempfile::tempdir().unwrap();
        write_extent(tmp.path(), "s001.vmdk", &image);
        let entry = SparseEntry {
            size_sectors: 8,
            filename: Box::from("s001.vmdk"),
        };
        let outcome = MultiSparseReader::open(tmp.path(), &[entry]);
        assert!(outcome.is_err());
    }

    #[test]
    fn missing_extent_file_errors() {
        let tmp = tempfile::tempdir().unwrap();
        let entry = SparseEntry {
            size_sectors: 8,
            filename: Box::from("absent.vmdk"),
        };
        let outcome = MultiSparseReader::open(tmp.path(), &[entry]);
        assert!(outcome.is_err());
    }
}