use std::io;
use std::path::{Component, Path, PathBuf};
use crate::error::{Result, VmdkError};
pub(crate) fn resolve_extent_path(base_dir: &Path, filename: &str) -> io::Result<PathBuf> {
let rel = Path::new(filename);
let escapes = rel.components().any(|c| {
matches!(c, Component::ParentDir | Component::RootDir | Component::Prefix(_))
});
if escapes {
return Err(io::Error::new(
io::ErrorKind::PermissionDenied,
format!("refusing descriptor-controlled path that escapes the image directory: {filename}"),
));
}
Ok(base_dir.join(rel))
}
pub(crate) struct TextDescriptor {
pub create_type: Box<str>,
pub cid: u32,
pub parent_cid: u32,
pub raw_text: Box<str>,
pub extents: Vec<ExtentEntry>,
pub sparse_extents: Vec<SparseEntry>,
pub capacity_sectors: u64,
pub sparse_capacity_sectors: u64,
}
pub(crate) struct ExtentEntry {
pub size_sectors: u64,
pub filename: Box<str>,
pub file_byte_offset: u64,
pub is_zero: bool,
}
pub(crate) struct SparseEntry {
pub size_sectors: u64,
pub filename: Box<str>,
}
pub(crate) fn parse_text_descriptor(text: &str) -> Result<TextDescriptor> {
let mut create_type = Box::from("");
let mut cid: u32 = 0xffff_ffff;
let mut parent_cid: u32 = 0xffff_ffff;
let mut extents = Vec::new();
let mut sparse_extents = Vec::new();
let mut capacity_sectors = 0u64;
let mut sparse_capacity_sectors = 0u64;
let text_clean = {
let end = text.find('\0').unwrap_or(text.len());
&text[..end]
};
for line in text_clean.lines() {
let line = line.trim();
if line.starts_with('#') || line.is_empty() {
continue;
}
if let Some(rest) = line.strip_prefix("createType=") {
create_type = Box::from(rest.trim_matches('"'));
continue;
}
if let Some(rest) = line.strip_prefix("CID=") {
cid = u32::from_str_radix(rest.trim(), 16).unwrap_or(0xffff_ffff);
continue;
}
if let Some(rest) = line.strip_prefix("parentCID=") {
parent_cid = u32::from_str_radix(rest.trim(), 16).unwrap_or(0xffff_ffff);
continue;
}
if let Some(ext) = try_parse_flat_extent(line) {
capacity_sectors = capacity_sectors
.checked_add(ext.size_sectors)
.ok_or(VmdkError::GeometryOverflow { field: "capacity" })?;
extents.push(ext);
continue;
}
if let Some(ext) = try_parse_sparse_extent(line) {
sparse_capacity_sectors = sparse_capacity_sectors
.checked_add(ext.size_sectors)
.ok_or(VmdkError::GeometryOverflow { field: "capacity" })?;
sparse_extents.push(ext);
}
}
Ok(TextDescriptor {
create_type,
cid,
parent_cid,
raw_text: Box::from(text_clean),
extents,
sparse_extents,
capacity_sectors,
sparse_capacity_sectors,
})
}
fn try_parse_flat_extent(line: &str) -> Option<ExtentEntry> {
let mut rest = line;
let (access, tail) = split_token(rest)?;
if !matches!(access, "RW" | "RDONLY" | "NOACCESS") {
return None;
}
rest = tail;
let (sectors_str, tail) = split_token(rest)?;
let size_sectors: u64 = sectors_str.parse().ok()?;
rest = tail;
let (ext_type, tail) = split_token(rest)?;
if !matches!(ext_type, "FLAT" | "VMFS" | "VMFSRAW" | "VMFSRDM" | "ZERO") {
return None;
}
rest = tail.trim_start();
if ext_type == "ZERO" || access == "NOACCESS" {
return Some(ExtentEntry {
size_sectors,
filename: Box::from(""),
file_byte_offset: 0,
is_zero: true,
});
}
let (filename, remaining) = if let Some(inner) = rest.strip_prefix('"') {
let close = inner.find('"')?;
(&inner[..close], &inner[close + 1..])
} else {
let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
(&rest[..end], &rest[end..])
};
let file_sector_offset: u64 = remaining.trim().parse().unwrap_or(0);
Some(ExtentEntry {
size_sectors,
filename: Box::from(filename),
file_byte_offset: file_sector_offset * 512,
is_zero: false,
})
}
fn try_parse_sparse_extent(line: &str) -> Option<SparseEntry> {
let mut rest = line;
let (access, tail) = split_token(rest)?;
if !matches!(access, "RW" | "RDONLY") {
return None;
}
rest = tail;
let (sectors_str, tail) = split_token(rest)?;
let size_sectors: u64 = sectors_str.parse().ok()?;
rest = tail;
let (ext_type, tail) = split_token(rest)?;
if !matches!(ext_type, "SPARSE" | "VMFSSPARSE" | "SESPARSE") {
return None;
}
rest = tail.trim_start();
let filename = if let Some(inner) = rest.strip_prefix('"') {
let close = inner.find('"')?;
&inner[..close]
} else {
let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
&rest[..end]
};
Some(SparseEntry {
size_sectors,
filename: Box::from(filename),
})
}
fn split_token(s: &str) -> Option<(&str, &str)> {
let s = s.trim_start();
if s.is_empty() {
return None;
}
let end = s.find(char::is_whitespace).unwrap_or(s.len());
Some((&s[..end], &s[end..]))
}
pub(crate) fn decode_descriptor(bytes: &[u8]) -> String {
let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
let bytes = &bytes[..end];
let label = declared_encoding(&String::from_utf8_lossy(bytes));
decode_bytes(bytes, label.as_deref())
}
#[cfg(not(feature = "full-encoding"))]
fn decode_bytes(bytes: &[u8], label: Option<&str>) -> String {
if label.is_some_and(|e| e.eq_ignore_ascii_case("windows-1252")) {
return decode_windows_1252(bytes);
}
match std::str::from_utf8(bytes) {
Ok(s) => s.to_owned(),
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
}
}
#[cfg(feature = "full-encoding")]
fn decode_bytes(bytes: &[u8], label: Option<&str>) -> String {
let encoding = label
.and_then(|l| encoding_rs::Encoding::for_label(l.as_bytes()))
.unwrap_or(encoding_rs::UTF_8);
let (decoded, _had_errors) = encoding.decode_without_bom_handling(bytes);
decoded.into_owned()
}
fn declared_encoding(text: &str) -> Option<String> {
text.lines().find_map(|line| {
if !line.to_ascii_lowercase().contains("encoding") {
return None;
}
let (_, rhs) = line.split_once('=')?;
let value = rhs.trim().trim_matches('"').trim();
(!value.is_empty()).then(|| value.to_string())
})
}
#[cfg(not(feature = "full-encoding"))]
fn decode_windows_1252(bytes: &[u8]) -> String {
const C1: [char; 32] = [
'\u{20AC}', '\u{0081}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}',
'\u{2021}', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{008D}',
'\u{017D}', '\u{008F}', '\u{0090}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}',
'\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}',
'\u{0153}', '\u{009D}', '\u{017E}', '\u{0178}',
];
bytes
.iter()
.map(|&b| match b {
0x80..=0x9F => C1[(b - 0x80) as usize],
_ => b as char,
})
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn decode_windows_1252_descriptor_per_ddb_encoding() {
let bytes = b"createType=\"monolithicFlat\"\nddb.encoding = \"windows-1252\"\nparentFileNameHint=\"caf\xE9\x80.vmdk\"\n";
let s = decode_descriptor(bytes);
assert!(s.contains('\u{00E9}'), "0xE9 -> é: {s:?}");
assert!(s.contains('\u{20AC}'), "0x80 -> € (cp1252, not Latin-1): {s:?}");
assert!(s.contains("monolithicFlat"), "ASCII structure preserved: {s:?}");
}
#[test]
fn decode_invalid_utf8_without_hint_is_lossy_not_empty() {
let bytes = b"createType=\"monolithicSparse\"\nname=\xE9 raw\n";
let s = decode_descriptor(bytes);
assert!(!s.is_empty(), "must never silently empty the descriptor");
assert!(s.contains("monolithicSparse"), "ASCII structure kept: {s:?}");
assert!(s.contains('\u{FFFD}'), "undecodable byte -> U+FFFD: {s:?}");
}
#[test]
fn decode_valid_utf8_unchanged() {
let s = decode_descriptor("createType=\"x\"\nlabel=café\n".as_bytes());
assert!(s.contains("café"), "valid UTF-8 round-trips: {s:?}");
}
#[cfg(feature = "full-encoding")]
#[test]
fn decode_shift_jis_with_full_encoding_feature() {
let bytes = b"createType=\"monolithicFlat\"\nddb.encoding = \"shift_jis\"\nname=\x82\xA0\n";
let s = decode_descriptor(bytes);
assert!(s.contains('\u{3042}'), "Shift_JIS あ decoded: {s:?}");
}
#[test]
fn parse_two_gb_max_extent_flat_descriptor() {
let text = r#"# Disk DescriptorFile
version=1
CID=49bcfa17
parentCID=ffffffff
createType="twoGbMaxExtentFlat"
# Extent description
RW 2048 FLAT "flat-f001.vmdk" 0
"#;
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.create_type.as_ref(), "twoGbMaxExtentFlat");
assert_eq!(d.capacity_sectors, 2048);
assert_eq!(d.extents.len(), 1);
assert_eq!(d.extents[0].filename.as_ref(), "flat-f001.vmdk");
assert_eq!(d.extents[0].size_sectors, 2048);
assert_eq!(d.extents[0].file_byte_offset, 0);
}
#[test]
fn parse_sparse_extents() {
let text = "createType=\"twoGbMaxExtentSparse\"\nRW 8192 SPARSE \"disk-s001.vmdk\"\nRW 8192 SPARSE \"disk-s002.vmdk\"\n";
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.extents.len(), 0);
assert_eq!(d.sparse_extents.len(), 2);
assert_eq!(d.sparse_extents[0].filename.as_ref(), "disk-s001.vmdk");
assert_eq!(d.sparse_extents[0].size_sectors, 8192);
assert_eq!(d.sparse_capacity_sectors, 16384);
}
#[test]
fn parse_quoted_filename_with_spaces() {
let text = "createType=\"twoGbMaxExtentFlat\"\nRW 512 FLAT \"my disk.vmdk\" 0\n";
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.extents[0].filename.as_ref(), "my disk.vmdk");
}
#[test]
fn parse_nonzero_sector_offset() {
let text = "createType=\"twoGbMaxExtentFlat\"\nRW 512 FLAT \"ext.vmdk\" 4096\n";
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.extents[0].file_byte_offset, 4096 * 512);
}
#[test]
fn parse_flat_bare_word_filename() {
let text = "createType=\"monolithicFlat\"\nRW 2048 FLAT bare-f001.vmdk 0\n";
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.extents.len(), 1);
assert_eq!(d.extents[0].filename.as_ref(), "bare-f001.vmdk");
}
#[test]
fn parse_sparse_bare_word_filename() {
let text = "createType=\"twoGbMaxExtentSparse\"\nRW 2048 SPARSE bare-s001.vmdk\n";
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.sparse_extents.len(), 1);
assert_eq!(d.sparse_extents[0].filename.as_ref(), "bare-s001.vmdk");
}
#[test]
fn parse_vmfsrdm_raw_device_mapping_extent() {
let text = "createType=\"vmfsRawDeviceMap\"\nRW 20971520 VMFSRDM \"vml.02000000006006016015301d00\"\n";
let d = parse_text_descriptor(text).expect("parse");
assert_eq!(d.create_type.as_ref(), "vmfsRawDeviceMap");
assert_eq!(d.extents.len(), 1);
assert_eq!(
d.extents[0].filename.as_ref(),
"vml.02000000006006016015301d00"
);
assert_eq!(d.extents[0].size_sectors, 20_971_520);
assert!(!d.extents[0].is_zero);
assert_eq!(d.capacity_sectors, 20_971_520);
}
#[test]
fn malformed_extent_lines_are_ignored() {
let text =
"createType=\"custom\"\nRW 100 BOGUS \"x.vmdk\"\nRDONLY notanumber FLAT \"y\"\nRW\n";
let d = parse_text_descriptor(text).expect("parse");
assert!(d.extents.is_empty());
assert!(d.sparse_extents.is_empty());
}
#[test]
fn sparse_extent_capacity_overflow_is_rejected() {
let big = u64::MAX;
let text = format!(
"createType=\"twoGbMaxExtentSparse\"\nRW {big} SPARSE \"a\"\nRW {big} SPARSE \"b\"\n"
);
assert!(matches!(
parse_text_descriptor(&text),
Err(VmdkError::GeometryOverflow { field: "capacity" })
));
}
#[test]
fn flat_extent_capacity_overflow_is_rejected() {
let big = u64::MAX;
let text = format!(
"createType=\"twoGbMaxExtentFlat\"\nRW {big} FLAT \"a\" 0\nRW {big} FLAT \"b\" 0\n"
);
assert!(matches!(
parse_text_descriptor(&text),
Err(VmdkError::GeometryOverflow { field: "capacity" })
));
}
}