use crate::error::{ByteOrder, Error, Result};
use crate::io::Cursor;
/// Length information attached to a [`Datatype::String`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StringSize {
    /// Fixed length, taken verbatim from a non-zero size field.
    Fixed(u32),
    /// No fixed length; `parse_string` yields this when the size field is zero.
    Variable,
}
/// Character set of a [`Datatype::String`].
///
/// `parse_string` decodes this from bits 4-7 of the class flags.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StringEncoding {
    /// Flag value 0; also used for any value this parser does not recognise.
    Ascii,
    /// Flag value 1.
    Utf8,
}
/// Padding convention of a [`Datatype::String`].
///
/// `parse_string` decodes this from bits 0-3 of the class flags.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StringPadding {
    /// Flag value 0; also used for any value this parser does not recognise.
    NullTerminate,
    /// Flag value 1.
    NullPad,
    /// Flag value 2.
    SpacePad,
}
/// One member of a [`Datatype::Compound`].
#[derive(Clone, Debug)]
pub struct CompoundField {
    /// Member name, read as a NUL-terminated string.
    pub name: String,
    /// Offset of the member as stored in the member record.
    pub byte_offset: u32,
    /// The member's own datatype, parsed from a nested description.
    pub datatype: Datatype,
}
/// One name/value pair of a [`Datatype::Enum`].
#[derive(Clone, Debug)]
pub struct EnumMember {
    /// Member name, read as a NUL-terminated string.
    pub name: String,
    /// Raw, undecoded value bytes exactly as read from the message.
    pub value: Vec<u8>,
}
/// Kind of a [`Datatype::Reference`].
///
/// `parse_reference` decodes this from bits 0-3 of the class flags.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ReferenceType {
    /// Flag value 0; also used for any value this parser does not recognise.
    Object,
    /// Flag value 1.
    DatasetRegion,
}
/// Decoded form of a datatype description.
///
/// Each variant corresponds to one or more datatype classes dispatched by
/// `parse_datatype_description`.
#[derive(Clone, Debug)]
pub enum Datatype {
    /// Class 0. `size` is the description's size field narrowed to `u8`.
    FixedPoint {
        size: u8,
        signed: bool,
        byte_order: ByteOrder,
    },
    /// Class 1. `size` is the description's size field narrowed to `u8`.
    FloatingPoint {
        size: u8,
        byte_order: ByteOrder,
    },
    /// Class 3.
    String {
        size: StringSize,
        encoding: StringEncoding,
        padding: StringPadding,
    },
    /// Class 6. `size` is the size field of the compound as a whole.
    Compound {
        size: u32,
        fields: Vec<CompoundField>,
    },
    /// Class 10. `dims` holds one extent per dimension, widened from 32 bits.
    Array {
        base: Box<Datatype>,
        dims: Vec<u64>,
    },
    /// Class 8.
    Enum {
        base: Box<Datatype>,
        members: Vec<EnumMember>,
    },
    /// Class 9.
    VarLen {
        base: Box<Datatype>,
    },
    /// Class 5; class 2 is also reported as opaque with the tag `HDF5_TIME`.
    Opaque {
        size: u32,
        tag: String,
    },
    /// Class 7. `size` is the description's size field narrowed to `u8`.
    Reference {
        ref_type: ReferenceType,
        size: u8,
    },
    /// Class 4. `size` is the description's size field narrowed to `u8`.
    Bitfield {
        size: u8,
        byte_order: ByteOrder,
    },
}
/// Result of parsing one complete datatype message.
#[derive(Clone, Debug)]
pub struct DatatypeMessage {
    /// The decoded top-level datatype.
    pub datatype: Datatype,
    /// The size field of the top-level description, unmodified.
    pub size: u32,
}
/// Parses a datatype message that occupies `msg_size` bytes at the cursor.
///
/// After the description has been decoded, any bytes of the message that were
/// not consumed are skipped so the cursor ends up `msg_size` bytes past where
/// it started. If the description consumed `msg_size` bytes or more, nothing
/// is skipped.
///
/// # Errors
/// Propagates errors from decoding the description and from the trailing skip.
pub fn parse(cursor: &mut Cursor<'_>, msg_size: usize) -> Result<DatatypeMessage> {
    let origin = cursor.position();
    let (datatype, size) = parse_datatype_description(cursor)?;

    let used = (cursor.position() - origin) as usize;
    let trailing = msg_size.saturating_sub(used);
    if trailing != 0 {
        cursor.skip(trailing)?;
    }

    Ok(DatatypeMessage { datatype, size })
}
pub fn parse_datatype_description(cursor: &mut Cursor<'_>) -> Result<(Datatype, u32)> {
let class_and_flags = cursor.read_u32_le()?;
let class = (class_and_flags & 0x0F) as u8;
let version = ((class_and_flags >> 4) & 0x0F) as u8;
let class_flags = class_and_flags >> 8; let size = cursor.read_u32_le()?;
let dt = match class {
0 => parse_fixed_point(cursor, class_flags, size)?,
1 => parse_floating_point(cursor, class_flags, size)?,
2 => parse_time(cursor, size)?,
3 => parse_string(class_flags, size)?,
4 => parse_bitfield(cursor, class_flags, size)?,
5 => parse_opaque(cursor, class_flags, size)?,
6 => parse_compound(cursor, class_flags, size, version)?,
7 => parse_reference(class_flags, size)?,
8 => parse_enum(cursor, class_flags, size)?,
9 => parse_varlen(cursor, class_flags, size)?,
10 => parse_array(cursor, size, version)?,
c => return Err(Error::UnsupportedDatatypeClass(c)),
};
Ok((dt, size))
}
/// Decodes the properties of a fixed-point datatype (class 0).
///
/// Flag bit 0 selects big-endian when set; flag bit 3 marks the type as
/// signed. The two 16-bit property fields (bit offset, bit precision) are
/// consumed but not kept. `size` is narrowed to `u8` with `as`.
fn parse_fixed_point(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
    // Read purely to advance the cursor past the property block.
    let _bit_offset = cursor.read_u16_le()?;
    let _bit_precision = cursor.read_u16_le()?;

    let byte_order = match flags & 0x01 {
        0 => ByteOrder::LittleEndian,
        _ => ByteOrder::BigEndian,
    };

    Ok(Datatype::FixedPoint {
        size: size as u8,
        signed: (flags & 0x08) != 0,
        byte_order,
    })
}
/// Decodes the properties of a floating-point datatype (class 1).
///
/// The byte order is spread over flag bits 0 (low) and 6 (high). Only the
/// combination high = 0, low = 1 is reported as big-endian; every other
/// combination is reported as little-endian. The twelve property bytes are
/// consumed field by field but not kept. `size` is narrowed to `u8` with `as`.
fn parse_floating_point(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
    // Bit 6 and bit 0 together; big-endian is exactly "bit 6 clear, bit 0 set".
    const BYTE_ORDER_BITS: u32 = 0x41;
    let byte_order = if (flags & BYTE_ORDER_BITS) == 0x01 {
        ByteOrder::BigEndian
    } else {
        ByteOrder::LittleEndian
    };

    // Property block, read only to advance the cursor.
    let _bit_offset = cursor.read_u16_le()?;
    let _bit_precision = cursor.read_u16_le()?;
    let _exp_location = cursor.read_u8()?;
    let _exp_size = cursor.read_u8()?;
    let _mant_location = cursor.read_u8()?;
    let _mant_size = cursor.read_u8()?;
    let _exp_bias = cursor.read_u32_le()?;

    let size = size as u8;
    Ok(Datatype::FloatingPoint { size, byte_order })
}
/// Decodes a time datatype (class 2).
///
/// The 16-bit bit-precision property is consumed and discarded. The type is
/// reported as [`Datatype::Opaque`] carrying the fixed tag `HDF5_TIME`.
fn parse_time(cursor: &mut Cursor<'_>, size: u32) -> Result<Datatype> {
    let _bit_precision = cursor.read_u16_le()?;
    let tag = String::from("HDF5_TIME");
    Ok(Datatype::Opaque { size, tag })
}
/// Decodes a string datatype (class 3) from its flags and size alone.
///
/// Bits 0-3 of `flags` select the padding (1 = NUL-padded, 2 = space-padded,
/// anything else = NUL-terminated). Bits 4-7 select the encoding (1 = UTF-8,
/// anything else = ASCII). A `size` of zero is reported as
/// [`StringSize::Variable`]; any other value as [`StringSize::Fixed`].
fn parse_string(flags: u32, size: u32) -> Result<Datatype> {
    let padding = match flags & 0x0F {
        1 => StringPadding::NullPad,
        2 => StringPadding::SpacePad,
        _ => StringPadding::NullTerminate,
    };

    let encoding = if (flags >> 4) & 0x0F == 1 {
        StringEncoding::Utf8
    } else {
        StringEncoding::Ascii
    };

    let size = match size {
        0 => StringSize::Variable,
        len => StringSize::Fixed(len),
    };

    Ok(Datatype::String {
        size,
        encoding,
        padding,
    })
}
/// Decodes the properties of a bitfield datatype (class 4).
///
/// Flag bit 0 selects big-endian when set. The two 16-bit property fields
/// (bit offset, bit precision) are consumed but not kept. `size` is narrowed
/// to `u8` with `as`.
fn parse_bitfield(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
    // Read purely to advance the cursor past the property block.
    let _bit_offset = cursor.read_u16_le()?;
    let _bit_precision = cursor.read_u16_le()?;

    let byte_order = match flags & 0x01 {
        0 => ByteOrder::LittleEndian,
        _ => ByteOrder::BigEndian,
    };

    let size = size as u8;
    Ok(Datatype::Bitfield { size, byte_order })
}
/// Decodes the properties of an opaque datatype (class 5).
///
/// The low eight bits of `flags` give the stored tag length. The tag bytes are
/// read, trailing NUL bytes are dropped, and the rest is converted to a
/// `String` lossily. If the stored length is not a multiple of eight, the
/// bytes up to the next multiple of eight are skipped as well.
fn parse_opaque(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
    let tag_len = (flags & 0xFF) as usize;

    let mut tag = String::new();
    if tag_len != 0 {
        let raw = cursor.read_bytes(tag_len)?;
        // Everything up to and including the last non-NUL byte is kept.
        let keep = raw.iter().rev().skip_while(|&&byte| byte == 0).count();
        tag = String::from_utf8_lossy(&raw[..keep]).into_owned();
    }

    // Distance from `tag_len` to the next multiple of eight (zero if aligned).
    let pad = (8 - tag_len % 8) % 8;
    if pad != 0 {
        cursor.skip(pad)?;
    }

    Ok(Datatype::Opaque { size, tag })
}
/// Decodes the member list of a compound datatype (class 6).
///
/// `flags` bits 0-15 hold the member count, `size` is the size field of the
/// compound itself (it also fixes the width of version-3 member offsets) and
/// `version` is the message version from the class word.
///
/// Each member record is read as:
/// * a NUL-terminated name; for `version < 3` the stored name is padded to a
///   multiple of eight bytes, counted from the first byte of the name,
/// * the member's byte offset: four little-endian bytes for `version < 3`,
///   otherwise `compound_member_offset_size(size)` bytes read via `read_uvar`,
/// * for `version == 1` only, 28 bytes of array information that is discarded,
/// * a nested datatype description for the member's type.
///
/// # Errors
/// Propagates any cursor error and any error from a nested description.
fn parse_compound(cursor: &mut Cursor<'_>, flags: u32, size: u32, version: u8) -> Result<Datatype> {
    let n_members = (flags & 0xFFFF) as usize;
    let byte_offset_size = compound_member_offset_size(size);
    let mut fields = Vec::with_capacity(n_members);

    for _ in 0..n_members {
        let name_start = cursor.position();
        let name = cursor.read_null_terminated_string()?;

        if version < 3 {
            // The padding is relative to where the name began, not to the
            // cursor's absolute position: a member name follows the previous
            // member's nested description, whose length is arbitrary, so names
            // generally do not start on an 8-byte boundary.
            let stored = cursor.position() - name_start;
            let pad = ((8 - stored % 8) % 8) as usize;
            if pad != 0 {
                cursor.skip(pad)?;
            }
        }

        let byte_offset = if version >= 3 {
            cursor.read_uvar(byte_offset_size)? as u32
        } else {
            cursor.read_u32_le()?
        };

        if version == 1 {
            // Version-1 array information, unused here. Field meanings follow
            // the HDF5 version-1 compound member layout.
            let _dimensionality = cursor.read_u8()?;
            cursor.skip(3)?; // reserved
            cursor.skip(4)?; // dimension permutation
            cursor.skip(4)?; // reserved
            cursor.skip(16)?; // four 32-bit dimension sizes
        }

        let (member_dt, _member_size) = parse_datatype_description(cursor)?;
        fields.push(CompoundField {
            name,
            byte_offset,
            datatype: member_dt,
        });
    }

    Ok(Datatype::Compound { size, fields })
}
/// Returns how many bytes are needed to hold an offset inside a compound of
/// `size` bytes: 1 up to `0xFF`, 2 up to `0xFFFF`, 3 up to `0xFF_FFFF`, else 4.
fn compound_member_offset_size(size: u32) -> usize {
    // Largest size representable in one, two and three bytes respectively.
    const LIMITS: [u32; 3] = [0xFF, 0xFFFF, 0xFF_FFFF];

    // Every limit that `size` exceeds costs one more byte.
    1 + LIMITS.iter().take_while(|&&limit| size > limit).count()
}
/// Decodes a reference datatype (class 7) from its flags and size alone.
///
/// Bits 0-3 of `flags` equal to 1 select [`ReferenceType::DatasetRegion`];
/// every other value is reported as [`ReferenceType::Object`]. `size` is
/// narrowed to `u8` with `as`.
fn parse_reference(flags: u32, size: u32) -> Result<Datatype> {
    let ref_type = if flags & 0x0F == 1 {
        ReferenceType::DatasetRegion
    } else {
        ReferenceType::Object
    };

    let size = size as u8;
    Ok(Datatype::Reference { ref_type, size })
}
/// Decodes the properties of an enumeration datatype (class 8).
///
/// `flags` bits 0-15 hold the member count. The property block is read as a
/// nested description of the base type, then all member names as consecutive
/// NUL-terminated strings, then one `size`-byte raw value per member in the
/// same order as the names.
///
/// NOTE(review): names are read back-to-back with no padding between them,
/// whereas `parse_compound` consumes extra padding after each member name when
/// `version < 3`. This function is not given the version — confirm whether
/// pre-version-3 enumerations need the same handling.
fn parse_enum(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
    let n_members = (flags & 0xFFFF) as usize;
    let (base_dt, _base_size) = parse_datatype_description(cursor)?;

    // All names come first ...
    let mut names = Vec::with_capacity(n_members);
    while names.len() < n_members {
        let name = cursor.read_null_terminated_string()?;
        names.push(name);
    }

    // ... followed by all values, each as wide as the enum's size field.
    let value_width = size as usize;
    let mut members = Vec::with_capacity(n_members);
    for name in names {
        let raw = cursor.read_bytes(value_width)?;
        members.push(EnumMember {
            name,
            value: raw.to_vec(),
        });
    }

    let base = Box::new(base_dt);
    Ok(Datatype::Enum { base, members })
}
/// Decodes the properties of a variable-length datatype (class 9).
///
/// The three flag nibbles (type, padding, character set) are extracted but not
/// stored; only the nested base-type description ends up in the result.
fn parse_varlen(cursor: &mut Cursor<'_>, flags: u32, _size: u32) -> Result<Datatype> {
    // Bits 0-3, 4-7 and 8-11 respectively; currently unused.
    let (_vlen_type, _padding, _charset) =
        (flags & 0x0F, (flags >> 4) & 0x0F, (flags >> 8) & 0x0F);

    let (element, _element_size) = parse_datatype_description(cursor)?;
    let base = Box::new(element);
    Ok(Datatype::VarLen { base })
}
/// Decodes the properties of an array datatype (class 10).
///
/// The property block is a one-byte rank followed by `rank` little-endian
/// 32-bit extents and a nested description of the element type. For
/// `version < 3` there are three extra bytes after the rank and another
/// `rank * 4` bytes after the extents; both are skipped.
fn parse_array(cursor: &mut Cursor<'_>, _size: u32, version: u8) -> Result<Datatype> {
    let has_legacy_fields = version < 3;

    let rank = cursor.read_u8()? as usize;
    if has_legacy_fields {
        cursor.skip(3)?;
    }

    let mut dims = Vec::with_capacity(rank);
    while dims.len() < rank {
        let extent = cursor.read_u32_le()?;
        dims.push(extent as u64);
    }

    if has_legacy_fields {
        cursor.skip(rank * 4)?;
    }

    let (element, _element_size) = parse_datatype_description(cursor)?;
    let base = Box::new(element);
    Ok(Datatype::Array { base, dims })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs class (bits 0-3), version (bits 4-7) and class flags (bits 8 and
    /// up) into the first word of a description.
    fn class_word(class: u8, version: u8, flags: u32) -> u32 {
        (flags << 8) | (u32::from(version) << 4) | u32::from(class)
    }

    /// Encodes the eight bytes every description starts with: the class word
    /// followed by the size field, both little-endian.
    fn header(class: u8, version: u8, flags: u32, size: u32) -> Vec<u8> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&class_word(class, version, flags).to_le_bytes());
        bytes.extend_from_slice(&size.to_le_bytes());
        bytes
    }

    /// Appends the little-endian `u16` pair (bit offset, bit precision).
    fn push_bit_layout(bytes: &mut Vec<u8>, bit_offset: u16, bit_precision: u16) {
        bytes.extend_from_slice(&bit_offset.to_le_bytes());
        bytes.extend_from_slice(&bit_precision.to_le_bytes());
    }

    #[test]
    fn test_parse_u32_le() {
        let mut data = header(0, 1, 0x00, 4);
        push_bit_layout(&mut data, 0, 32);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        assert_eq!(msg.size, 4);
        if let Datatype::FixedPoint {
            size,
            signed,
            byte_order,
        } = &msg.datatype
        {
            assert_eq!(*size, 4);
            assert!(!*signed);
            assert_eq!(*byte_order, ByteOrder::LittleEndian);
        } else {
            panic!("expected FixedPoint, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_i64_be() {
        // 0x09 = big-endian (bit 0) + signed (bit 3).
        let mut data = header(0, 1, 0x09, 8);
        push_bit_layout(&mut data, 0, 64);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        assert_eq!(msg.size, 8);
        if let Datatype::FixedPoint {
            size,
            signed,
            byte_order,
        } = &msg.datatype
        {
            assert_eq!(*size, 8);
            assert!(*signed);
            assert_eq!(*byte_order, ByteOrder::BigEndian);
        } else {
            panic!("expected FixedPoint, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_f32_le() {
        let mut data = header(1, 1, 0x20, 4);
        push_bit_layout(&mut data, 0, 32);
        // Exponent location, exponent size, mantissa location, mantissa size.
        data.extend_from_slice(&[23, 8, 0, 23]);
        data.extend_from_slice(&127u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        assert_eq!(msg.size, 4);
        if let Datatype::FloatingPoint { size, byte_order } = &msg.datatype {
            assert_eq!(*size, 4);
            assert_eq!(*byte_order, ByteOrder::LittleEndian);
        } else {
            panic!("expected FloatingPoint, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_f64_be() {
        let mut data = header(1, 1, 0x01, 8);
        push_bit_layout(&mut data, 0, 64);
        // Exponent location, exponent size, mantissa location, mantissa size.
        data.extend_from_slice(&[52, 11, 0, 52]);
        data.extend_from_slice(&1023u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        assert_eq!(msg.size, 8);
        if let Datatype::FloatingPoint { size, byte_order } = &msg.datatype {
            assert_eq!(*size, 8);
            assert_eq!(*byte_order, ByteOrder::BigEndian);
        } else {
            panic!("expected FloatingPoint, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_string_fixed_ascii() {
        let data = header(3, 1, 0x00, 32);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        assert_eq!(msg.size, 32);
        if let Datatype::String {
            size,
            encoding,
            padding,
        } = &msg.datatype
        {
            assert_eq!(*size, StringSize::Fixed(32));
            assert_eq!(*encoding, StringEncoding::Ascii);
            assert_eq!(*padding, StringPadding::NullTerminate);
        } else {
            panic!("expected String, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_string_utf8_space_pad() {
        // Padding value 2 in the low nibble, encoding value 1 in the next.
        let flags: u32 = (0x01 << 4) | 0x02;
        let data = header(3, 1, flags, 16);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        if let Datatype::String {
            size,
            encoding,
            padding,
        } = &msg.datatype
        {
            assert_eq!(*size, StringSize::Fixed(16));
            assert_eq!(*encoding, StringEncoding::Utf8);
            assert_eq!(*padding, StringPadding::SpacePad);
        } else {
            panic!("expected String, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_reference_object() {
        let data = header(7, 1, 0x00, 8);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        if let Datatype::Reference { ref_type, size } = &msg.datatype {
            assert_eq!(*ref_type, ReferenceType::Object);
            assert_eq!(*size, 8);
        } else {
            panic!("expected Reference, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_reference_region() {
        let data = header(7, 1, 0x01, 12);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        if let Datatype::Reference { ref_type, size } = &msg.datatype {
            assert_eq!(*ref_type, ReferenceType::DatasetRegion);
            assert_eq!(*size, 12);
        } else {
            panic!("expected Reference, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_compound_v3_variable_member_offsets() {
        // Version-3 compound of size 16 with two members; offsets are 1 byte wide.
        let mut data = header(6, 3, 2, 16);

        // Member 0: object reference at offset 0.
        data.extend_from_slice(b"dataset\0");
        data.push(0x00);
        data.extend(header(7, 1, 0x00, 8));

        // Member 1: 32-bit fixed-point at offset 8.
        data.extend_from_slice(b"dimension\0");
        data.push(0x08);
        data.extend(header(0, 1, 0x00, 4));
        push_bit_layout(&mut data, 0, 32);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        if let Datatype::Compound { size, fields } = &msg.datatype {
            assert_eq!(*size, 16);
            assert_eq!(fields.len(), 2);
            assert_eq!(fields[0].name, "dataset");
            assert_eq!(fields[0].byte_offset, 0);
            assert_eq!(fields[1].name, "dimension");
            assert_eq!(fields[1].byte_offset, 8);
        } else {
            panic!("expected Compound, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_enum_u8() {
        // Version-3 enum with two members over a one-byte fixed-point base.
        let mut data = header(8, 3, 2, 1);
        data.extend(header(0, 1, 0, 1));
        push_bit_layout(&mut data, 0, 8);
        data.extend_from_slice(b"OFF\0");
        data.extend_from_slice(b"ON\0");
        data.extend_from_slice(&[0x00, 0x01]);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        if let Datatype::Enum { base, members } = &msg.datatype {
            assert!(matches!(
                base.as_ref(),
                Datatype::FixedPoint {
                    size: 1,
                    signed: false,
                    ..
                }
            ));
            assert_eq!(members.len(), 2);
            assert_eq!(members[0].name, "OFF");
            assert_eq!(members[0].value, vec![0x00]);
            assert_eq!(members[1].name, "ON");
            assert_eq!(members[1].value, vec![0x01]);
        } else {
            panic!("expected Enum, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_parse_bitfield() {
        let mut data = header(4, 1, 0x00, 2);
        push_bit_layout(&mut data, 0, 16);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, data.len()).unwrap();

        if let Datatype::Bitfield { size, byte_order } = &msg.datatype {
            assert_eq!(*size, 2);
            assert_eq!(*byte_order, ByteOrder::LittleEndian);
        } else {
            panic!("expected Bitfield, got {:?}", msg.datatype);
        }
    }

    #[test]
    fn test_unsupported_class() {
        let data = header(15, 1, 0, 0);

        let mut cursor = Cursor::new(&data);
        assert!(parse(&mut cursor, data.len()).is_err());
    }
}