// async_hdf5/messages/datatype.rs

1use bytes::Bytes;
2
3use crate::endian::HDF5Reader;
4use crate::error::{HDF5Error, Result};
5
/// Ordering of the bytes that make up a multi-byte value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ByteOrder {
    /// Least-significant byte is stored first.
    LittleEndian,
    /// Most-significant byte is stored first.
    BigEndian,
    /// VAX byte ordering (rare; kept for legacy HDF5 data).
    Vax,
    /// Byte order has no meaning for the type (e.g., single-byte types).
    NotApplicable,
}
18
/// How the unused tail of a string's storage is filled.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StringPadding {
    /// The string ends at a terminating null byte.
    NullTerminate,
    /// The remaining storage is filled with null bytes.
    NullPad,
    /// The remaining storage is filled with spaces.
    SpacePad,
}
29
/// Character encoding used by a string type.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Charset {
    /// Text is encoded as ASCII.
    Ascii,
    /// Text is encoded as UTF-8.
    Utf8,
}
38
39/// A field within a compound data type.
40#[derive(Debug, Clone)]
41pub struct CompoundField {
42    /// Field name.
43    pub name: String,
44    /// Byte offset of the field within the compound type.
45    pub byte_offset: u32,
46    /// Data type of the field.
47    pub dtype: DataType,
48}
49
50/// HDF5 data type descriptor.
51///
52/// Parsed from the Datatype message (0x0003) in object headers.
53#[derive(Debug, Clone)]
54pub enum DataType {
55    /// Fixed-point integers (signed/unsigned, various sizes).
56    FixedPoint {
57        /// Total size in bytes.
58        size: u32,
59        /// Whether the integer is signed.
60        signed: bool,
61        /// Byte order.
62        byte_order: ByteOrder,
63        /// Bit offset of the first significant bit.
64        bit_offset: u16,
65        /// Number of significant bits.
66        bit_precision: u16,
67    },
68    /// IEEE 754 floating-point.
69    FloatingPoint {
70        /// Total size in bytes.
71        size: u32,
72        /// Byte order.
73        byte_order: ByteOrder,
74        /// Bit offset of the first significant bit.
75        bit_offset: u16,
76        /// Number of significant bits.
77        bit_precision: u16,
78        /// Bit position of the exponent field.
79        exponent_location: u8,
80        /// Size of the exponent field in bits.
81        exponent_size: u8,
82        /// Bit position of the mantissa field.
83        mantissa_location: u8,
84        /// Size of the mantissa field in bits.
85        mantissa_size: u8,
86        /// Exponent bias.
87        exponent_bias: u32,
88    },
89    /// Fixed-length or variable-length strings.
90    String {
91        /// Total size in bytes (0 for variable-length).
92        size: u32,
93        /// Padding type.
94        padding: StringPadding,
95        /// Character set.
96        charset: Charset,
97    },
98    /// Compound types (e.g., CFloat32 = {r: float32, i: float32}).
99    Compound {
100        /// Total size in bytes.
101        size: u32,
102        /// Member fields.
103        fields: Vec<CompoundField>,
104    },
105    /// Enumerated types.
106    Enum {
107        /// Total size in bytes.
108        size: u32,
109        /// Underlying integer type.
110        base_type: Box<DataType>,
111        /// Enum member names and their raw values.
112        members: Vec<(String, Vec<u8>)>,
113    },
114    /// Variable-length sequences or strings.
115    VarLen {
116        /// Base element type (for sequences) or u8 (for strings).
117        base_type: Box<DataType>,
118        /// True if this is a variable-length string (class bits type=1).
119        is_string: bool,
120    },
121    /// Fixed-size array of another type.
122    Array {
123        /// Element type.
124        base_type: Box<DataType>,
125        /// Array dimensions.
126        dimensions: Vec<u32>,
127    },
128    /// Opaque data.
129    Opaque {
130        /// Total size in bytes.
131        size: u32,
132        /// ASCII tag describing the opaque type.
133        tag: String,
134    },
135    /// Bitfield.
136    Bitfield {
137        /// Total size in bytes.
138        size: u32,
139        /// Byte order.
140        byte_order: ByteOrder,
141        /// Bit offset of the first significant bit.
142        bit_offset: u16,
143        /// Number of significant bits.
144        bit_precision: u16,
145    },
146    /// Reference type.
147    Reference {
148        /// Total size in bytes.
149        size: u32,
150        /// Reference type (0 = object, 1 = region).
151        ref_type: u8,
152    },
153}
154
155impl DataType {
156    /// Element size in bytes.
157    pub fn size(&self) -> u32 {
158        match self {
159            DataType::FixedPoint { size, .. } => *size,
160            DataType::FloatingPoint { size, .. } => *size,
161            DataType::String { size, .. } => *size,
162            DataType::Compound { size, .. } => *size,
163            DataType::Enum { size, .. } => *size,
164            DataType::VarLen { .. } => 16, // HDF5 vlen: {size, pointer}
165            DataType::Array {
166                base_type,
167                dimensions,
168            } => {
169                let dim_product = dimensions
170                    .iter()
171                    .copied()
172                    .fold(1u32, |acc, d| acc.saturating_mul(d));
173                base_type.size().saturating_mul(dim_product)
174            }
175            DataType::Opaque { size, .. } => *size,
176            DataType::Bitfield { size, .. } => *size,
177            DataType::Reference { size, .. } => *size,
178        }
179    }
180
181    /// Parse a datatype message from raw bytes.
182    pub fn parse(data: &Bytes) -> Result<Self> {
183        let mut r = HDF5Reader::new(data.clone());
184        Self::parse_from_reader(&mut r)
185    }
186
187    /// Parse from an HDF5Reader (allows recursive parsing for compound types).
188    pub(crate) fn parse_from_reader(r: &mut HDF5Reader) -> Result<Self> {
189        let class_and_version = r.read_u8()?;
190        let class = class_and_version & 0x0F;
191        let version = (class_and_version >> 4) & 0x0F;
192
193        // 3 bytes of class bit field
194        let bf0 = r.read_u8()?;
195        let bf1 = r.read_u8()?;
196        let bf2 = r.read_u8()?;
197        let class_bits = (bf2 as u32) << 16 | (bf1 as u32) << 8 | (bf0 as u32);
198
199        let size = r.read_u32()?;
200
201        match class {
202            0 => Self::parse_fixed_point(r, class_bits, size),
203            1 => Self::parse_floating_point(r, class_bits, size),
204            3 => Self::parse_string(class_bits, size),
205            4 => Self::parse_bitfield(r, class_bits, size),
206            5 => Self::parse_opaque(r, class_bits, size),
207            6 => Self::parse_compound(r, class_bits, size, version),
208            7 => Ok(DataType::Reference {
209                size,
210                ref_type: (class_bits & 0x0F) as u8,
211            }),
212            8 => Self::parse_enum(r, class_bits, size),
213            9 => Self::parse_varlen(r, class_bits, size),
214            10 => Self::parse_array(r, class_bits, size),
215            _ => Err(HDF5Error::UnsupportedDatatypeClass(class)),
216        }
217    }
218
219    fn parse_fixed_point(r: &mut HDF5Reader, class_bits: u32, size: u32) -> Result<Self> {
220        let byte_order = match class_bits & 0x01 {
221            0 => ByteOrder::LittleEndian,
222            1 => ByteOrder::BigEndian,
223            _ => unreachable!(),
224        };
225        let signed = (class_bits >> 3) & 0x01 == 1;
226
227        let bit_offset = r.read_u16()?;
228        let bit_precision = r.read_u16()?;
229
230        Ok(DataType::FixedPoint {
231            size,
232            signed,
233            byte_order,
234            bit_offset,
235            bit_precision,
236        })
237    }
238
239    fn parse_floating_point(r: &mut HDF5Reader, class_bits: u32, size: u32) -> Result<Self> {
240        let byte_order = match (class_bits & 0x01, (class_bits >> 6) & 0x01) {
241            (0, 0) => ByteOrder::LittleEndian,
242            (1, 0) => ByteOrder::BigEndian,
243            (0, 1) => ByteOrder::Vax,
244            _ => ByteOrder::NotApplicable,
245        };
246
247        let bit_offset = r.read_u16()?;
248        let bit_precision = r.read_u16()?;
249        let exponent_location = r.read_u8()?;
250        let exponent_size = r.read_u8()?;
251        let mantissa_location = r.read_u8()?;
252        let mantissa_size = r.read_u8()?;
253        let exponent_bias = r.read_u32()?;
254
255        Ok(DataType::FloatingPoint {
256            size,
257            byte_order,
258            bit_offset,
259            bit_precision,
260            exponent_location,
261            exponent_size,
262            mantissa_location,
263            mantissa_size,
264            exponent_bias,
265        })
266    }
267
268    fn parse_string(class_bits: u32, size: u32) -> Result<Self> {
269        let padding = match class_bits & 0x0F {
270            0 => StringPadding::NullTerminate,
271            1 => StringPadding::NullPad,
272            2 => StringPadding::SpacePad,
273            _ => StringPadding::NullTerminate,
274        };
275        let charset = match (class_bits >> 4) & 0x0F {
276            0 => Charset::Ascii,
277            1 => Charset::Utf8,
278            _ => Charset::Ascii,
279        };
280
281        Ok(DataType::String {
282            size,
283            padding,
284            charset,
285        })
286    }
287
288    fn parse_bitfield(r: &mut HDF5Reader, class_bits: u32, size: u32) -> Result<Self> {
289        let byte_order = match class_bits & 0x01 {
290            0 => ByteOrder::LittleEndian,
291            _ => ByteOrder::BigEndian,
292        };
293        let bit_offset = r.read_u16()?;
294        let bit_precision = r.read_u16()?;
295
296        Ok(DataType::Bitfield {
297            size,
298            byte_order,
299            bit_offset,
300            bit_precision,
301        })
302    }
303
304    fn parse_opaque(r: &mut HDF5Reader, class_bits: u32, size: u32) -> Result<Self> {
305        let tag_len = (class_bits & 0xFF) as usize;
306        let tag_bytes = r.read_bytes(tag_len)?;
307        let tag = String::from_utf8_lossy(&tag_bytes)
308            .trim_end_matches('\0')
309            .to_string();
310        r.skip_field_padding(tag_len, 8);
311
312        Ok(DataType::Opaque { size, tag })
313    }
314
315    fn parse_compound(r: &mut HDF5Reader, class_bits: u32, size: u32, version: u8) -> Result<Self> {
316        let num_members = (class_bits & 0xFFFF) as usize;
317        let mut fields = Vec::with_capacity(num_members);
318
319        for _ in 0..num_members {
320            let name = r.read_null_terminated_string()?;
321
322            // Version 1 & 2: pad name to multiple of 8 bytes (including null terminator)
323            if version < 3 {
324                let name_total = name.len() + 1; // including null
325                r.skip_field_padding(name_total, 8);
326            }
327
328            // Byte offset of member
329            let byte_offset = if version < 3 {
330                // v1/v2: 4-byte offset + dimensionality info
331                let offset = r.read_u32()?;
332                // v1: dimensionality (1 byte) + reserved (3 bytes) + perm (4 bytes) + reserved (4 bytes) + dims (4*4 bytes)
333                // v2: no dimensionality
334                if version == 1 {
335                    let _ndims = r.read_u8()?;
336                    r.skip(3 + 4 + 4); // reserved + permutation + reserved
337                    r.skip(4 * 4); // dimension sizes — always 4 slots in v1
338                }
339                offset
340            } else {
341                // v3: variable-size offset based on type size
342                if size <= 0xFF {
343                    r.read_u8()? as u32
344                } else if size <= 0xFFFF {
345                    r.read_u16()? as u32
346                } else {
347                    r.read_u32()?
348                }
349            };
350
351            // Recursively parse member datatype
352            let dtype = DataType::parse_from_reader(r)?;
353
354            fields.push(CompoundField {
355                name,
356                byte_offset,
357                dtype,
358            });
359        }
360
361        Ok(DataType::Compound { size, fields })
362    }
363
364    fn parse_enum(r: &mut HDF5Reader, class_bits: u32, size: u32) -> Result<Self> {
365        let num_members = (class_bits & 0xFFFF) as usize;
366        let base_type = Box::new(DataType::parse_from_reader(r)?);
367
368        let mut names = Vec::with_capacity(num_members);
369        for _ in 0..num_members {
370            names.push(r.read_null_terminated_string()?);
371        }
372
373        let member_size = base_type.size() as usize;
374        let mut members = Vec::with_capacity(num_members);
375        for name in names {
376            let value = r.read_bytes(member_size)?;
377            members.push((name, value));
378        }
379
380        Ok(DataType::Enum {
381            size,
382            base_type,
383            members,
384        })
385    }
386
387    fn parse_varlen(r: &mut HDF5Reader, class_bits: u32, _size: u32) -> Result<Self> {
388        let is_string = (class_bits & 0x0F) == 1;
389        let base_type = Box::new(DataType::parse_from_reader(r)?);
390        Ok(DataType::VarLen {
391            base_type,
392            is_string,
393        })
394    }
395
396    fn parse_array(r: &mut HDF5Reader, _class_bits: u32, _size: u32) -> Result<Self> {
397        let ndims = r.read_u8()?;
398        // v3: no reserved bytes. v2: 3 reserved bytes.
399        // We try the v3 path — if needed, we can add version awareness.
400
401        let mut dimensions = Vec::with_capacity(ndims as usize);
402        for _ in 0..ndims {
403            dimensions.push(r.read_u32()?);
404        }
405
406        let base_type = Box::new(DataType::parse_from_reader(r)?);
407        Ok(DataType::Array {
408            base_type,
409            dimensions,
410        })
411    }
412}