Skip to main content

hdf5_reader/messages/
datatype.rs

//! HDF5 Datatype message (type 0x0003).
//!
//! The datatype message describes the type of each element in a dataset.
//! It opens with an 8-byte header: a 4-byte little-endian word that encodes
//! the class (bits 0-3), the version (bits 4-7), and the class-specific bit
//! flags (bits 8-31), followed by the 4-byte element size. The remaining
//! bytes carry class-specific properties.
//!
//! Supported classes:
//! - 0: Fixed-point (integer)
//! - 1: Floating-point
//! - 2: Time (treated as opaque)
//! - 3: String
//! - 4: Bitfield
//! - 5: Opaque
//! - 6: Compound
//! - 7: Reference
//! - 8: Enum
//! - 9: Variable-length
//! - 10: Array
20
21use crate::error::{ByteOrder, Error, Result};
22use crate::io::Cursor;
23
24// ---------------------------------------------------------------------------
25// Public types
26// ---------------------------------------------------------------------------
27
/// Length discipline of a string datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringSize {
    /// Fixed-length string stored in exactly `n` bytes (padding included).
    Fixed(u32),
    /// Variable-length string (stored as a global-heap reference).
    Variable,
}
36
/// Character set of the bytes that make up a string datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// ASCII text (character-set code 0; also the fallback for unknown codes).
    Ascii,
    /// UTF-8 text (character-set code 1).
    Utf8,
}
43
/// How a string is terminated or padded inside its storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringPadding {
    /// NUL-terminated (padding code 0; also the fallback for unknown codes).
    NullTerminate,
    /// NUL-padded (padding code 1).
    NullPad,
    /// Space-padded (padding code 2).
    SpacePad,
}
51
/// Flavor of an HDF5 variable-length datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VarLenKind {
    /// Variable-length sequence of base-type values (kind code 0).
    Sequence,
    /// Variable-length string (kind code 1).
    String,
    /// Any other kind code, kept verbatim so the metadata is not lost.
    Unknown(u8),
}
62
63/// A field within a compound datatype.
64#[derive(Debug, Clone)]
65pub struct CompoundField {
66    pub name: String,
67    pub byte_offset: u32,
68    pub datatype: Datatype,
69}
70
/// One name/value pair of an enumeration datatype.
#[derive(Debug, Clone)]
pub struct EnumMember {
    /// Symbolic name of the member.
    pub name: String,
    /// Raw bytes of the member's value, exactly as stored in the message.
    pub value: Vec<u8>,
}
77
/// Kind of HDF5 reference stored in a reference datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceType {
    /// Object reference (8 bytes in HDF5 1.8+).
    Object,
    /// Dataset region reference (12 bytes).
    DatasetRegion,
}
86
87/// Describes the element type of a dataset or attribute.
88#[derive(Debug, Clone)]
89pub enum Datatype {
90    /// Integer (class 0).
91    FixedPoint {
92        size: u8,
93        signed: bool,
94        byte_order: ByteOrder,
95    },
96    /// IEEE 754 float (class 1).
97    FloatingPoint { size: u8, byte_order: ByteOrder },
98    /// Character string (class 3).
99    String {
100        size: StringSize,
101        encoding: StringEncoding,
102        padding: StringPadding,
103    },
104    /// Compound / struct (class 6).
105    Compound {
106        size: u32,
107        fields: Vec<CompoundField>,
108    },
109    /// Fixed-size array of a base type (class 10).
110    Array { base: Box<Datatype>, dims: Vec<u64> },
111    /// Enumeration (class 8).
112    Enum {
113        base: Box<Datatype>,
114        members: Vec<EnumMember>,
115    },
116    /// Variable-length sequence or string (class 9).
117    VarLen {
118        base: Box<Datatype>,
119        kind: VarLenKind,
120        encoding: StringEncoding,
121        padding: StringPadding,
122    },
123    /// Opaque blob (class 5).
124    Opaque { size: u32, tag: String },
125    /// Object or region reference (class 7).
126    Reference { ref_type: ReferenceType, size: u8 },
127    /// Bitfield (class 4).
128    Bitfield { size: u8, byte_order: ByteOrder },
129}
130
131/// Wrapper returned by the message parser, pairing the decoded datatype
132/// with the total element size from the message header.
133#[derive(Debug, Clone)]
134pub struct DatatypeMessage {
135    pub datatype: Datatype,
136    /// Element size in bytes (from the 4-byte class/version word).
137    pub size: u32,
138}
139
140// ---------------------------------------------------------------------------
141// Parsing
142// ---------------------------------------------------------------------------
143
144/// Parse a datatype message starting at the current cursor position.
145///
146/// The `msg_size` is the total number of bytes allocated for this message
147/// (used to skip any trailing padding).
148pub fn parse(cursor: &mut Cursor<'_>, msg_size: usize) -> Result<DatatypeMessage> {
149    let start = cursor.position();
150    let (dt, size) = parse_datatype_description(cursor)?;
151
152    let consumed = (cursor.position() - start) as usize;
153    if consumed < msg_size {
154        cursor.skip(msg_size - consumed)?;
155    }
156
157    Ok(DatatypeMessage { datatype: dt, size })
158}
159
160/// Parse a single datatype description (the 4-byte header + properties).
161///
162/// This is also called recursively for compound members, arrays, enums, etc.
163pub fn parse_datatype_description(cursor: &mut Cursor<'_>) -> Result<(Datatype, u32)> {
164    let class_and_flags = cursor.read_u32_le()?;
165    let class = (class_and_flags & 0x0F) as u8;
166    let version = ((class_and_flags >> 4) & 0x0F) as u8;
167    let class_flags = class_and_flags >> 8; // upper 24 bits
168    let size = cursor.read_u32_le()?;
169
170    let dt = match class {
171        0 => parse_fixed_point(cursor, class_flags, size)?,
172        1 => parse_floating_point(cursor, class_flags, size)?,
173        2 => parse_time(cursor, size)?,
174        3 => parse_string(class_flags, size)?,
175        4 => parse_bitfield(cursor, class_flags, size)?,
176        5 => parse_opaque(cursor, class_flags, size)?,
177        6 => parse_compound(cursor, class_flags, size, version)?,
178        7 => parse_reference(class_flags, size)?,
179        8 => parse_enum(cursor, class_flags, size)?,
180        9 => parse_varlen(cursor, class_flags, size)?,
181        10 => parse_array(cursor, size, version)?,
182        c => return Err(Error::UnsupportedDatatypeClass(c)),
183    };
184
185    Ok((dt, size))
186}
187
188// ---------------------------------------------------------------------------
189// Class 0: Fixed-point (integer)
190// ---------------------------------------------------------------------------
191
192fn parse_fixed_point(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
193    // Bit 0 of class flags: byte order (0 = LE, 1 = BE)
194    let byte_order = if (flags & 0x01) != 0 {
195        ByteOrder::BigEndian
196    } else {
197        ByteOrder::LittleEndian
198    };
199    // Bit 3: signed (0 = unsigned, 1 = signed)
200    let signed = (flags & 0x08) != 0;
201
202    // Properties: bit offset (u16) + bit precision (u16)
203    let _bit_offset = cursor.read_u16_le()?;
204    let _bit_precision = cursor.read_u16_le()?;
205
206    Ok(Datatype::FixedPoint {
207        size: size as u8,
208        signed,
209        byte_order,
210    })
211}
212
213// ---------------------------------------------------------------------------
214// Class 1: Floating-point
215// ---------------------------------------------------------------------------
216
217fn parse_floating_point(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
218    // Byte order: bit 0 low-order, bit 6 high-order
219    //   00 = LE, 01 = BE, 10 = VAX (treated as LE for our purposes)
220    let bo_lo = flags & 0x01;
221    let bo_hi = (flags >> 6) & 0x01;
222    let byte_order = match (bo_hi, bo_lo) {
223        (0, 0) => ByteOrder::LittleEndian,
224        (0, 1) => ByteOrder::BigEndian,
225        // VAX order — map to little endian (close enough for decoding)
226        _ => ByteOrder::LittleEndian,
227    };
228
229    // Properties: 12 bytes
230    // bit offset (u16), bit precision (u16), exponent location (u8),
231    // exponent size (u8), mantissa location (u8), mantissa size (u8),
232    // exponent bias (u32)
233    let _bit_offset = cursor.read_u16_le()?;
234    let _bit_precision = cursor.read_u16_le()?;
235    let _exp_location = cursor.read_u8()?;
236    let _exp_size = cursor.read_u8()?;
237    let _mant_location = cursor.read_u8()?;
238    let _mant_size = cursor.read_u8()?;
239    let _exp_bias = cursor.read_u32_le()?;
240
241    Ok(Datatype::FloatingPoint {
242        size: size as u8,
243        byte_order,
244    })
245}
246
247// ---------------------------------------------------------------------------
248// Class 2: Time (rarely used, treat as opaque)
249// ---------------------------------------------------------------------------
250
251fn parse_time(cursor: &mut Cursor<'_>, size: u32) -> Result<Datatype> {
252    // Properties: bit precision (u16)
253    let _bit_precision = cursor.read_u16_le()?;
254    Ok(Datatype::Opaque {
255        size,
256        tag: "HDF5_TIME".to_string(),
257    })
258}
259
260// ---------------------------------------------------------------------------
261// Class 3: String
262// ---------------------------------------------------------------------------
263
264fn parse_string(flags: u32, size: u32) -> Result<Datatype> {
265    // Bits 0-3: padding type
266    let padding = string_padding_from_bits(flags & 0x0F);
267
268    // Bits 4-7: character set
269    let encoding = string_encoding_from_bits((flags >> 4) & 0x0F);
270
271    // No additional property bytes for string class.
272
273    let string_size = if size == 0 {
274        // Size 0 can indicate variable-length when used with vlen wrapper,
275        // but for the string class itself we treat it as Variable.
276        StringSize::Variable
277    } else {
278        StringSize::Fixed(size)
279    };
280
281    Ok(Datatype::String {
282        size: string_size,
283        encoding,
284        padding,
285    })
286}
287
288// ---------------------------------------------------------------------------
289// Class 4: Bitfield
290// ---------------------------------------------------------------------------
291
292fn parse_bitfield(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
293    let byte_order = if (flags & 0x01) != 0 {
294        ByteOrder::BigEndian
295    } else {
296        ByteOrder::LittleEndian
297    };
298
299    // Properties: bit offset (u16) + bit precision (u16)
300    let _bit_offset = cursor.read_u16_le()?;
301    let _bit_precision = cursor.read_u16_le()?;
302
303    Ok(Datatype::Bitfield {
304        size: size as u8,
305        byte_order,
306    })
307}
308
309// ---------------------------------------------------------------------------
310// Class 5: Opaque
311// ---------------------------------------------------------------------------
312
313fn parse_opaque(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
314    // The class flags encode the length of the tag (in the lower bits).
315    let tag_len = (flags & 0xFF) as usize;
316
317    let tag = if tag_len > 0 {
318        let tag_bytes = cursor.read_bytes(tag_len)?;
319        // Trim trailing nulls
320        let end = tag_bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
321        String::from_utf8_lossy(&tag_bytes[..end]).into_owned()
322    } else {
323        String::new()
324    };
325
326    // Pad to 8-byte alignment
327    let padded = (tag_len + 7) & !7;
328    if padded > tag_len {
329        cursor.skip(padded - tag_len)?;
330    }
331
332    Ok(Datatype::Opaque { size, tag })
333}
334
335// ---------------------------------------------------------------------------
336// Class 6: Compound
337// ---------------------------------------------------------------------------
338
339fn parse_compound(cursor: &mut Cursor<'_>, flags: u32, size: u32, version: u8) -> Result<Datatype> {
340    // Lower 16 bits of class flags = number of members
341    let n_members = (flags & 0xFFFF) as usize;
342    let byte_offset_size = compound_member_offset_size(size);
343
344    let mut fields = Vec::with_capacity(n_members);
345
346    for _ in 0..n_members {
347        let name = cursor.read_null_terminated_string()?;
348
349        if version < 3 {
350            // V1/V2: name is padded to 8-byte boundary (relative to start of name)
351            // The null terminator is included in the count. We already read
352            // through the null terminator via read_null_terminated_string.
353            // Pad the position to 8-byte alignment.
354            cursor.align(8)?;
355        }
356
357        let byte_offset = if version == 1 {
358            // V1: byte offset is `size of offsets` (4 bytes)
359            cursor.read_u32_le()?
360        } else if version >= 3 {
361            cursor.read_uvar(byte_offset_size)? as u32
362        } else {
363            // V2/V3: byte offset is 4 bytes
364            cursor.read_u32_le()?
365        };
366
367        if version == 1 {
368            // V1: dimensionality (1 byte), reserved (3 bytes), dim perm (4 bytes),
369            // reserved (4 bytes), dim sizes (4 * 4 = 16 bytes)
370            let _dimensionality = cursor.read_u8()?;
371            cursor.skip(3)?; // reserved
372            cursor.skip(4)?; // dimension permutation
373            cursor.skip(4)?; // reserved
374            cursor.skip(16)?; // 4 dimension sizes (each u32)
375        }
376
377        let (member_dt, _member_size) = parse_datatype_description(cursor)?;
378
379        fields.push(CompoundField {
380            name,
381            byte_offset,
382            datatype: member_dt,
383        });
384    }
385
386    Ok(Datatype::Compound { size, fields })
387}
388
389fn compound_member_offset_size(size: u32) -> usize {
390    match size {
391        0..=0xFF => 1,
392        0x100..=0xFFFF => 2,
393        0x1_0000..=0xFF_FFFF => 3,
394        _ => 4,
395    }
396}
397
398// ---------------------------------------------------------------------------
399// Class 7: Reference
400// ---------------------------------------------------------------------------
401
402fn parse_reference(flags: u32, size: u32) -> Result<Datatype> {
403    // Bit 0-3: reference type (0 = object, 1 = dataset region)
404    let ref_type = match flags & 0x0F {
405        0 => ReferenceType::Object,
406        1 => ReferenceType::DatasetRegion,
407        _ => ReferenceType::Object,
408    };
409
410    // No property bytes for reference class.
411
412    Ok(Datatype::Reference {
413        ref_type,
414        size: size as u8,
415    })
416}
417
418// ---------------------------------------------------------------------------
419// Class 8: Enum
420// ---------------------------------------------------------------------------
421
422fn parse_enum(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
423    let n_members = (flags & 0xFFFF) as usize;
424
425    // Base type
426    let (base_dt, _base_size) = parse_datatype_description(cursor)?;
427
428    // Member names (null-terminated)
429    let mut names = Vec::with_capacity(n_members);
430    for _ in 0..n_members {
431        names.push(cursor.read_null_terminated_string()?);
432    }
433
434    // Member values (each is `size` bytes, matching the base type size)
435    let member_value_size = size as usize;
436    let mut members = Vec::with_capacity(n_members);
437    for name in names {
438        let value = cursor.read_bytes(member_value_size)?.to_vec();
439        members.push(EnumMember { name, value });
440    }
441
442    Ok(Datatype::Enum {
443        base: Box::new(base_dt),
444        members,
445    })
446}
447
448// ---------------------------------------------------------------------------
449// Class 9: Variable-length
450// ---------------------------------------------------------------------------
451
452fn parse_varlen(cursor: &mut Cursor<'_>, flags: u32, _size: u32) -> Result<Datatype> {
453    // Bits 0-3: type (0 = sequence, 1 = string)
454    let kind = match flags & 0x0F {
455        0 => VarLenKind::Sequence,
456        1 => VarLenKind::String,
457        other => VarLenKind::Unknown(other as u8),
458    };
459    // Bits 4-7: padding type (for strings)
460    let padding = string_padding_from_bits((flags >> 4) & 0x0F);
461    // Bits 8-11: character set (for strings)
462    let encoding = string_encoding_from_bits((flags >> 8) & 0x0F);
463
464    // Base type follows
465    let (base_dt, _base_size) = parse_datatype_description(cursor)?;
466
467    Ok(Datatype::VarLen {
468        base: Box::new(base_dt),
469        kind,
470        encoding,
471        padding,
472    })
473}
474
475fn string_padding_from_bits(bits: u32) -> StringPadding {
476    match bits {
477        0 => StringPadding::NullTerminate,
478        1 => StringPadding::NullPad,
479        2 => StringPadding::SpacePad,
480        _ => StringPadding::NullTerminate,
481    }
482}
483
484fn string_encoding_from_bits(bits: u32) -> StringEncoding {
485    match bits {
486        0 => StringEncoding::Ascii,
487        1 => StringEncoding::Utf8,
488        _ => StringEncoding::Ascii,
489    }
490}
491
492// ---------------------------------------------------------------------------
493// Class 10: Array
494// ---------------------------------------------------------------------------
495
496fn parse_array(cursor: &mut Cursor<'_>, _size: u32, version: u8) -> Result<Datatype> {
497    let rank = cursor.read_u8()? as usize;
498
499    if version < 3 {
500        // Version 1/2: 3 reserved bytes after rank
501        cursor.skip(3)?;
502    }
503
504    let mut dims = Vec::with_capacity(rank);
505    for _ in 0..rank {
506        dims.push(cursor.read_u32_le()? as u64);
507    }
508
509    if version < 3 {
510        // Version 1: permutation indices (rank * u32) — skip them
511        cursor.skip(rank * 4)?;
512    }
513
514    // Base type
515    let (base_dt, _base_size) = parse_datatype_description(cursor)?;
516
517    Ok(Datatype::Array {
518        base: Box::new(base_dt),
519        dims,
520    })
521}
522
523// ---------------------------------------------------------------------------
524// Tests
525// ---------------------------------------------------------------------------
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the 4-byte class+version+flags word.
    fn class_word(class: u8, version: u8, flags: u32) -> u32 {
        (flags << 8) | (u32::from(version) << 4) | u32::from(class)
    }

    /// Serialize the 8-byte datatype header: class word, then element size.
    fn header(class: u8, version: u8, flags: u32, size: u32) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(8);
        bytes.extend_from_slice(&class_word(class, version, flags).to_le_bytes());
        bytes.extend_from_slice(&size.to_le_bytes());
        bytes
    }

    /// Serialize the bit offset / bit precision property pair.
    fn bit_layout(bit_offset: u16, bit_precision: u16) -> Vec<u8> {
        let mut bytes = bit_offset.to_le_bytes().to_vec();
        bytes.extend_from_slice(&bit_precision.to_le_bytes());
        bytes
    }

    /// Run `parse` over `data`, declaring the whole buffer as the message.
    fn decode(data: Vec<u8>) -> Result<DatatypeMessage> {
        let mut cursor = Cursor::new(&data);
        parse(&mut cursor, data.len())
    }

    #[test]
    fn test_parse_u32_le() {
        // class=0 (fixed-point), version=1, flags: bit0=0 (LE), bit3=0 (unsigned), size=4
        let mut data = header(0, 1, 0x00, 4);
        // properties: bit offset=0, bit precision=32
        data.extend(bit_layout(0, 32));

        let msg = decode(data).unwrap();
        assert_eq!(msg.size, 4);
        assert!(
            matches!(
                msg.datatype,
                Datatype::FixedPoint {
                    size: 4,
                    signed: false,
                    byte_order: ByteOrder::LittleEndian
                }
            ),
            "expected unsigned 4-byte LE FixedPoint, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_i64_be() {
        // class=0 (fixed-point), version=1, flags: bit0=1 (BE), bit3=1 (signed), size=8
        let mut data = header(0, 1, 0x09, 8);
        // properties: bit offset=0, bit precision=64
        data.extend(bit_layout(0, 64));

        let msg = decode(data).unwrap();
        assert_eq!(msg.size, 8);
        assert!(
            matches!(
                msg.datatype,
                Datatype::FixedPoint {
                    size: 8,
                    signed: true,
                    byte_order: ByteOrder::BigEndian
                }
            ),
            "expected signed 8-byte BE FixedPoint, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_f32_le() {
        // class=1 (float), version=1, size=4. Flags 0x20 leave the byte-order
        // bits (0 and 6) clear, i.e. LE; bit 5 is set but the parser ignores it.
        let mut data = header(1, 1, 0x20, 4);
        // properties: bit offset=0, bit precision=32
        data.extend(bit_layout(0, 32));
        // exp location=23, exp size=8, mant location=0, mant size=23
        data.extend_from_slice(&[23, 8, 0, 23]);
        // exp bias=127
        data.extend_from_slice(&127u32.to_le_bytes());

        let msg = decode(data).unwrap();
        assert_eq!(msg.size, 4);
        assert!(
            matches!(
                msg.datatype,
                Datatype::FloatingPoint {
                    size: 4,
                    byte_order: ByteOrder::LittleEndian
                }
            ),
            "expected 4-byte LE FloatingPoint, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_f64_be() {
        // class=1 (float), version=1, flags: bit0=1 (BE), bit6=0, size=8
        let mut data = header(1, 1, 0x01, 8);
        // properties: bit offset=0, bit precision=64
        data.extend(bit_layout(0, 64));
        // exp location=52, exp size=11, mant location=0, mant size=52
        data.extend_from_slice(&[52, 11, 0, 52]);
        // exp bias=1023
        data.extend_from_slice(&1023u32.to_le_bytes());

        let msg = decode(data).unwrap();
        assert_eq!(msg.size, 8);
        assert!(
            matches!(
                msg.datatype,
                Datatype::FloatingPoint {
                    size: 8,
                    byte_order: ByteOrder::BigEndian
                }
            ),
            "expected 8-byte BE FloatingPoint, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_string_fixed_ascii() {
        // class=3 (string), version=1, flags: padding=0 (null-term), charset=0 (ascii), size=32
        // No property bytes for strings.
        let data = header(3, 1, 0x00, 32);

        let msg = decode(data).unwrap();
        assert_eq!(msg.size, 32);
        assert!(
            matches!(
                msg.datatype,
                Datatype::String {
                    size: StringSize::Fixed(32),
                    encoding: StringEncoding::Ascii,
                    padding: StringPadding::NullTerminate
                }
            ),
            "expected fixed 32-byte ASCII String, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_string_utf8_space_pad() {
        // class=3, version=1, flags: padding=2 (space-pad), charset=1 (utf8)
        // padding bits 0-3 = 2, charset bits 4-7 = 1
        let flags: u32 = 0x02 | (0x01 << 4);
        let data = header(3, 1, flags, 16);

        let msg = decode(data).unwrap();
        assert!(
            matches!(
                msg.datatype,
                Datatype::String {
                    size: StringSize::Fixed(16),
                    encoding: StringEncoding::Utf8,
                    padding: StringPadding::SpacePad
                }
            ),
            "expected fixed 16-byte space-padded UTF-8 String, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_varlen_string_preserves_flags() {
        // class=9, version=1, flags: kind=string, padding=space-pad, charset=utf8
        let flags: u32 = 0x01 | (0x02 << 4) | (0x01 << 8);
        let mut data = header(9, 1, flags, 16);
        // Base type: u8 (class=0, version=1, flags=0, size=1, precision=8)
        data.extend(header(0, 1, 0x00, 1));
        data.extend(bit_layout(0, 8));

        let msg = decode(data).unwrap();
        match msg.datatype {
            Datatype::VarLen {
                base,
                kind,
                encoding,
                padding,
            } => {
                assert_eq!(kind, VarLenKind::String);
                assert_eq!(encoding, StringEncoding::Utf8);
                assert_eq!(padding, StringPadding::SpacePad);
                assert!(matches!(
                    *base,
                    Datatype::FixedPoint {
                        size: 1,
                        signed: false,
                        byte_order: ByteOrder::LittleEndian
                    }
                ));
            }
            other => panic!("expected VarLen, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_varlen_sequence_preserves_kind() {
        // class=9, version=1, flags=0 (sequence)
        let mut data = header(9, 1, 0x00, 16);
        // Base type: u32 (class=0, version=1, flags=0, size=4, precision=32)
        data.extend(header(0, 1, 0x00, 4));
        data.extend(bit_layout(0, 32));

        let msg = decode(data).unwrap();
        match msg.datatype {
            Datatype::VarLen { kind, base, .. } => {
                assert_eq!(kind, VarLenKind::Sequence);
                assert!(matches!(
                    *base,
                    Datatype::FixedPoint {
                        size: 4,
                        signed: false,
                        byte_order: ByteOrder::LittleEndian
                    }
                ));
            }
            other => panic!("expected VarLen, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_reference_object() {
        // class=7, version=1, flags: ref_type=0 (object), size=8
        let data = header(7, 1, 0x00, 8);

        let msg = decode(data).unwrap();
        assert!(
            matches!(
                msg.datatype,
                Datatype::Reference {
                    ref_type: ReferenceType::Object,
                    size: 8
                }
            ),
            "expected 8-byte object Reference, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_reference_region() {
        // class=7, version=1, flags: ref_type=1 (dataset region), size=12
        let data = header(7, 1, 0x01, 12);

        let msg = decode(data).unwrap();
        assert!(
            matches!(
                msg.datatype,
                Datatype::Reference {
                    ref_type: ReferenceType::DatasetRegion,
                    size: 12
                }
            ),
            "expected 12-byte region Reference, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_parse_compound_v3_variable_member_offsets() {
        // class=6 (compound), version=3, flags: n_members=2, size=16
        // A 16-byte compound stores each member offset in a single byte.
        let mut data = header(6, 3, 2, 16);

        // Member 0: "dataset" at offset 0, an 8-byte object reference.
        data.extend_from_slice(b"dataset\0");
        data.push(0x00);
        data.extend(header(7, 1, 0x00, 8));

        // Member 1: "dimension" at offset 8, a u32.
        data.extend_from_slice(b"dimension\0");
        data.push(0x08);
        data.extend(header(0, 1, 0x00, 4));
        data.extend(bit_layout(0, 32));

        let msg = decode(data).unwrap();
        match msg.datatype {
            Datatype::Compound { size, fields } => {
                assert_eq!(size, 16);
                let layout: Vec<(&str, u32)> = fields
                    .iter()
                    .map(|field| (field.name.as_str(), field.byte_offset))
                    .collect();
                assert_eq!(layout, vec![("dataset", 0), ("dimension", 8)]);
            }
            other => panic!("expected Compound, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_enum_u8() {
        // class=8 (enum), version=3, flags: n_members=2, size=1
        let mut data = header(8, 3, 2, 1);

        // Base type: u8 (class=0, version=1, flags=0, size=1, props: offset=0 precision=8)
        data.extend(header(0, 1, 0, 1));
        data.extend(bit_layout(0, 8));

        // Member names
        data.extend_from_slice(b"OFF\0");
        data.extend_from_slice(b"ON\0");

        // Member values (1 byte each, matching size=1)
        data.extend_from_slice(&[0x00, 0x01]);

        let msg = decode(data).unwrap();
        match msg.datatype {
            Datatype::Enum { base, members } => {
                assert!(matches!(
                    *base,
                    Datatype::FixedPoint {
                        size: 1,
                        signed: false,
                        ..
                    }
                ));
                let pairs: Vec<(&str, &[u8])> = members
                    .iter()
                    .map(|member| (member.name.as_str(), member.value.as_slice()))
                    .collect();
                assert_eq!(pairs, vec![("OFF", &[0x00u8][..]), ("ON", &[0x01u8][..])]);
            }
            other => panic!("expected Enum, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_bitfield() {
        // class=4 (bitfield), version=1, flags: bit0=0 (LE), size=2
        let mut data = header(4, 1, 0x00, 2);
        // properties: bit offset=0, bit precision=16
        data.extend(bit_layout(0, 16));

        let msg = decode(data).unwrap();
        assert!(
            matches!(
                msg.datatype,
                Datatype::Bitfield {
                    size: 2,
                    byte_order: ByteOrder::LittleEndian
                }
            ),
            "expected 2-byte LE Bitfield, got {:?}",
            msg.datatype
        );
    }

    #[test]
    fn test_unsupported_class() {
        // class=15 (invalid), version=1, flags=0, size=0
        let data = header(15, 1, 0, 0);

        assert!(decode(data).is_err());
    }
}
907
908        let mut cursor = Cursor::new(&data);
909        assert!(parse(&mut cursor, data.len()).is_err());
910    }
911}