1use crate::error::{ByteOrder, Error, Result};
22use crate::io::Cursor;
23
/// Length category of a string datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringSize {
    /// Fixed-length string; carries the size field taken from the datatype header.
    Fixed(u32),
    /// Variable-length string (chosen by `parse_string` when the header size is zero).
    Variable,
}
36
/// Character set of a string datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// Charset code 0; also used as the fallback for unrecognised codes.
    Ascii,
    /// Charset code 1.
    Utf8,
}
43
/// Padding convention of a string datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringPadding {
    /// Padding code 0; also used as the fallback for unrecognised codes.
    NullTerminate,
    /// Padding code 1.
    NullPad,
    /// Padding code 2.
    SpacePad,
}
51
52#[derive(Debug, Clone)]
54pub struct CompoundField {
55 pub name: String,
56 pub byte_offset: u32,
57 pub datatype: Datatype,
58}
59
/// One named value of an enumeration datatype.
#[derive(Debug, Clone)]
pub struct EnumMember {
    /// Member name as read from the message.
    pub name: String,
    /// Raw value bytes, copied verbatim from the message (not interpreted).
    pub value: Vec<u8>,
}
66
/// Kind of a reference datatype.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceType {
    /// Reference code 0; also used as the fallback for unrecognised codes.
    Object,
    /// Reference code 1.
    DatasetRegion,
}
75
76#[derive(Debug, Clone)]
78pub enum Datatype {
79 FixedPoint {
81 size: u8,
82 signed: bool,
83 byte_order: ByteOrder,
84 },
85 FloatingPoint { size: u8, byte_order: ByteOrder },
87 String {
89 size: StringSize,
90 encoding: StringEncoding,
91 padding: StringPadding,
92 },
93 Compound {
95 size: u32,
96 fields: Vec<CompoundField>,
97 },
98 Array { base: Box<Datatype>, dims: Vec<u64> },
100 Enum {
102 base: Box<Datatype>,
103 members: Vec<EnumMember>,
104 },
105 VarLen { base: Box<Datatype> },
107 Opaque { size: u32, tag: String },
109 Reference { ref_type: ReferenceType, size: u8 },
111 Bitfield { size: u8, byte_order: ByteOrder },
113}
114
115#[derive(Debug, Clone)]
118pub struct DatatypeMessage {
119 pub datatype: Datatype,
120 pub size: u32,
122}
123
124pub fn parse(cursor: &mut Cursor<'_>, msg_size: usize) -> Result<DatatypeMessage> {
133 let start = cursor.position();
134 let (dt, size) = parse_datatype_description(cursor)?;
135
136 let consumed = (cursor.position() - start) as usize;
137 if consumed < msg_size {
138 cursor.skip(msg_size - consumed)?;
139 }
140
141 Ok(DatatypeMessage { datatype: dt, size })
142}
143
144pub fn parse_datatype_description(cursor: &mut Cursor<'_>) -> Result<(Datatype, u32)> {
148 let class_and_flags = cursor.read_u32_le()?;
149 let class = (class_and_flags & 0x0F) as u8;
150 let version = ((class_and_flags >> 4) & 0x0F) as u8;
151 let class_flags = class_and_flags >> 8; let size = cursor.read_u32_le()?;
153
154 let dt = match class {
155 0 => parse_fixed_point(cursor, class_flags, size)?,
156 1 => parse_floating_point(cursor, class_flags, size)?,
157 2 => parse_time(cursor, size)?,
158 3 => parse_string(class_flags, size)?,
159 4 => parse_bitfield(cursor, class_flags, size)?,
160 5 => parse_opaque(cursor, class_flags, size)?,
161 6 => parse_compound(cursor, class_flags, size, version)?,
162 7 => parse_reference(class_flags, size)?,
163 8 => parse_enum(cursor, class_flags, size)?,
164 9 => parse_varlen(cursor, class_flags, size)?,
165 10 => parse_array(cursor, size, version)?,
166 c => return Err(Error::UnsupportedDatatypeClass(c)),
167 };
168
169 Ok((dt, size))
170}
171
172fn parse_fixed_point(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
177 let byte_order = if (flags & 0x01) != 0 {
179 ByteOrder::BigEndian
180 } else {
181 ByteOrder::LittleEndian
182 };
183 let signed = (flags & 0x08) != 0;
185
186 let _bit_offset = cursor.read_u16_le()?;
188 let _bit_precision = cursor.read_u16_le()?;
189
190 Ok(Datatype::FixedPoint {
191 size: size as u8,
192 signed,
193 byte_order,
194 })
195}
196
197fn parse_floating_point(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
202 let bo_lo = flags & 0x01;
205 let bo_hi = (flags >> 6) & 0x01;
206 let byte_order = match (bo_hi, bo_lo) {
207 (0, 0) => ByteOrder::LittleEndian,
208 (0, 1) => ByteOrder::BigEndian,
209 _ => ByteOrder::LittleEndian,
211 };
212
213 let _bit_offset = cursor.read_u16_le()?;
218 let _bit_precision = cursor.read_u16_le()?;
219 let _exp_location = cursor.read_u8()?;
220 let _exp_size = cursor.read_u8()?;
221 let _mant_location = cursor.read_u8()?;
222 let _mant_size = cursor.read_u8()?;
223 let _exp_bias = cursor.read_u32_le()?;
224
225 Ok(Datatype::FloatingPoint {
226 size: size as u8,
227 byte_order,
228 })
229}
230
231fn parse_time(cursor: &mut Cursor<'_>, size: u32) -> Result<Datatype> {
236 let _bit_precision = cursor.read_u16_le()?;
238 Ok(Datatype::Opaque {
239 size,
240 tag: "HDF5_TIME".to_string(),
241 })
242}
243
244fn parse_string(flags: u32, size: u32) -> Result<Datatype> {
249 let padding = match flags & 0x0F {
251 0 => StringPadding::NullTerminate,
252 1 => StringPadding::NullPad,
253 2 => StringPadding::SpacePad,
254 _ => StringPadding::NullTerminate,
255 };
256
257 let encoding = match (flags >> 4) & 0x0F {
259 0 => StringEncoding::Ascii,
260 1 => StringEncoding::Utf8,
261 _ => StringEncoding::Ascii,
262 };
263
264 let string_size = if size == 0 {
267 StringSize::Variable
270 } else {
271 StringSize::Fixed(size)
272 };
273
274 Ok(Datatype::String {
275 size: string_size,
276 encoding,
277 padding,
278 })
279}
280
281fn parse_bitfield(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
286 let byte_order = if (flags & 0x01) != 0 {
287 ByteOrder::BigEndian
288 } else {
289 ByteOrder::LittleEndian
290 };
291
292 let _bit_offset = cursor.read_u16_le()?;
294 let _bit_precision = cursor.read_u16_le()?;
295
296 Ok(Datatype::Bitfield {
297 size: size as u8,
298 byte_order,
299 })
300}
301
302fn parse_opaque(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
307 let tag_len = (flags & 0xFF) as usize;
309
310 let tag = if tag_len > 0 {
311 let tag_bytes = cursor.read_bytes(tag_len)?;
312 let end = tag_bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
314 String::from_utf8_lossy(&tag_bytes[..end]).into_owned()
315 } else {
316 String::new()
317 };
318
319 let padded = (tag_len + 7) & !7;
321 if padded > tag_len {
322 cursor.skip(padded - tag_len)?;
323 }
324
325 Ok(Datatype::Opaque { size, tag })
326}
327
328fn parse_compound(cursor: &mut Cursor<'_>, flags: u32, size: u32, version: u8) -> Result<Datatype> {
333 let n_members = (flags & 0xFFFF) as usize;
335 let byte_offset_size = compound_member_offset_size(size);
336
337 let mut fields = Vec::with_capacity(n_members);
338
339 for _ in 0..n_members {
340 let name = cursor.read_null_terminated_string()?;
341
342 if version < 3 {
343 cursor.align(8)?;
348 }
349
350 let byte_offset = if version == 1 {
351 cursor.read_u32_le()?
353 } else if version >= 3 {
354 cursor.read_uvar(byte_offset_size)? as u32
355 } else {
356 cursor.read_u32_le()?
358 };
359
360 if version == 1 {
361 let _dimensionality = cursor.read_u8()?;
364 cursor.skip(3)?; cursor.skip(4)?; cursor.skip(4)?; cursor.skip(16)?; }
369
370 let (member_dt, _member_size) = parse_datatype_description(cursor)?;
371
372 fields.push(CompoundField {
373 name,
374 byte_offset,
375 datatype: member_dt,
376 });
377 }
378
379 Ok(Datatype::Compound { size, fields })
380}
381
/// Returns how many bytes (1 to 4) are needed to hold `size` as an unsigned integer.
///
/// Used to pick the width of variable-width member offsets in compound types.
fn compound_member_offset_size(size: u32) -> usize {
    if size <= 0xFF {
        1
    } else if size <= 0xFFFF {
        2
    } else if size <= 0xFF_FFFF {
        3
    } else {
        4
    }
}
390
391fn parse_reference(flags: u32, size: u32) -> Result<Datatype> {
396 let ref_type = match flags & 0x0F {
398 0 => ReferenceType::Object,
399 1 => ReferenceType::DatasetRegion,
400 _ => ReferenceType::Object,
401 };
402
403 Ok(Datatype::Reference {
406 ref_type,
407 size: size as u8,
408 })
409}
410
411fn parse_enum(cursor: &mut Cursor<'_>, flags: u32, size: u32) -> Result<Datatype> {
416 let n_members = (flags & 0xFFFF) as usize;
417
418 let (base_dt, _base_size) = parse_datatype_description(cursor)?;
420
421 let mut names = Vec::with_capacity(n_members);
423 for _ in 0..n_members {
424 names.push(cursor.read_null_terminated_string()?);
425 }
426
427 let member_value_size = size as usize;
429 let mut members = Vec::with_capacity(n_members);
430 for name in names {
431 let value = cursor.read_bytes(member_value_size)?.to_vec();
432 members.push(EnumMember { name, value });
433 }
434
435 Ok(Datatype::Enum {
436 base: Box::new(base_dt),
437 members,
438 })
439}
440
441fn parse_varlen(cursor: &mut Cursor<'_>, flags: u32, _size: u32) -> Result<Datatype> {
446 let _vlen_type = flags & 0x0F;
448 let _padding = (flags >> 4) & 0x0F;
450 let _charset = (flags >> 8) & 0x0F;
452
453 let (base_dt, _base_size) = parse_datatype_description(cursor)?;
455
456 Ok(Datatype::VarLen {
457 base: Box::new(base_dt),
458 })
459}
460
461fn parse_array(cursor: &mut Cursor<'_>, _size: u32, version: u8) -> Result<Datatype> {
466 let rank = cursor.read_u8()? as usize;
467
468 if version < 3 {
469 cursor.skip(3)?;
471 }
472
473 let mut dims = Vec::with_capacity(rank);
474 for _ in 0..rank {
475 dims.push(cursor.read_u32_le()? as u64);
476 }
477
478 if version < 3 {
479 cursor.skip(rank * 4)?;
481 }
482
483 let (base_dt, _base_size) = parse_datatype_description(cursor)?;
485
486 Ok(Datatype::Array {
487 base: Box::new(base_dt),
488 dims,
489 })
490}
491
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs class (bits 0-3), version (bits 4-7) and flags (bits 8 and up)
    /// into the first header word.
    fn class_word(class: u8, version: u8, flags: u32) -> u32 {
        u32::from(class) | (u32::from(version) << 4) | (flags << 8)
    }

    /// Builds the 8-byte datatype header: class word, then size, both little-endian.
    fn header(class: u8, version: u8, flags: u32, size: u32) -> Vec<u8> {
        let mut bytes = class_word(class, version, flags).to_le_bytes().to_vec();
        bytes.extend_from_slice(&size.to_le_bytes());
        bytes
    }

    /// Appends the bit-offset / bit-precision property pair.
    fn push_bit_props(bytes: &mut Vec<u8>, bit_offset: u16, bit_precision: u16) {
        bytes.extend_from_slice(&bit_offset.to_le_bytes());
        bytes.extend_from_slice(&bit_precision.to_le_bytes());
    }

    /// Parses `data` as one message spanning the whole buffer, panicking on error.
    fn parse_all(data: &[u8]) -> DatatypeMessage {
        let mut cursor = Cursor::new(data);
        parse(&mut cursor, data.len()).unwrap()
    }

    #[test]
    fn test_parse_u32_le() {
        let mut data = header(0, 1, 0x00, 4);
        push_bit_props(&mut data, 0, 32);

        let msg = parse_all(&data);
        assert_eq!(msg.size, 4);
        match msg.datatype {
            Datatype::FixedPoint {
                size,
                signed,
                byte_order,
            } => {
                assert_eq!(size, 4);
                assert!(!signed);
                assert_eq!(byte_order, ByteOrder::LittleEndian);
            }
            other => panic!("expected FixedPoint, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_i64_be() {
        let mut data = header(0, 1, 0x09, 8);
        push_bit_props(&mut data, 0, 64);

        let msg = parse_all(&data);
        assert_eq!(msg.size, 8);
        match msg.datatype {
            Datatype::FixedPoint {
                size,
                signed,
                byte_order,
            } => {
                assert_eq!(size, 8);
                assert!(signed);
                assert_eq!(byte_order, ByteOrder::BigEndian);
            }
            other => panic!("expected FixedPoint, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_f32_le() {
        let mut data = header(1, 1, 0x20, 4);
        push_bit_props(&mut data, 0, 32);
        data.extend_from_slice(&[23, 8, 0, 23]);
        data.extend_from_slice(&127u32.to_le_bytes());

        let msg = parse_all(&data);
        assert_eq!(msg.size, 4);
        match msg.datatype {
            Datatype::FloatingPoint { size, byte_order } => {
                assert_eq!(size, 4);
                assert_eq!(byte_order, ByteOrder::LittleEndian);
            }
            other => panic!("expected FloatingPoint, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_f64_be() {
        let mut data = header(1, 1, 0x01, 8);
        push_bit_props(&mut data, 0, 64);
        data.extend_from_slice(&[52, 11, 0, 52]);
        data.extend_from_slice(&1023u32.to_le_bytes());

        let msg = parse_all(&data);
        assert_eq!(msg.size, 8);
        match msg.datatype {
            Datatype::FloatingPoint { size, byte_order } => {
                assert_eq!(size, 8);
                assert_eq!(byte_order, ByteOrder::BigEndian);
            }
            other => panic!("expected FloatingPoint, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_string_fixed_ascii() {
        let data = header(3, 1, 0x00, 32);

        let msg = parse_all(&data);
        assert_eq!(msg.size, 32);
        match msg.datatype {
            Datatype::String {
                size,
                encoding,
                padding,
            } => {
                assert_eq!(size, StringSize::Fixed(32));
                assert_eq!(encoding, StringEncoding::Ascii);
                assert_eq!(padding, StringPadding::NullTerminate);
            }
            other => panic!("expected String, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_string_utf8_space_pad() {
        let flags: u32 = 0x02 | (0x01 << 4);
        let data = header(3, 1, flags, 16);

        match parse_all(&data).datatype {
            Datatype::String {
                size,
                encoding,
                padding,
            } => {
                assert_eq!(size, StringSize::Fixed(16));
                assert_eq!(encoding, StringEncoding::Utf8);
                assert_eq!(padding, StringPadding::SpacePad);
            }
            other => panic!("expected String, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_reference_object() {
        let data = header(7, 1, 0x00, 8);

        match parse_all(&data).datatype {
            Datatype::Reference { ref_type, size } => {
                assert_eq!(ref_type, ReferenceType::Object);
                assert_eq!(size, 8);
            }
            other => panic!("expected Reference, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_reference_region() {
        let data = header(7, 1, 0x01, 12);

        match parse_all(&data).datatype {
            Datatype::Reference { ref_type, size } => {
                assert_eq!(ref_type, ReferenceType::DatasetRegion);
                assert_eq!(size, 12);
            }
            other => panic!("expected Reference, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_compound_v3_variable_member_offsets() {
        let mut data = header(6, 3, 2, 16);

        // First member: name, one-byte offset, reference datatype.
        data.extend_from_slice(b"dataset\0");
        data.push(0x00);
        data.extend(header(7, 1, 0x00, 8));

        // Second member: name, one-byte offset, fixed-point datatype.
        data.extend_from_slice(b"dimension\0");
        data.push(0x08);
        data.extend(header(0, 1, 0x00, 4));
        push_bit_props(&mut data, 0, 32);

        match parse_all(&data).datatype {
            Datatype::Compound { size, fields } => {
                assert_eq!(size, 16);
                assert_eq!(fields.len(), 2);
                assert_eq!(fields[0].name, "dataset");
                assert_eq!(fields[0].byte_offset, 0);
                assert_eq!(fields[1].name, "dimension");
                assert_eq!(fields[1].byte_offset, 8);
            }
            other => panic!("expected Compound, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_enum_u8() {
        let mut data = header(8, 3, 2, 1);

        // Base type: one-byte fixed-point.
        data.extend(header(0, 1, 0, 1));
        push_bit_props(&mut data, 0, 8);

        // Names, then values.
        data.extend_from_slice(b"OFF\0");
        data.extend_from_slice(b"ON\0");
        data.extend_from_slice(&[0x00, 0x01]);

        match parse_all(&data).datatype {
            Datatype::Enum { base, members } => {
                assert!(matches!(
                    base.as_ref(),
                    Datatype::FixedPoint {
                        size: 1,
                        signed: false,
                        ..
                    }
                ));
                assert_eq!(members.len(), 2);
                assert_eq!(members[0].name, "OFF");
                assert_eq!(members[0].value, vec![0x00]);
                assert_eq!(members[1].name, "ON");
                assert_eq!(members[1].value, vec![0x01]);
            }
            other => panic!("expected Enum, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_bitfield() {
        let mut data = header(4, 1, 0x00, 2);
        push_bit_props(&mut data, 0, 16);

        match parse_all(&data).datatype {
            Datatype::Bitfield { size, byte_order } => {
                assert_eq!(size, 2);
                assert_eq!(byte_order, ByteOrder::LittleEndian);
            }
            other => panic!("expected Bitfield, got {:?}", other),
        }
    }

    #[test]
    fn test_unsupported_class() {
        let data = header(15, 1, 0, 0);

        let mut cursor = Cursor::new(&data);
        assert!(parse(&mut cursor, data.len()).is_err());
    }
}