Skip to main content

oxigdal_hdf5/
datatype.rs

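//! HDF5 datatype definitions and conversions.
//!
//! This module provides type definitions for HDF5 data types (integer,
//! floating-point, string, compound, enumeration, array, variable-length and
//! opaque types), together with helpers that read and write the primitive
//! numeric types from and to byte slices in either byte order.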
5
6use crate::error::{Hdf5Error, Result};
7use byteorder::{BigEndian, ByteOrder, LittleEndian};
8use serde::{Deserialize, Serialize};
9use std::fmt;
10
11/// HDF5 datatype class
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13#[repr(u8)]
14pub enum DatatypeClass {
15    /// Fixed-point (integer) type
16    FixedPoint = 0,
17    /// Floating-point type
18    FloatingPoint = 1,
19    /// Time type
20    Time = 2,
21    /// String type
22    String = 3,
23    /// Bitfield type
24    Bitfield = 4,
25    /// Opaque type
26    Opaque = 5,
27    /// Compound type
28    Compound = 6,
29    /// Reference type
30    Reference = 7,
31    /// Enumeration type
32    Enum = 8,
33    /// Variable-length type
34    VariableLength = 9,
35    /// Array type
36    Array = 10,
37}
38
39impl DatatypeClass {
40    /// Create from u8 value
41    pub fn from_u8(value: u8) -> Result<Self> {
42        match value {
43            0 => Ok(Self::FixedPoint),
44            1 => Ok(Self::FloatingPoint),
45            2 => Ok(Self::Time),
46            3 => Ok(Self::String),
47            4 => Ok(Self::Bitfield),
48            5 => Ok(Self::Opaque),
49            6 => Ok(Self::Compound),
50            7 => Ok(Self::Reference),
51            8 => Ok(Self::Enum),
52            9 => Ok(Self::VariableLength),
53            10 => Ok(Self::Array),
54            _ => Err(Hdf5Error::invalid_datatype(format!(
55                "Unknown datatype class: {}",
56                value
57            ))),
58        }
59    }
60}
61
62/// Byte order (endianness) for HDF5 data
63#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
64pub enum Hdf5ByteOrder {
65    /// Little-endian
66    LittleEndian,
67    /// Big-endian
68    BigEndian,
69}
70
71/// String padding type
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
73pub enum StringPadding {
74    /// Null-terminated
75    NullTerminated,
76    /// Null-padded
77    NullPadded,
78    /// Space-padded
79    SpacePadded,
80}
81
82/// HDF5 datatype
83#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
84pub enum Datatype {
85    /// 8-bit signed integer
86    Int8,
87    /// 8-bit unsigned integer
88    UInt8,
89    /// 16-bit signed integer
90    Int16,
91    /// 16-bit unsigned integer
92    UInt16,
93    /// 32-bit signed integer
94    Int32,
95    /// 32-bit unsigned integer
96    UInt32,
97    /// 64-bit signed integer
98    Int64,
99    /// 64-bit unsigned integer
100    UInt64,
101    /// 32-bit floating-point
102    Float32,
103    /// 64-bit floating-point
104    Float64,
105    /// Fixed-length string
106    FixedString {
107        /// String length
108        length: usize,
109        /// Padding type
110        padding: StringPadding,
111    },
112    /// Variable-length string
113    VarString {
114        /// Padding type
115        padding: StringPadding,
116    },
117    /// Compound type (struct)
118    Compound {
119        /// Size in bytes
120        size: usize,
121        /// Member fields
122        members: Vec<CompoundMember>,
123    },
124    /// Enumeration type
125    Enum {
126        /// Base integer type
127        base_type: Box<Datatype>,
128        /// Enum members
129        members: Vec<EnumMember>,
130    },
131    /// Array type
132    Array {
133        /// Base type
134        base_type: Box<Datatype>,
135        /// Array dimensions
136        dimensions: Vec<usize>,
137    },
138    /// Variable-length type
139    VarLen {
140        /// Base type
141        base_type: Box<Datatype>,
142    },
143    /// Opaque type
144    Opaque {
145        /// Size in bytes
146        size: usize,
147        /// Tag
148        tag: String,
149    },
150}
151
152impl Datatype {
153    /// Get the size in bytes of this datatype
154    pub fn size(&self) -> usize {
155        match self {
156            Self::Int8 | Self::UInt8 => 1,
157            Self::Int16 | Self::UInt16 => 2,
158            Self::Int32 | Self::UInt32 | Self::Float32 => 4,
159            Self::Int64 | Self::UInt64 | Self::Float64 => 8,
160            Self::FixedString { length, .. } => *length,
161            Self::VarString { .. } => 16, // Size of variable-length heap reference
162            Self::Compound { size, .. } => *size,
163            Self::Opaque { size, .. } => *size,
164            Self::Array {
165                base_type,
166                dimensions,
167            } => {
168                let base_size = base_type.size();
169                let total_elements: usize = dimensions.iter().product();
170                base_size * total_elements
171            }
172            Self::VarLen { .. } => 16, // Size of variable-length heap reference
173            Self::Enum { base_type, .. } => base_type.size(),
174        }
175    }
176
177    /// Get the datatype class
178    pub fn class(&self) -> DatatypeClass {
179        match self {
180            Self::Int8
181            | Self::UInt8
182            | Self::Int16
183            | Self::UInt16
184            | Self::Int32
185            | Self::UInt32
186            | Self::Int64
187            | Self::UInt64 => DatatypeClass::FixedPoint,
188            Self::Float32 | Self::Float64 => DatatypeClass::FloatingPoint,
189            Self::FixedString { .. } | Self::VarString { .. } => DatatypeClass::String,
190            Self::Compound { .. } => DatatypeClass::Compound,
191            Self::Enum { .. } => DatatypeClass::Enum,
192            Self::Array { .. } => DatatypeClass::Array,
193            Self::VarLen { .. } => DatatypeClass::VariableLength,
194            Self::Opaque { .. } => DatatypeClass::Opaque,
195        }
196    }
197
198    /// Get a human-readable name for this datatype
199    pub fn name(&self) -> String {
200        match self {
201            Self::Int8 => "int8".to_string(),
202            Self::UInt8 => "uint8".to_string(),
203            Self::Int16 => "int16".to_string(),
204            Self::UInt16 => "uint16".to_string(),
205            Self::Int32 => "int32".to_string(),
206            Self::UInt32 => "uint32".to_string(),
207            Self::Int64 => "int64".to_string(),
208            Self::UInt64 => "uint64".to_string(),
209            Self::Float32 => "float32".to_string(),
210            Self::Float64 => "float64".to_string(),
211            Self::FixedString { length, .. } => format!("string[{}]", length),
212            Self::VarString { .. } => "varstring".to_string(),
213            Self::Compound { members, .. } => {
214                let member_names: Vec<_> = members.iter().map(|m| m.name.as_str()).collect();
215                format!("compound{{{}}}", member_names.join(", "))
216            }
217            Self::Enum { .. } => "enum".to_string(),
218            Self::Array {
219                base_type,
220                dimensions,
221            } => {
222                let dims: Vec<_> = dimensions.iter().map(|d| d.to_string()).collect();
223                format!("{}[{}]", base_type.name(), dims.join(","))
224            }
225            Self::VarLen { base_type } => format!("varlen<{}>", base_type.name()),
226            Self::Opaque { tag, .. } => format!("opaque:{}", tag),
227        }
228    }
229
230    /// Check if this is an integer type
231    pub fn is_integer(&self) -> bool {
232        matches!(
233            self,
234            Self::Int8
235                | Self::UInt8
236                | Self::Int16
237                | Self::UInt16
238                | Self::Int32
239                | Self::UInt32
240                | Self::Int64
241                | Self::UInt64
242        )
243    }
244
245    /// Check if this is a floating-point type
246    pub fn is_float(&self) -> bool {
247        matches!(self, Self::Float32 | Self::Float64)
248    }
249
250    /// Check if this is a string type
251    pub fn is_string(&self) -> bool {
252        matches!(self, Self::FixedString { .. } | Self::VarString { .. })
253    }
254
255    /// Check if this is a compound type
256    pub fn is_compound(&self) -> bool {
257        matches!(self, Self::Compound { .. })
258    }
259}
260
261impl fmt::Display for Datatype {
262    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263        write!(f, "{}", self.name())
264    }
265}
266
267/// Compound type member
268#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
269pub struct CompoundMember {
270    /// Member name
271    pub name: String,
272    /// Member datatype
273    pub datatype: Datatype,
274    /// Byte offset within compound type
275    pub offset: usize,
276}
277
278impl CompoundMember {
279    /// Create a new compound member
280    pub fn new(name: String, datatype: Datatype, offset: usize) -> Self {
281        Self {
282            name,
283            datatype,
284            offset,
285        }
286    }
287}
288
289/// Enumeration type member
290#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
291pub struct EnumMember {
292    /// Member name
293    pub name: String,
294    /// Member value
295    pub value: i64,
296}
297
298impl EnumMember {
299    /// Create a new enum member
300    pub fn new(name: String, value: i64) -> Self {
301        Self { name, value }
302    }
303}
304
/// Type conversion utilities.
///
/// A stateless unit struct: it carries no data and only serves as a namespace
/// for its associated functions. The common traits are derived so the type can
/// be debug-printed, copied and default-constructed like any other public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct TypeConverter;
307
308impl TypeConverter {
309    /// Read i8 from bytes
310    pub fn read_i8(data: &[u8]) -> Result<i8> {
311        if data.is_empty() {
312            return Err(Hdf5Error::invalid_datatype("Empty data for i8"));
313        }
314        Ok(data[0] as i8)
315    }
316
317    /// Read u8 from bytes
318    pub fn read_u8(data: &[u8]) -> Result<u8> {
319        if data.is_empty() {
320            return Err(Hdf5Error::invalid_datatype("Empty data for u8"));
321        }
322        Ok(data[0])
323    }
324
325    /// Read i16 from bytes (little-endian)
326    pub fn read_i16_le(data: &[u8]) -> Result<i16> {
327        if data.len() < 2 {
328            return Err(Hdf5Error::invalid_datatype("Insufficient data for i16"));
329        }
330        Ok(LittleEndian::read_i16(data))
331    }
332
333    /// Read i16 from bytes (big-endian)
334    pub fn read_i16_be(data: &[u8]) -> Result<i16> {
335        if data.len() < 2 {
336            return Err(Hdf5Error::invalid_datatype("Insufficient data for i16"));
337        }
338        Ok(BigEndian::read_i16(data))
339    }
340
341    /// Read u16 from bytes (little-endian)
342    pub fn read_u16_le(data: &[u8]) -> Result<u16> {
343        if data.len() < 2 {
344            return Err(Hdf5Error::invalid_datatype("Insufficient data for u16"));
345        }
346        Ok(LittleEndian::read_u16(data))
347    }
348
349    /// Read u16 from bytes (big-endian)
350    pub fn read_u16_be(data: &[u8]) -> Result<u16> {
351        if data.len() < 2 {
352            return Err(Hdf5Error::invalid_datatype("Insufficient data for u16"));
353        }
354        Ok(BigEndian::read_u16(data))
355    }
356
357    /// Read i32 from bytes (little-endian)
358    pub fn read_i32_le(data: &[u8]) -> Result<i32> {
359        if data.len() < 4 {
360            return Err(Hdf5Error::invalid_datatype("Insufficient data for i32"));
361        }
362        Ok(LittleEndian::read_i32(data))
363    }
364
365    /// Read i32 from bytes (big-endian)
366    pub fn read_i32_be(data: &[u8]) -> Result<i32> {
367        if data.len() < 4 {
368            return Err(Hdf5Error::invalid_datatype("Insufficient data for i32"));
369        }
370        Ok(BigEndian::read_i32(data))
371    }
372
373    /// Read u32 from bytes (little-endian)
374    pub fn read_u32_le(data: &[u8]) -> Result<u32> {
375        if data.len() < 4 {
376            return Err(Hdf5Error::invalid_datatype("Insufficient data for u32"));
377        }
378        Ok(LittleEndian::read_u32(data))
379    }
380
381    /// Read u32 from bytes (big-endian)
382    pub fn read_u32_be(data: &[u8]) -> Result<u32> {
383        if data.len() < 4 {
384            return Err(Hdf5Error::invalid_datatype("Insufficient data for u32"));
385        }
386        Ok(BigEndian::read_u32(data))
387    }
388
389    /// Read i64 from bytes (little-endian)
390    pub fn read_i64_le(data: &[u8]) -> Result<i64> {
391        if data.len() < 8 {
392            return Err(Hdf5Error::invalid_datatype("Insufficient data for i64"));
393        }
394        Ok(LittleEndian::read_i64(data))
395    }
396
397    /// Read i64 from bytes (big-endian)
398    pub fn read_i64_be(data: &[u8]) -> Result<i64> {
399        if data.len() < 8 {
400            return Err(Hdf5Error::invalid_datatype("Insufficient data for i64"));
401        }
402        Ok(BigEndian::read_i64(data))
403    }
404
405    /// Read u64 from bytes (little-endian)
406    pub fn read_u64_le(data: &[u8]) -> Result<u64> {
407        if data.len() < 8 {
408            return Err(Hdf5Error::invalid_datatype("Insufficient data for u64"));
409        }
410        Ok(LittleEndian::read_u64(data))
411    }
412
413    /// Read u64 from bytes (big-endian)
414    pub fn read_u64_be(data: &[u8]) -> Result<u64> {
415        if data.len() < 8 {
416            return Err(Hdf5Error::invalid_datatype("Insufficient data for u64"));
417        }
418        Ok(BigEndian::read_u64(data))
419    }
420
421    /// Read f32 from bytes (little-endian)
422    pub fn read_f32_le(data: &[u8]) -> Result<f32> {
423        if data.len() < 4 {
424            return Err(Hdf5Error::invalid_datatype("Insufficient data for f32"));
425        }
426        Ok(LittleEndian::read_f32(data))
427    }
428
429    /// Read f32 from bytes (big-endian)
430    pub fn read_f32_be(data: &[u8]) -> Result<f32> {
431        if data.len() < 4 {
432            return Err(Hdf5Error::invalid_datatype("Insufficient data for f32"));
433        }
434        Ok(BigEndian::read_f32(data))
435    }
436
437    /// Read f64 from bytes (little-endian)
438    pub fn read_f64_le(data: &[u8]) -> Result<f64> {
439        if data.len() < 8 {
440            return Err(Hdf5Error::invalid_datatype("Insufficient data for f64"));
441        }
442        Ok(LittleEndian::read_f64(data))
443    }
444
445    /// Read f64 from bytes (big-endian)
446    pub fn read_f64_be(data: &[u8]) -> Result<f64> {
447        if data.len() < 8 {
448            return Err(Hdf5Error::invalid_datatype("Insufficient data for f64"));
449        }
450        Ok(BigEndian::read_f64(data))
451    }
452
453    /// Write i8 to bytes
454    pub fn write_i8(data: &mut [u8], value: i8) -> Result<()> {
455        if data.is_empty() {
456            return Err(Hdf5Error::invalid_datatype("Empty buffer for i8"));
457        }
458        data[0] = value as u8;
459        Ok(())
460    }
461
462    /// Write u8 to bytes
463    pub fn write_u8(data: &mut [u8], value: u8) -> Result<()> {
464        if data.is_empty() {
465            return Err(Hdf5Error::invalid_datatype("Empty buffer for u8"));
466        }
467        data[0] = value;
468        Ok(())
469    }
470
471    /// Write i16 to bytes (little-endian)
472    pub fn write_i16_le(data: &mut [u8], value: i16) -> Result<()> {
473        if data.len() < 2 {
474            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for i16"));
475        }
476        LittleEndian::write_i16(data, value);
477        Ok(())
478    }
479
480    /// Write i16 to bytes (big-endian)
481    pub fn write_i16_be(data: &mut [u8], value: i16) -> Result<()> {
482        if data.len() < 2 {
483            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for i16"));
484        }
485        BigEndian::write_i16(data, value);
486        Ok(())
487    }
488
489    /// Write u16 to bytes (little-endian)
490    pub fn write_u16_le(data: &mut [u8], value: u16) -> Result<()> {
491        if data.len() < 2 {
492            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for u16"));
493        }
494        LittleEndian::write_u16(data, value);
495        Ok(())
496    }
497
498    /// Write u16 to bytes (big-endian)
499    pub fn write_u16_be(data: &mut [u8], value: u16) -> Result<()> {
500        if data.len() < 2 {
501            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for u16"));
502        }
503        BigEndian::write_u16(data, value);
504        Ok(())
505    }
506
507    /// Write i32 to bytes (little-endian)
508    pub fn write_i32_le(data: &mut [u8], value: i32) -> Result<()> {
509        if data.len() < 4 {
510            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for i32"));
511        }
512        LittleEndian::write_i32(data, value);
513        Ok(())
514    }
515
516    /// Write i32 to bytes (big-endian)
517    pub fn write_i32_be(data: &mut [u8], value: i32) -> Result<()> {
518        if data.len() < 4 {
519            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for i32"));
520        }
521        BigEndian::write_i32(data, value);
522        Ok(())
523    }
524
525    /// Write u32 to bytes (little-endian)
526    pub fn write_u32_le(data: &mut [u8], value: u32) -> Result<()> {
527        if data.len() < 4 {
528            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for u32"));
529        }
530        LittleEndian::write_u32(data, value);
531        Ok(())
532    }
533
534    /// Write u32 to bytes (big-endian)
535    pub fn write_u32_be(data: &mut [u8], value: u32) -> Result<()> {
536        if data.len() < 4 {
537            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for u32"));
538        }
539        BigEndian::write_u32(data, value);
540        Ok(())
541    }
542
543    /// Write i64 to bytes (little-endian)
544    pub fn write_i64_le(data: &mut [u8], value: i64) -> Result<()> {
545        if data.len() < 8 {
546            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for i64"));
547        }
548        LittleEndian::write_i64(data, value);
549        Ok(())
550    }
551
552    /// Write i64 to bytes (big-endian)
553    pub fn write_i64_be(data: &mut [u8], value: i64) -> Result<()> {
554        if data.len() < 8 {
555            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for i64"));
556        }
557        BigEndian::write_i64(data, value);
558        Ok(())
559    }
560
561    /// Write u64 to bytes (little-endian)
562    pub fn write_u64_le(data: &mut [u8], value: u64) -> Result<()> {
563        if data.len() < 8 {
564            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for u64"));
565        }
566        LittleEndian::write_u64(data, value);
567        Ok(())
568    }
569
570    /// Write u64 to bytes (big-endian)
571    pub fn write_u64_be(data: &mut [u8], value: u64) -> Result<()> {
572        if data.len() < 8 {
573            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for u64"));
574        }
575        BigEndian::write_u64(data, value);
576        Ok(())
577    }
578
579    /// Write f32 to bytes (little-endian)
580    pub fn write_f32_le(data: &mut [u8], value: f32) -> Result<()> {
581        if data.len() < 4 {
582            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for f32"));
583        }
584        LittleEndian::write_f32(data, value);
585        Ok(())
586    }
587
588    /// Write f32 to bytes (big-endian)
589    pub fn write_f32_be(data: &mut [u8], value: f32) -> Result<()> {
590        if data.len() < 4 {
591            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for f32"));
592        }
593        BigEndian::write_f32(data, value);
594        Ok(())
595    }
596
597    /// Write f64 to bytes (little-endian)
598    pub fn write_f64_le(data: &mut [u8], value: f64) -> Result<()> {
599        if data.len() < 8 {
600            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for f64"));
601        }
602        LittleEndian::write_f64(data, value);
603        Ok(())
604    }
605
606    /// Write f64 to bytes (big-endian)
607    pub fn write_f64_be(data: &mut [u8], value: f64) -> Result<()> {
608        if data.len() < 8 {
609            return Err(Hdf5Error::invalid_datatype("Insufficient buffer for f64"));
610        }
611        BigEndian::write_f64(data, value);
612        Ok(())
613    }
614}
615
/// Unit tests for the datatype model and the byte-level converters.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the fixed-length string type used by several tests.
    fn fixed_string_10() -> Datatype {
        Datatype::FixedString {
            length: 10,
            padding: StringPadding::NullTerminated,
        }
    }

    #[test]
    fn test_datatype_size() {
        assert_eq!(Datatype::Int8.size(), 1);
        assert_eq!(Datatype::UInt8.size(), 1);
        assert_eq!(Datatype::Int16.size(), 2);
        assert_eq!(Datatype::UInt16.size(), 2);
        assert_eq!(Datatype::Int32.size(), 4);
        assert_eq!(Datatype::UInt32.size(), 4);
        assert_eq!(Datatype::Int64.size(), 8);
        assert_eq!(Datatype::UInt64.size(), 8);
        assert_eq!(Datatype::Float32.size(), 4);
        assert_eq!(Datatype::Float64.size(), 8);
        assert_eq!(fixed_string_10().size(), 10);
    }

    #[test]
    fn test_composite_datatype_size() {
        let array = Datatype::Array {
            base_type: Box::new(Datatype::Int32),
            dimensions: vec![2, 3],
        };
        assert_eq!(array.size(), 24);

        // No dimensions: the product of an empty list is 1.
        let scalar_array = Datatype::Array {
            base_type: Box::new(Datatype::Int32),
            dimensions: Vec::new(),
        };
        assert_eq!(scalar_array.size(), 4);

        let varlen = Datatype::VarLen {
            base_type: Box::new(Datatype::Float64),
        };
        assert_eq!(varlen.size(), 16);

        let enumeration = Datatype::Enum {
            base_type: Box::new(Datatype::Int16),
            members: vec![EnumMember::new("A".to_string(), 0)],
        };
        assert_eq!(enumeration.size(), 2);

        let compound = Datatype::Compound {
            size: 12,
            members: vec![
                CompoundMember::new("x".to_string(), Datatype::Int32, 0),
                CompoundMember::new("y".to_string(), Datatype::Float64, 4),
            ],
        };
        assert_eq!(compound.size(), 12);
        assert!(compound.is_compound());
    }

    #[test]
    fn test_datatype_class() {
        assert_eq!(Datatype::Int32.class(), DatatypeClass::FixedPoint);
        assert_eq!(Datatype::Float64.class(), DatatypeClass::FloatingPoint);
        assert_eq!(fixed_string_10().class(), DatatypeClass::String);
    }

    #[test]
    fn test_datatype_class_from_u8() {
        for raw in 0u8..=10 {
            let class = DatatypeClass::from_u8(raw).expect("valid class value");
            assert_eq!(class as u8, raw);
        }
        assert_eq!(
            DatatypeClass::from_u8(3).expect("valid class value"),
            DatatypeClass::String
        );
        assert!(DatatypeClass::from_u8(11).is_err());
        assert!(DatatypeClass::from_u8(u8::MAX).is_err());
    }

    #[test]
    fn test_datatype_name() {
        assert_eq!(Datatype::Int32.name(), "int32");
        assert_eq!(Datatype::Float64.name(), "float64");
        assert_eq!(fixed_string_10().name(), "string[10]");
    }

    #[test]
    fn test_composite_datatype_name() {
        let array = Datatype::Array {
            base_type: Box::new(Datatype::Int32),
            dimensions: vec![2, 3],
        };
        assert_eq!(array.name(), "int32[2,3]");
        assert_eq!(array.to_string(), "int32[2,3]");

        let varlen = Datatype::VarLen {
            base_type: Box::new(Datatype::Float64),
        };
        assert_eq!(varlen.name(), "varlen<float64>");

        let compound = Datatype::Compound {
            size: 12,
            members: vec![
                CompoundMember::new("x".to_string(), Datatype::Int32, 0),
                CompoundMember::new("y".to_string(), Datatype::Float64, 4),
            ],
        };
        assert_eq!(compound.name(), "compound{x, y}");

        let opaque = Datatype::Opaque {
            size: 4,
            tag: "blob".to_string(),
        };
        assert_eq!(opaque.name(), "opaque:blob");
    }

    #[test]
    fn test_type_predicates() {
        assert!(Datatype::Int32.is_integer());
        assert!(!Datatype::Float64.is_integer());
        assert!(Datatype::Float64.is_float());
        assert!(!Datatype::Int32.is_float());
        assert!(fixed_string_10().is_string());
        assert!(!Datatype::Int32.is_compound());
    }

    #[test]
    fn test_type_converter_i32() {
        let mut data = vec![0u8; 4];
        TypeConverter::write_i32_le(&mut data, 42).expect("write failed");
        let value = TypeConverter::read_i32_le(&data).expect("read failed");
        assert_eq!(value, 42);
    }

    #[test]
    fn test_type_converter_f64() {
        let mut data = vec![0u8; 8];
        TypeConverter::write_f64_le(&mut data, std::f64::consts::PI).expect("write failed");
        let value = TypeConverter::read_f64_le(&data).expect("read failed");
        assert!((value - std::f64::consts::PI).abs() < 1e-10);
    }

    #[test]
    fn test_type_converter_byte_order() {
        let mut data = [0u8; 2];
        TypeConverter::write_u16_be(&mut data, 0x1234).expect("write failed");
        assert_eq!(data, [0x12, 0x34]);
        assert_eq!(
            TypeConverter::read_u16_be(&data).expect("read failed"),
            0x1234
        );
        // The same bytes read with the opposite byte order are swapped.
        assert_eq!(
            TypeConverter::read_u16_le(&data).expect("read failed"),
            0x3412
        );
    }

    #[test]
    fn test_type_converter_single_byte() {
        let mut data = [0u8; 1];
        TypeConverter::write_i8(&mut data, -1).expect("write failed");
        assert_eq!(data, [0xFF]);
        assert_eq!(TypeConverter::read_i8(&data).expect("read failed"), -1);
        assert_eq!(TypeConverter::read_u8(&data).expect("read failed"), 0xFF);
    }

    #[test]
    fn test_type_converter_short_buffers() {
        assert!(TypeConverter::read_i8(&[]).is_err());
        assert!(TypeConverter::read_u16_le(&[0u8; 1]).is_err());
        assert!(TypeConverter::read_i32_le(&[0u8; 3]).is_err());
        assert!(TypeConverter::read_f64_be(&[0u8; 7]).is_err());
        assert!(TypeConverter::write_u8(&mut [], 1).is_err());
        assert!(TypeConverter::write_i64_le(&mut [0u8; 7], 1).is_err());
        assert!(TypeConverter::write_f32_be(&mut [0u8; 3], 1.0).is_err());
    }
}