Skip to main content

netcdf_reader/nc4/
types.rs

//! Map HDF5 datatypes to NetCDF-4 types.
//!
//! HDF5 has a richer type system than NetCDF-4. This module maps the subset
//! of HDF5 types that are valid in NetCDF-4 to `NcType`:
//!
//! | HDF5 Datatype                           | NcType   |
//! |-----------------------------------------|----------|
//! | FixedPoint { size=1, signed }           | Byte     |
//! | FixedPoint { size=1, !signed }          | UByte    |
//! | FixedPoint { size=2, signed }           | Short    |
//! | FixedPoint { size=2, !signed }          | UShort   |
//! | FixedPoint { size=4, signed }           | Int      |
//! | FixedPoint { size=4, !signed }          | UInt     |
//! | FixedPoint { size=8, signed }           | Int64    |
//! | FixedPoint { size=8, !signed }          | UInt64   |
//! | FloatingPoint { size=4 }                | Float    |
//! | FloatingPoint { size=8 }                | Double   |
//! | String (any)                            | String   |
//! | Enum { base=integer }                   | Enum     |
//! | Compound { .. }                         | Compound |
//! | Opaque { .. }                           | Opaque   |
//! | Array { base, dims }                    | Array    |
//! | VarLen { kind=String, base=1-byte int } | String*  |
//! | VarLen { base }                         | VLen     |
//!
//! * Some NetCDF-4 string variables are stored as HDF5 variable-length byte
//!   sequences; those are reported as `String` rather than `VLen`.
27
28use hdf5_reader::messages::datatype::{Datatype, VarLenKind};
29use hdf5_reader::ByteOrder;
30
31use crate::error::{Error, Result};
32use crate::types::{NcCompoundField, NcEnumMember, NcIntegerValue, NcType};
33
34/// Map an HDF5 datatype to a NetCDF type.
35pub fn hdf5_to_nc_type(dtype: &Datatype) -> Result<NcType> {
36    match dtype {
37        Datatype::FixedPoint { size, signed, .. } => match (size, signed) {
38            (1, true) => Ok(NcType::Byte),
39            (1, false) => Ok(NcType::UByte),
40            (2, true) => Ok(NcType::Short),
41            (2, false) => Ok(NcType::UShort),
42            (4, true) => Ok(NcType::Int),
43            (4, false) => Ok(NcType::UInt),
44            (8, true) => Ok(NcType::Int64),
45            (8, false) => Ok(NcType::UInt64),
46            _ => Err(Error::InvalidData(format!(
47                "unsupported HDF5 integer size {} for NetCDF-4",
48                size
49            ))),
50        },
51        Datatype::FloatingPoint { size, .. } => match size {
52            4 => Ok(NcType::Float),
53            8 => Ok(NcType::Double),
54            _ => Err(Error::InvalidData(format!(
55                "unsupported HDF5 float size {} for NetCDF-4",
56                size
57            ))),
58        },
59        Datatype::String { .. } => Ok(NcType::String),
60        Datatype::Enum { base, members } => Ok(NcType::Enum {
61            base: Box::new(hdf5_to_nc_type(base)?),
62            members: members
63                .iter()
64                .map(|member| {
65                    Ok(NcEnumMember {
66                        name: member.name.clone(),
67                        value: decode_enum_integer(base, &member.value)?,
68                    })
69                })
70                .collect::<Result<Vec<_>>>()?,
71        }),
72        Datatype::Compound { size, fields } => {
73            let mut nc_fields = Vec::with_capacity(fields.len());
74            for f in fields {
75                nc_fields.push(NcCompoundField {
76                    name: f.name.clone(),
77                    offset: f.byte_offset as u64,
78                    dtype: hdf5_to_nc_type(&f.datatype)?,
79                });
80            }
81            Ok(NcType::Compound {
82                size: *size,
83                fields: nc_fields,
84            })
85        }
86        Datatype::Opaque { size, tag } => Ok(NcType::Opaque {
87            size: *size,
88            tag: tag.clone(),
89        }),
90        Datatype::Array { base, dims } => {
91            let base_nc = hdf5_to_nc_type(base)?;
92            Ok(NcType::Array {
93                base: Box::new(base_nc),
94                dims: dims.clone(),
95            })
96        }
97        Datatype::VarLen {
98            base,
99            kind: VarLenKind::String,
100            ..
101        } if matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. }) => Ok(NcType::String),
102        Datatype::VarLen { base, .. } => {
103            let base_nc = hdf5_to_nc_type(base)?;
104            Ok(NcType::VLen {
105                base: Box::new(base_nc),
106            })
107        }
108        _ => Err(Error::InvalidData(format!(
109            "HDF5 datatype {:?} has no NetCDF-4 equivalent",
110            dtype
111        ))),
112    }
113}
114
115pub(crate) fn decode_enum_integer(base: &Datatype, bytes: &[u8]) -> Result<NcIntegerValue> {
116    match base {
117        Datatype::FixedPoint {
118            size,
119            signed,
120            byte_order,
121        } => decode_fixed_point_integer(bytes, *size, *signed, *byte_order),
122        other => Err(Error::InvalidData(format!(
123            "NetCDF-4 enum base type must be integer, got {other:?}"
124        ))),
125    }
126}
127
128pub(crate) fn decode_fixed_point_integer(
129    bytes: &[u8],
130    size: u8,
131    signed: bool,
132    byte_order: ByteOrder,
133) -> Result<NcIntegerValue> {
134    fn read<const N: usize>(bytes: &[u8], byte_order: ByteOrder) -> Result<[u8; N]> {
135        if bytes.len() < N {
136            return Err(Error::InvalidData(format!(
137                "integer value too short: need {} bytes, have {}",
138                N,
139                bytes.len()
140            )));
141        }
142        let mut out = [0u8; N];
143        out.copy_from_slice(&bytes[..N]);
144        #[cfg(target_endian = "little")]
145        if byte_order == ByteOrder::BigEndian {
146            out.reverse();
147        }
148        #[cfg(target_endian = "big")]
149        if byte_order == ByteOrder::LittleEndian {
150            out.reverse();
151        }
152        Ok(out)
153    }
154
155    match (size, signed) {
156        (1, true) => Ok(NcIntegerValue::I8(i8::from_ne_bytes(read::<1>(
157            bytes, byte_order,
158        )?))),
159        (1, false) => Ok(NcIntegerValue::U8(u8::from_ne_bytes(read::<1>(
160            bytes, byte_order,
161        )?))),
162        (2, true) => Ok(NcIntegerValue::I16(i16::from_ne_bytes(read::<2>(
163            bytes, byte_order,
164        )?))),
165        (2, false) => Ok(NcIntegerValue::U16(u16::from_ne_bytes(read::<2>(
166            bytes, byte_order,
167        )?))),
168        (4, true) => Ok(NcIntegerValue::I32(i32::from_ne_bytes(read::<4>(
169            bytes, byte_order,
170        )?))),
171        (4, false) => Ok(NcIntegerValue::U32(u32::from_ne_bytes(read::<4>(
172            bytes, byte_order,
173        )?))),
174        (8, true) => Ok(NcIntegerValue::I64(i64::from_ne_bytes(read::<8>(
175            bytes, byte_order,
176        )?))),
177        (8, false) => Ok(NcIntegerValue::U64(u64::from_ne_bytes(read::<8>(
178            bytes, byte_order,
179        )?))),
180        _ => Err(Error::InvalidData(format!(
181            "unsupported NetCDF-4 enum integer size {size}"
182        ))),
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use hdf5_reader::error::ByteOrder;

    /// Build a little-endian fixed-point datatype of the given width and signedness.
    fn le_int(size: u8, signed: bool) -> Datatype {
        Datatype::FixedPoint {
            size,
            signed,
            byte_order: ByteOrder::LittleEndian,
        }
    }

    /// Build a null-terminated variable-length datatype over unsigned bytes.
    fn vlen_of_u8(kind: VarLenKind, encoding: hdf5_reader::StringEncoding) -> Datatype {
        Datatype::VarLen {
            base: Box::new(le_int(1, false)),
            kind,
            encoding,
            padding: hdf5_reader::StringPadding::NullTerminate,
        }
    }

    // Integer widths map to the NetCDF type of matching size and signedness.
    #[test]
    fn test_integer_types() {
        let cases = [
            (1, true, NcType::Byte),
            (1, false, NcType::UByte),
            (4, true, NcType::Int),
            (8, false, NcType::UInt64),
        ];
        for (size, signed, expected) in cases {
            let mapped = hdf5_to_nc_type(&le_int(size, signed)).unwrap();
            assert_eq!(mapped, expected);
        }
    }

    // 4-byte and 8-byte floats map to Float and Double respectively.
    #[test]
    fn test_float_types() {
        let cases = [(4, NcType::Float), (8, NcType::Double)];
        for (size, expected) in cases {
            let dtype = Datatype::FloatingPoint {
                size,
                byte_order: ByteOrder::LittleEndian,
            };
            assert_eq!(hdf5_to_nc_type(&dtype).unwrap(), expected);
        }
    }

    // A string-kind vlen over bytes is reported as a NetCDF string.
    #[test]
    fn test_varlen_u8_maps_to_string() {
        let dtype = vlen_of_u8(VarLenKind::String, hdf5_reader::StringEncoding::Utf8);
        assert_eq!(hdf5_to_nc_type(&dtype).unwrap(), NcType::String);
    }

    // A sequence-kind vlen over bytes stays a VLen of UByte.
    #[test]
    fn test_sequence_varlen_u8_maps_to_vlen() {
        let dtype = vlen_of_u8(VarLenKind::Sequence, hdf5_reader::StringEncoding::Ascii);
        let expected = NcType::VLen {
            base: Box::new(NcType::UByte),
        };
        assert_eq!(hdf5_to_nc_type(&dtype).unwrap(), expected);
    }
}