Skip to main content

netcdf_reader/
user_defined.rs

1//! Decoders for NetCDF-4 user-defined data values.
2
3use hdf5_reader::{ByteOrder, Dataset, Datatype, StringPadding, StringSize, VarLenKind};
4use ndarray::{ArrayD, IxDyn};
5
6use crate::error::{Error, Result};
7use crate::types::{NcIntegerValue, NcType};
8
9/// A decoded NetCDF-4 enum value.
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct NcEnumValue {
12    /// The stored integer value.
13    pub value: NcIntegerValue,
14    /// The matching enum member name, if the stored value is declared by the type.
15    pub member: Option<String>,
16}
17
18/// A decoded field of a NetCDF-4 compound value.
19#[derive(Debug, Clone, PartialEq)]
20pub struct NcCompoundValueField {
21    pub name: String,
22    pub value: NcValue,
23}
24
25/// A decoded fixed-size NetCDF-4 array value.
26#[derive(Debug, Clone, PartialEq)]
27pub struct NcArrayValue {
28    pub dims: Vec<u64>,
29    pub values: Vec<NcValue>,
30}
31
32/// A decoded NetCDF-4 value.
33#[derive(Debug, Clone, PartialEq)]
34pub enum NcValue {
35    Byte(i8),
36    Char(u8),
37    Short(i16),
38    Int(i32),
39    Float(f32),
40    Double(f64),
41    UByte(u8),
42    UShort(u16),
43    UInt(u32),
44    Int64(i64),
45    UInt64(u64),
46    String(String),
47    Enum(NcEnumValue),
48    Opaque(Vec<u8>),
49    Compound(Vec<NcCompoundValueField>),
50    Array(NcArrayValue),
51    VLen(Vec<NcValue>),
52}
53
54/// A borrowed view of one logical NetCDF-4 value.
55///
56/// `NcValueView` lets callers decode directly into domain types without first
57/// allocating a full [`NcValue`] tree. Use [`NcValueView::to_owned_value`] for
58/// the dynamic representation.
59#[derive(Clone, Copy)]
60pub struct NcValueView<'a> {
61    dataset: &'a Dataset,
62    dtype: &'a Datatype,
63    bytes: &'a [u8],
64}
65
66impl<'a> NcValueView<'a> {
67    pub(crate) fn new(dataset: &'a Dataset, dtype: &'a Datatype, bytes: &'a [u8]) -> Self {
68        Self {
69            dataset,
70            dtype,
71            bytes,
72        }
73    }
74
75    /// The NetCDF type represented by this value.
76    pub fn nc_type(&self) -> Result<NcType> {
77        crate::nc4::types::hdf5_to_nc_type(self.dtype)
78    }
79
80    /// Decode this value into the dynamic owned representation.
81    pub fn to_owned_value(&self) -> Result<NcValue> {
82        decode_value(self.dataset, self.dtype, self.bytes)
83    }
84
85    /// Decode this value as a NetCDF integer or enum base integer.
86    pub fn integer(&self) -> Result<NcIntegerValue> {
87        match self.dtype {
88            Datatype::FixedPoint {
89                size,
90                signed,
91                byte_order,
92            } => crate::nc4::types::decode_fixed_point_integer(
93                self.bytes,
94                *size,
95                *signed,
96                *byte_order,
97            ),
98            Datatype::Enum { base, .. } => crate::nc4::types::decode_enum_integer(base, self.bytes),
99            other => Err(Error::TypeMismatch {
100                expected: "integer or enum value".to_string(),
101                actual: format!("{other:?}"),
102            }),
103        }
104    }
105
106    /// Decode this value as `f32`.
107    pub fn f32(&self) -> Result<f32> {
108        match self.dtype {
109            Datatype::FloatingPoint {
110                size: 4,
111                byte_order,
112            } => Ok(f32::from_ne_bytes(read_ordered_bytes::<4>(
113                self.bytes,
114                *byte_order,
115            )?)),
116            other => Err(Error::TypeMismatch {
117                expected: "f32".to_string(),
118                actual: format!("{other:?}"),
119            }),
120        }
121    }
122
123    /// Decode this value as `f64`.
124    pub fn f64(&self) -> Result<f64> {
125        match self.dtype {
126            Datatype::FloatingPoint {
127                size: 8,
128                byte_order,
129            } => Ok(f64::from_ne_bytes(read_ordered_bytes::<8>(
130                self.bytes,
131                *byte_order,
132            )?)),
133            other => Err(Error::TypeMismatch {
134                expected: "f64".to_string(),
135                actual: format!("{other:?}"),
136            }),
137        }
138    }
139
140    /// Decode this value as a NetCDF-4 enum.
141    pub fn enum_value(&self) -> Result<NcEnumValue> {
142        match self.dtype {
143            Datatype::Enum { base, members } => {
144                let value = crate::nc4::types::decode_enum_integer(base, self.bytes)?;
145                let mut member_name = None;
146                for member in members {
147                    if crate::nc4::types::decode_enum_integer(base, &member.value)? == value {
148                        member_name = Some(member.name.clone());
149                        break;
150                    }
151                }
152                Ok(NcEnumValue {
153                    value,
154                    member: member_name,
155                })
156            }
157            other => Err(Error::TypeMismatch {
158                expected: "enum value".to_string(),
159                actual: format!("{other:?}"),
160            }),
161        }
162    }
163
164    /// Borrow this value as an opaque byte blob.
165    pub fn opaque_bytes(&self) -> Result<&'a [u8]> {
166        match self.dtype {
167            Datatype::Opaque { size, .. } => {
168                let size = checked_usize(*size as u64, "opaque byte size")?;
169                require_len(self.bytes, size, "opaque value")?;
170                Ok(&self.bytes[..size])
171            }
172            other => Err(Error::TypeMismatch {
173                expected: "opaque value".to_string(),
174                actual: format!("{other:?}"),
175            }),
176        }
177    }
178
179    /// Borrow a field from a compound value by name.
180    pub fn compound_field(&self, name: &str) -> Result<NcValueView<'a>> {
181        match self.dtype {
182            Datatype::Compound { fields, .. } => {
183                let field = fields
184                    .iter()
185                    .find(|field| field.name == name)
186                    .ok_or_else(|| {
187                        Error::InvalidData(format!("compound field not found: {name}"))
188                    })?;
189                let start = checked_usize(field.byte_offset as u64, "compound field offset")?;
190                let len = value_size(self.dataset, &field.datatype)?;
191                let end = checked_add_usize(start, len, "compound field end")?;
192                require_len(self.bytes, end, "compound value")?;
193                Ok(NcValueView::new(
194                    self.dataset,
195                    &field.datatype,
196                    &self.bytes[start..end],
197                ))
198            }
199            other => Err(Error::TypeMismatch {
200                expected: "compound value".to_string(),
201                actual: format!("{other:?}"),
202            }),
203        }
204    }
205
206    /// Borrow all fields from a compound value in declaration order.
207    pub fn compound_fields(&self) -> Result<Vec<NcCompoundFieldView<'a>>> {
208        match self.dtype {
209            Datatype::Compound { fields, .. } => fields
210                .iter()
211                .map(|field| {
212                    let start = checked_usize(field.byte_offset as u64, "compound field offset")?;
213                    let len = value_size(self.dataset, &field.datatype)?;
214                    let end = checked_add_usize(start, len, "compound field end")?;
215                    require_len(self.bytes, end, "compound value")?;
216                    Ok(NcCompoundFieldView {
217                        name: &field.name,
218                        value: NcValueView::new(
219                            self.dataset,
220                            &field.datatype,
221                            &self.bytes[start..end],
222                        ),
223                    })
224                })
225                .collect(),
226            other => Err(Error::TypeMismatch {
227                expected: "compound value".to_string(),
228                actual: format!("{other:?}"),
229            }),
230        }
231    }
232
233    /// Borrow all fixed-size array elements in row-major order.
234    pub fn array_elements(&self) -> Result<Vec<NcValueView<'a>>> {
235        match self.dtype {
236            Datatype::Array { base, dims } => {
237                let count = checked_product_u64(dims, "array element count")?;
238                let elem_size = value_size(self.dataset, base)?;
239                let total = checked_mul_usize(count, elem_size, "array byte size")?;
240                require_len(self.bytes, total, "array value")?;
241                let mut values = Vec::with_capacity(count);
242                for index in 0..count {
243                    let start = checked_mul_usize(index, elem_size, "array element offset")?;
244                    let end = checked_add_usize(start, elem_size, "array element end")?;
245                    values.push(NcValueView::new(
246                        self.dataset,
247                        base,
248                        &self.bytes[start..end],
249                    ));
250                }
251                Ok(values)
252            }
253            other => Err(Error::TypeMismatch {
254                expected: "array value".to_string(),
255                actual: format!("{other:?}"),
256            }),
257        }
258    }
259
260    /// Decode a non-string vlen value into owned values.
261    pub fn vlen_values(&self) -> Result<Vec<NcValue>> {
262        match self.dtype {
263            Datatype::VarLen {
264                kind: VarLenKind::String,
265                ..
266            } => Err(Error::TypeMismatch {
267                expected: "non-string vlen value".to_string(),
268                actual: format!("{:?}", self.dtype),
269            }),
270            Datatype::VarLen { base, .. } => decode_vlen_values(self.dataset, base, self.bytes),
271            other => Err(Error::TypeMismatch {
272                expected: "vlen value".to_string(),
273                actual: format!("{other:?}"),
274            }),
275        }
276    }
277}
278
279/// A borrowed compound field view.
280#[derive(Clone, Copy)]
281pub struct NcCompoundFieldView<'a> {
282    pub name: &'a str,
283    pub value: NcValueView<'a>,
284}
285
286pub(crate) fn read_dataset_with_decoder<T, F>(
287    dataset: &Dataset,
288    mut decoder: F,
289) -> Result<ArrayD<T>>
290where
291    F: FnMut(NcValueView<'_>) -> Result<T>,
292{
293    let raw = dataset.read_raw_bytes()?;
294    let count = checked_usize(dataset.num_elements(), "NetCDF-4 variable element count")?;
295    let elem_size = value_size(dataset, dataset.dtype())?;
296    let total = checked_mul_usize(count, elem_size, "NetCDF-4 variable byte size")?;
297    require_len(&raw, total, "NetCDF-4 variable data")?;
298
299    let mut values = Vec::with_capacity(count);
300    for index in 0..count {
301        let start = checked_mul_usize(index, elem_size, "NetCDF-4 element byte offset")?;
302        let end = checked_add_usize(start, elem_size, "NetCDF-4 element byte end")?;
303        values.push(decoder(NcValueView::new(
304            dataset,
305            dataset.dtype(),
306            &raw[start..end],
307        ))?);
308    }
309
310    let shape = dataset
311        .shape()
312        .iter()
313        .map(|&dim| checked_usize(dim, "NetCDF-4 variable dimension"))
314        .collect::<Result<Vec<_>>>()?;
315    ArrayD::from_shape_vec(IxDyn(&shape), values)
316        .map_err(|err| Error::InvalidData(format!("array shape error: {err}")))
317}
318
319pub(crate) fn read_dataset_values(dataset: &Dataset) -> Result<ArrayD<NcValue>> {
320    read_dataset_with_decoder(dataset, |value| value.to_owned_value())
321}
322
323fn decode_value(dataset: &Dataset, dtype: &Datatype, bytes: &[u8]) -> Result<NcValue> {
324    match dtype {
325        Datatype::FixedPoint {
326            size,
327            signed,
328            byte_order,
329        } => integer_to_value(crate::nc4::types::decode_fixed_point_integer(
330            bytes,
331            *size,
332            *signed,
333            *byte_order,
334        )?),
335        Datatype::FloatingPoint {
336            size: 4,
337            byte_order,
338        } => Ok(NcValue::Float(f32::from_ne_bytes(read_ordered_bytes::<4>(
339            bytes,
340            *byte_order,
341        )?))),
342        Datatype::FloatingPoint {
343            size: 8,
344            byte_order,
345        } => Ok(NcValue::Double(f64::from_ne_bytes(
346            read_ordered_bytes::<8>(bytes, *byte_order)?,
347        ))),
348        Datatype::FloatingPoint { size, .. } => Err(Error::InvalidData(format!(
349            "unsupported floating-point size {size}"
350        ))),
351        Datatype::String {
352            size: StringSize::Fixed(len),
353            padding,
354            ..
355        } => {
356            let len = checked_usize(*len as u64, "fixed string length")?;
357            require_len(bytes, len, "fixed string value")?;
358            Ok(NcValue::String(decode_string_bytes(
359                &bytes[..len],
360                *padding,
361            )?))
362        }
363        Datatype::String {
364            size: StringSize::Variable,
365            padding,
366            ..
367        } => {
368            let raw = dataset.resolve_vlen_reference_bytes(bytes, 1)?;
369            Ok(NcValue::String(decode_string_bytes(&raw, *padding)?))
370        }
371        Datatype::Enum { .. } => Ok(NcValue::Enum(
372            NcValueView::new(dataset, dtype, bytes).enum_value()?,
373        )),
374        Datatype::Opaque { size, .. } => {
375            let size = checked_usize(*size as u64, "opaque byte size")?;
376            require_len(bytes, size, "opaque value")?;
377            Ok(NcValue::Opaque(bytes[..size].to_vec()))
378        }
379        Datatype::Compound { fields, .. } => {
380            let mut decoded = Vec::with_capacity(fields.len());
381            for field in fields {
382                let start = checked_usize(field.byte_offset as u64, "compound field offset")?;
383                let len = value_size(dataset, &field.datatype)?;
384                let end = checked_add_usize(start, len, "compound field end")?;
385                require_len(bytes, end, "compound value")?;
386                decoded.push(NcCompoundValueField {
387                    name: field.name.clone(),
388                    value: decode_value(dataset, &field.datatype, &bytes[start..end])?,
389                });
390            }
391            Ok(NcValue::Compound(decoded))
392        }
393        Datatype::Array { base, dims } => {
394            let count = checked_product_u64(dims, "array element count")?;
395            let elem_size = value_size(dataset, base)?;
396            let total = checked_mul_usize(count, elem_size, "array byte size")?;
397            require_len(bytes, total, "array value")?;
398            let mut values = Vec::with_capacity(count);
399            for index in 0..count {
400                let start = checked_mul_usize(index, elem_size, "array element offset")?;
401                let end = checked_add_usize(start, elem_size, "array element end")?;
402                values.push(decode_value(dataset, base, &bytes[start..end])?);
403            }
404            Ok(NcValue::Array(NcArrayValue {
405                dims: dims.clone(),
406                values,
407            }))
408        }
409        Datatype::VarLen {
410            base,
411            kind: VarLenKind::String,
412            padding,
413            ..
414        } if matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. }) => {
415            let raw = dataset.resolve_vlen_reference_bytes(bytes, 1)?;
416            Ok(NcValue::String(decode_string_bytes(&raw, *padding)?))
417        }
418        Datatype::VarLen { base, .. } => {
419            Ok(NcValue::VLen(decode_vlen_values(dataset, base, bytes)?))
420        }
421        other => Err(Error::InvalidData(format!(
422            "unsupported NetCDF-4 user-defined datatype: {other:?}"
423        ))),
424    }
425}
426
427fn decode_vlen_values(
428    dataset: &Dataset,
429    base: &Datatype,
430    reference: &[u8],
431) -> Result<Vec<NcValue>> {
432    let elem_size = value_size(dataset, base)?;
433    let raw = dataset.resolve_vlen_reference_bytes(reference, elem_size)?;
434    if elem_size == 0 {
435        return Err(Error::InvalidData(
436            "vlen base type has zero byte size".to_string(),
437        ));
438    }
439    if raw.len() % elem_size != 0 {
440        return Err(Error::InvalidData(format!(
441            "vlen payload has {} bytes, not a multiple of element size {}",
442            raw.len(),
443            elem_size
444        )));
445    }
446
447    let count = raw.len() / elem_size;
448    let mut values = Vec::with_capacity(count);
449    for index in 0..count {
450        let start = checked_mul_usize(index, elem_size, "vlen element offset")?;
451        let end = checked_add_usize(start, elem_size, "vlen element end")?;
452        values.push(decode_value(dataset, base, &raw[start..end])?);
453    }
454    Ok(values)
455}
456
457fn integer_to_value(value: NcIntegerValue) -> Result<NcValue> {
458    Ok(match value {
459        NcIntegerValue::I8(value) => NcValue::Byte(value),
460        NcIntegerValue::U8(value) => NcValue::UByte(value),
461        NcIntegerValue::I16(value) => NcValue::Short(value),
462        NcIntegerValue::U16(value) => NcValue::UShort(value),
463        NcIntegerValue::I32(value) => NcValue::Int(value),
464        NcIntegerValue::U32(value) => NcValue::UInt(value),
465        NcIntegerValue::I64(value) => NcValue::Int64(value),
466        NcIntegerValue::U64(value) => NcValue::UInt64(value),
467    })
468}
469
470fn value_size(dataset: &Dataset, dtype: &Datatype) -> Result<usize> {
471    match dtype {
472        Datatype::String {
473            size: StringSize::Variable,
474            ..
475        }
476        | Datatype::VarLen { .. } => Ok(dataset.vlen_reference_size()),
477        Datatype::Array { base, dims } => {
478            let count = checked_product_u64(dims, "array element count")?;
479            let elem_size = value_size(dataset, base)?;
480            checked_mul_usize(count, elem_size, "array byte size")
481        }
482        Datatype::Enum { base, .. } => value_size(dataset, base),
483        Datatype::FixedPoint { size, .. }
484        | Datatype::FloatingPoint { size, .. }
485        | Datatype::Bitfield { size, .. }
486        | Datatype::Reference { size, .. } => Ok(*size as usize),
487        Datatype::String {
488            size: StringSize::Fixed(len),
489            ..
490        } => Ok(*len as usize),
491        Datatype::Compound { size, .. } | Datatype::Opaque { size, .. } => Ok(*size as usize),
492    }
493}
494
495fn decode_string_bytes(bytes: &[u8], padding: StringPadding) -> Result<String> {
496    let trimmed = match padding {
497        StringPadding::NullTerminate => {
498            let end = bytes
499                .iter()
500                .position(|&byte| byte == 0)
501                .unwrap_or(bytes.len());
502            &bytes[..end]
503        }
504        StringPadding::NullPad => {
505            let end = bytes
506                .iter()
507                .rposition(|&byte| byte != 0)
508                .map_or(0, |idx| idx + 1);
509            &bytes[..end]
510        }
511        StringPadding::SpacePad => {
512            let end = bytes
513                .iter()
514                .rposition(|&byte| byte != b' ')
515                .map_or(0, |idx| idx + 1);
516            &bytes[..end]
517        }
518    };
519    String::from_utf8(trimmed.to_vec())
520        .map_err(|err| Error::InvalidData(format!("invalid string data: {err}")))
521}
522
523fn read_ordered_bytes<const N: usize>(bytes: &[u8], byte_order: ByteOrder) -> Result<[u8; N]> {
524    require_len(bytes, N, "numeric value")?;
525    let mut out = [0u8; N];
526    out.copy_from_slice(&bytes[..N]);
527    #[cfg(target_endian = "little")]
528    if byte_order == ByteOrder::BigEndian {
529        out.reverse();
530    }
531    #[cfg(target_endian = "big")]
532    if byte_order == ByteOrder::LittleEndian {
533        out.reverse();
534    }
535    Ok(out)
536}
537
538fn require_len(bytes: &[u8], needed: usize, context: &str) -> Result<()> {
539    if bytes.len() < needed {
540        return Err(Error::InvalidData(format!(
541            "{context} too short: need {needed} bytes, have {}",
542            bytes.len()
543        )));
544    }
545    Ok(())
546}
547
548fn checked_usize(value: u64, context: &str) -> Result<usize> {
549    usize::try_from(value)
550        .map_err(|_| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
551}
552
553fn checked_add_usize(left: usize, right: usize, context: &str) -> Result<usize> {
554    left.checked_add(right)
555        .ok_or_else(|| Error::InvalidData(format!("{context} overflowed usize")))
556}
557
558fn checked_mul_usize(left: usize, right: usize, context: &str) -> Result<usize> {
559    left.checked_mul(right)
560        .ok_or_else(|| Error::InvalidData(format!("{context} overflowed usize")))
561}
562
563fn checked_product_u64(values: &[u64], context: &str) -> Result<usize> {
564    let mut product = 1usize;
565    for &value in values {
566        product = checked_mul_usize(product, checked_usize(value, context)?, context)?;
567    }
568    Ok(product)
569}