// netcdf_reader/types.rs

/// A NetCDF dimension.
///
/// Derives `PartialEq`/`Eq` like the other plain metadata types in this file,
/// so dimensions can be compared directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NcDimension {
    /// Dimension name.
    pub name: String,
    /// Length of the dimension.
    pub size: u64,
    /// Whether this is the unlimited (record) dimension.
    pub is_unlimited: bool,
}
8
9/// A field within a compound (struct) type.
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct NcCompoundField {
12    pub name: String,
13    pub offset: u64,
14    pub dtype: NcType,
15}
16
/// An integer tagged with its width and signedness, used by NetCDF-4 enum
/// definitions and values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NcIntegerValue {
    /// Signed 8-bit value.
    I8(i8),
    /// Unsigned 8-bit value.
    U8(u8),
    /// Signed 16-bit value.
    I16(i16),
    /// Unsigned 16-bit value.
    U16(u16),
    /// Signed 32-bit value.
    I32(i32),
    /// Unsigned 32-bit value.
    U32(u32),
    /// Signed 64-bit value.
    I64(i64),
    /// Unsigned 64-bit value.
    U64(u64),
}
29
30impl NcIntegerValue {
31    /// Return the value as `i128` when it is lossless for the signed domain.
32    pub fn as_i128(self) -> Option<i128> {
33        match self {
34            NcIntegerValue::I8(value) => Some(value as i128),
35            NcIntegerValue::U8(value) => Some(value as i128),
36            NcIntegerValue::I16(value) => Some(value as i128),
37            NcIntegerValue::U16(value) => Some(value as i128),
38            NcIntegerValue::I32(value) => Some(value as i128),
39            NcIntegerValue::U32(value) => Some(value as i128),
40            NcIntegerValue::I64(value) => Some(value as i128),
41            NcIntegerValue::U64(value) => Some(i128::from(value)),
42        }
43    }
44
45    /// Return the value as `u128` when it is non-negative.
46    pub fn as_u128(self) -> Option<u128> {
47        match self {
48            NcIntegerValue::I8(value) => u128::try_from(value).ok(),
49            NcIntegerValue::U8(value) => Some(value as u128),
50            NcIntegerValue::I16(value) => u128::try_from(value).ok(),
51            NcIntegerValue::U16(value) => Some(value as u128),
52            NcIntegerValue::I32(value) => u128::try_from(value).ok(),
53            NcIntegerValue::U32(value) => Some(value as u128),
54            NcIntegerValue::I64(value) => u128::try_from(value).ok(),
55            NcIntegerValue::U64(value) => Some(value as u128),
56        }
57    }
58}
59
60/// A named member of a NetCDF-4 enum type.
61#[derive(Debug, Clone, PartialEq, Eq)]
62pub struct NcEnumMember {
63    pub name: String,
64    pub value: NcIntegerValue,
65}
66
67/// NetCDF data types.
68#[derive(Debug, Clone, PartialEq, Eq)]
69pub enum NcType {
70    /// NC_BYTE (i8)
71    Byte,
72    /// NC_CHAR (u8/char)
73    Char,
74    /// NC_SHORT (i16)
75    Short,
76    /// NC_INT (i32)
77    Int,
78    /// NC_FLOAT (f32)
79    Float,
80    /// NC_DOUBLE (f64)
81    Double,
82    /// NC_UBYTE (u8, CDF-5)
83    UByte,
84    /// NC_USHORT (u16, CDF-5)
85    UShort,
86    /// NC_UINT (u32, CDF-5)
87    UInt,
88    /// NC_INT64 (i64, CDF-5)
89    Int64,
90    /// NC_UINT64 (u64, CDF-5)
91    UInt64,
92    /// NetCDF-4 only (variable-length string)
93    String,
94    /// NetCDF-4 enum type with an integer base type.
95    Enum {
96        base: Box<NcType>,
97        members: Vec<NcEnumMember>,
98    },
99    /// NetCDF-4 compound type (struct with named fields).
100    Compound {
101        size: u32,
102        fields: Vec<NcCompoundField>,
103    },
104    /// NetCDF-4 opaque type (uninterpreted byte blob).
105    Opaque { size: u32, tag: String },
106    /// NetCDF-4 array type (fixed-size array of a base type).
107    Array { base: Box<NcType>, dims: Vec<u64> },
108    /// NetCDF-4 variable-length type.
109    VLen { base: Box<NcType> },
110}
111
112impl NcType {
113    /// Size of a single element in bytes.
114    pub fn size(&self) -> usize {
115        match self {
116            NcType::Byte | NcType::Char | NcType::UByte => 1,
117            NcType::Short | NcType::UShort => 2,
118            NcType::Int | NcType::UInt | NcType::Float => 4,
119            NcType::Int64 | NcType::UInt64 | NcType::Double => 8,
120            // Variable-length string; no fixed element size, but pointer-sized in memory.
121            NcType::String => std::mem::size_of::<usize>(),
122            NcType::Enum { base, .. } => base.size(),
123            NcType::Compound { size, .. } => *size as usize,
124            NcType::Opaque { size, .. } => *size as usize,
125            NcType::Array { base, dims } => {
126                base.size() * dims.iter().map(|&d| d as usize).product::<usize>()
127            }
128            NcType::VLen { .. } => std::mem::size_of::<usize>(), // pointer-sized
129        }
130    }
131
132    /// The numeric type code used in CDF-1/2/5 headers.
133    pub fn classic_type_code(&self) -> Option<u32> {
134        match self {
135            NcType::Byte => Some(1),
136            NcType::Char => Some(2),
137            NcType::Short => Some(3),
138            NcType::Int => Some(4),
139            NcType::Float => Some(5),
140            NcType::Double => Some(6),
141            NcType::UByte => Some(7),
142            NcType::UShort => Some(8),
143            NcType::UInt => Some(9),
144            NcType::Int64 => Some(10),
145            NcType::UInt64 => Some(11),
146            // Extended types are not valid in classic format.
147            NcType::String
148            | NcType::Enum { .. }
149            | NcType::Compound { .. }
150            | NcType::Opaque { .. }
151            | NcType::Array { .. }
152            | NcType::VLen { .. } => None,
153        }
154    }
155
156    /// Returns true if this is a primitive numeric or string type.
157    pub fn is_primitive(&self) -> bool {
158        matches!(
159            self,
160            NcType::Byte
161                | NcType::Char
162                | NcType::Short
163                | NcType::Int
164                | NcType::Float
165                | NcType::Double
166                | NcType::UByte
167                | NcType::UShort
168                | NcType::UInt
169                | NcType::Int64
170                | NcType::UInt64
171                | NcType::String
172        )
173    }
174}
175
/// The payload of a NetCDF attribute, one variant per storage type.
#[derive(Debug, Clone)]
pub enum NcAttrValue {
    /// Signed 8-bit values.
    Bytes(Vec<i8>),
    /// Character data, held as a single string.
    Chars(String),
    /// Signed 16-bit values.
    Shorts(Vec<i16>),
    /// Signed 32-bit values.
    Ints(Vec<i32>),
    /// 32-bit floating-point values.
    Floats(Vec<f32>),
    /// 64-bit floating-point values.
    Doubles(Vec<f64>),
    /// Unsigned 8-bit values.
    UBytes(Vec<u8>),
    /// Unsigned 16-bit values.
    UShorts(Vec<u16>),
    /// Unsigned 32-bit values.
    UInts(Vec<u32>),
    /// Signed 64-bit values.
    Int64s(Vec<i64>),
    /// Unsigned 64-bit values.
    UInt64s(Vec<u64>),
    /// One or more strings.
    Strings(Vec<String>),
}
192
193impl NcAttrValue {
194    /// Get the value as a string (for Chars or single-element Strings).
195    pub fn as_string(&self) -> Option<String> {
196        match self {
197            NcAttrValue::Chars(s) => Some(s.clone()),
198            NcAttrValue::Strings(v) if v.len() == 1 => Some(v[0].clone()),
199            _ => None,
200        }
201    }
202
203    /// Get the value as f64 (with numeric promotion from the first element).
204    pub fn as_f64(&self) -> Option<f64> {
205        match self {
206            NcAttrValue::Bytes(v) => v.first().map(|&x| x as f64),
207            NcAttrValue::Shorts(v) => v.first().map(|&x| x as f64),
208            NcAttrValue::Ints(v) => v.first().map(|&x| x as f64),
209            NcAttrValue::Floats(v) => v.first().map(|&x| x as f64),
210            NcAttrValue::Doubles(v) => v.first().copied(),
211            NcAttrValue::UBytes(v) => v.first().map(|&x| x as f64),
212            NcAttrValue::UShorts(v) => v.first().map(|&x| x as f64),
213            NcAttrValue::UInts(v) => v.first().map(|&x| x as f64),
214            NcAttrValue::Int64s(v) => v.first().map(|&x| x as f64),
215            NcAttrValue::UInt64s(v) => v.first().map(|&x| x as f64),
216            NcAttrValue::Chars(_) | NcAttrValue::Strings(_) => None,
217        }
218    }
219
220    /// Get the value as a vector of f64 (with numeric promotion).
221    pub fn as_f64_vec(&self) -> Option<Vec<f64>> {
222        match self {
223            NcAttrValue::Bytes(v) => Some(v.iter().map(|&x| x as f64).collect()),
224            NcAttrValue::Shorts(v) => Some(v.iter().map(|&x| x as f64).collect()),
225            NcAttrValue::Ints(v) => Some(v.iter().map(|&x| x as f64).collect()),
226            NcAttrValue::Floats(v) => Some(v.iter().map(|&x| x as f64).collect()),
227            NcAttrValue::Doubles(v) => Some(v.clone()),
228            NcAttrValue::UBytes(v) => Some(v.iter().map(|&x| x as f64).collect()),
229            NcAttrValue::UShorts(v) => Some(v.iter().map(|&x| x as f64).collect()),
230            NcAttrValue::UInts(v) => Some(v.iter().map(|&x| x as f64).collect()),
231            NcAttrValue::Int64s(v) => Some(v.iter().map(|&x| x as f64).collect()),
232            NcAttrValue::UInt64s(v) => Some(v.iter().map(|&x| x as f64).collect()),
233            NcAttrValue::Chars(_) | NcAttrValue::Strings(_) => None,
234        }
235    }
236}
237
238/// A NetCDF attribute.
239#[derive(Debug, Clone)]
240pub struct NcAttribute {
241    pub name: String,
242    pub value: NcAttrValue,
243}
244
245/// A NetCDF variable (metadata only -- data is read on demand).
246#[derive(Debug, Clone)]
247pub struct NcVariable {
248    pub name: String,
249    pub dimensions: Vec<NcDimension>,
250    pub dtype: NcType,
251    pub attributes: Vec<NcAttribute>,
252    /// For classic: file byte offset to the start of this variable's data.
253    /// For nc4: HDF5 dataset object header address.
254    pub(crate) data_offset: u64,
255    /// Total data size in bytes (for non-record variables).
256    pub(crate) _data_size: u64,
257    /// Whether this variable uses the unlimited (record) dimension.
258    pub(crate) is_record_var: bool,
259    /// Size of one record slice in bytes (only meaningful for record variables).
260    pub(crate) record_size: u64,
261}
262
263impl NcVariable {
264    /// Variable name.
265    pub fn name(&self) -> &str {
266        &self.name
267    }
268
269    /// Variable dimensions.
270    pub fn dimensions(&self) -> &[NcDimension] {
271        &self.dimensions
272    }
273
274    /// Returns the dimension for a CF/NetCDF coordinate variable.
275    ///
276    /// A coordinate variable is one-dimensional and has the same name as its
277    /// dimension. NetCDF-4 stores these as HDF5 dimension scales, but they are
278    /// exposed here with the same shape as classic NetCDF coordinate variables.
279    pub fn coordinate_dimension(&self) -> Option<&NcDimension> {
280        match self.dimensions.as_slice() {
281            [dim] if dim.name == self.name => Some(dim),
282            _ => None,
283        }
284    }
285
286    /// Returns true when this variable is a CF/NetCDF coordinate variable.
287    pub fn is_coordinate_variable(&self) -> bool {
288        self.coordinate_dimension().is_some()
289    }
290
291    /// Returns true when this variable is the coordinate variable for a named
292    /// dimension.
293    pub fn is_coordinate_variable_for(&self, dimension_name: &str) -> bool {
294        self.coordinate_dimension()
295            .is_some_and(|dim| dim.name == dimension_name)
296    }
297
298    /// Variable data type.
299    pub fn dtype(&self) -> &NcType {
300        &self.dtype
301    }
302
303    /// Shape of the variable as a vector of dimension sizes.
304    pub fn shape(&self) -> Vec<u64> {
305        self.dimensions.iter().map(|d| d.size).collect()
306    }
307
308    /// Variable attributes.
309    pub fn attributes(&self) -> &[NcAttribute] {
310        &self.attributes
311    }
312
313    /// Find an attribute by name.
314    pub fn attribute(&self, name: &str) -> Option<&NcAttribute> {
315        self.attributes.iter().find(|a| a.name == name)
316    }
317
318    /// Number of dimensions.
319    pub fn ndim(&self) -> usize {
320        self.dimensions.len()
321    }
322
323    /// Total number of elements.
324    pub fn num_elements(&self) -> u64 {
325        if self.dimensions.is_empty() {
326            return 1; // scalar
327        }
328        self.dimensions.iter().map(|d| d.size).product()
329    }
330}
331
332/// A NetCDF group (NetCDF-4 only; classic files have one implicit root group).
333#[derive(Debug, Clone)]
334pub struct NcGroup {
335    pub name: String,
336    pub dimensions: Vec<NcDimension>,
337    pub variables: Vec<NcVariable>,
338    pub attributes: Vec<NcAttribute>,
339    pub groups: Vec<NcGroup>,
340}
341
342impl NcGroup {
343    /// Find a variable by name in this group.
344    pub fn variable(&self, name: &str) -> Option<&NcVariable> {
345        let (group_path, variable_name) = split_parent_path(name)?;
346        let group = self.group(group_path)?;
347        group.variables.iter().find(|v| v.name == variable_name)
348    }
349
350    /// Find a dimension by name in this group.
351    pub fn dimension(&self, name: &str) -> Option<&NcDimension> {
352        let (group_path, dimension_name) = split_parent_path(name)?;
353        let group = self.group(group_path)?;
354        group.dimensions.iter().find(|d| d.name == dimension_name)
355    }
356
357    /// Find the coordinate variable for a dimension in this group.
358    ///
359    /// `name` may be a local dimension name or a path relative to this group,
360    /// for example `time` or `forecast/time`.
361    pub fn coordinate_variable(&self, name: &str) -> Option<&NcVariable> {
362        let (group_path, dimension_name) = split_parent_path(name)?;
363        let group = self.group(group_path)?;
364        group
365            .variables
366            .iter()
367            .find(|var| var.is_coordinate_variable_for(dimension_name))
368    }
369
370    /// Iterate over coordinate variables declared in this group.
371    pub fn coordinate_variables(&self) -> impl Iterator<Item = &NcVariable> {
372        self.variables
373            .iter()
374            .filter(|var| var.is_coordinate_variable())
375    }
376
377    /// Find an attribute by name in this group.
378    pub fn attribute(&self, name: &str) -> Option<&NcAttribute> {
379        let (group_path, attribute_name) = split_parent_path(name)?;
380        let group = self.group(group_path)?;
381        group.attributes.iter().find(|a| a.name == attribute_name)
382    }
383
384    /// Find a child group by relative path.
385    pub fn group(&self, name: &str) -> Option<&NcGroup> {
386        let trimmed = name.trim_matches('/');
387        if trimmed.is_empty() {
388            return Some(self);
389        }
390
391        let mut group = self;
392        for component in trimmed.split('/').filter(|part| !part.is_empty()) {
393            group = group.groups.iter().find(|child| child.name == component)?;
394        }
395
396        Some(group)
397    }
398}
399
/// Split a `/`-separated path into `(parent_path, leaf_name)`.
///
/// Leading and trailing slashes are ignored. A path without any separator
/// yields an empty parent path. Returns `None` when nothing remains after
/// trimming, or when the leaf component would be empty.
fn split_parent_path(path: &str) -> Option<(&str, &str)> {
    let path = path.trim_matches('/');
    if path.is_empty() {
        return None;
    }

    let (parent, leaf) = path.rsplit_once('/').unwrap_or(("", path));
    (!leaf.is_empty()).then_some((parent, leaf))
}
412
413pub(crate) fn checked_usize_from_u64(value: u64, context: &str) -> crate::Result<usize> {
414    usize::try_from(value)
415        .map_err(|_| crate::Error::InvalidData(format!("{context} exceeds platform usize")))
416}
417
418pub(crate) fn checked_mul_u64(lhs: u64, rhs: u64, context: &str) -> crate::Result<u64> {
419    lhs.checked_mul(rhs)
420        .ok_or_else(|| crate::Error::InvalidData(format!("{context} exceeds u64 capacity")))
421}
422
423pub(crate) fn checked_shape_elements(shape: &[u64], context: &str) -> crate::Result<u64> {
424    shape
425        .iter()
426        .try_fold(1u64, |acc, &dim| checked_mul_u64(acc, dim, context))
427}
428
429/// Hyperslab selection for reading slices of NetCDF variables.
430///
431/// Each element corresponds to one dimension of the variable.
432#[derive(Debug, Clone)]
433pub struct NcSliceInfo {
434    pub selections: Vec<NcSliceInfoElem>,
435}
436
/// The selection applied to one dimension of a hyperslab.
///
/// Derives `PartialEq`/`Eq` so selections can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NcSliceInfoElem {
    /// Select a single index (reduces dimensionality).
    Index(u64),
    /// Select a range with stride.
    ///
    /// NOTE(review): whether `end` is exclusive is not visible in this file;
    /// `NcSliceInfo::all` passes `u64::MAX`, so readers presumably clamp it to
    /// the dimension length. Confirm against the reader.
    Slice { start: u64, end: u64, step: u64 },
}
445
446impl NcSliceInfo {
447    /// Create a selection that reads everything for an `ndim`-dimensional variable.
448    pub fn all(ndim: usize) -> Self {
449        NcSliceInfo {
450            selections: vec![
451                NcSliceInfoElem::Slice {
452                    start: 0,
453                    end: u64::MAX,
454                    step: 1,
455                };
456                ndim
457            ],
458        }
459    }
460}
461
#[cfg(feature = "netcdf4")]
impl NcSliceInfo {
    /// Translate this selection into the equivalent `hdf5_reader::SliceInfo`
    /// so NetCDF-4 reads can be delegated to the HDF5 reader.
    ///
    /// Each element maps to the `hdf5_reader` variant of the same name with
    /// its fields copied unchanged.
    pub(crate) fn to_hdf5_slice_info(&self) -> hdf5_reader::SliceInfo {
        let convert = |selection: &NcSliceInfoElem| match *selection {
            NcSliceInfoElem::Index(index) => hdf5_reader::SliceInfoElem::Index(index),
            NcSliceInfoElem::Slice { start, end, step } => {
                hdf5_reader::SliceInfoElem::Slice { start, end, step }
            }
        };

        hdf5_reader::SliceInfo {
            selections: self.selections.iter().map(convert).collect(),
        }
    }
}
484
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a fixed-size (not unlimited) dimension.
    fn fixed_dim(name: &str, size: u64) -> NcDimension {
        NcDimension {
            name: name.to_string(),
            size,
            is_unlimited: false,
        }
    }

    /// Fixture: a non-record variable with zeroed storage metadata and no
    /// attributes.
    fn plain_var(
        name: &str,
        dimensions: Vec<NcDimension>,
        dtype: NcType,
        record_size: u64,
    ) -> NcVariable {
        NcVariable {
            name: name.to_string(),
            dimensions,
            dtype,
            attributes: Vec::new(),
            data_offset: 0,
            _data_size: 0,
            is_record_var: false,
            record_size,
        }
    }

    /// Fixture: a character attribute.
    fn text_attr(name: &str, text: &str) -> NcAttribute {
        NcAttribute {
            name: name.to_string(),
            value: NcAttrValue::Chars(text.to_string()),
        }
    }

    /// Fixture tree: `/` -> `obs` -> `surface`, each with one scalar variable.
    fn sample_group_tree() -> NcGroup {
        let surface = NcGroup {
            name: "surface".to_string(),
            dimensions: Vec::new(),
            variables: vec![plain_var("pressure", Vec::new(), NcType::Double, 8)],
            attributes: vec![text_attr("units", "hPa")],
            groups: Vec::new(),
        };

        let obs = NcGroup {
            name: "obs".to_string(),
            dimensions: vec![fixed_dim("time", 3)],
            variables: vec![plain_var("temperature", Vec::new(), NcType::Float, 4)],
            attributes: Vec::new(),
            groups: vec![surface],
        };

        NcGroup {
            name: "/".to_string(),
            dimensions: vec![fixed_dim("root_dim", 2)],
            variables: vec![plain_var("root_var", Vec::new(), NcType::Int, 4)],
            attributes: vec![text_attr("title", "root")],
            groups: vec![obs],
        }
    }

    #[test]
    fn test_group_path_lookup() {
        let root = sample_group_tree();

        let surface = root.group("obs/surface").unwrap();
        assert_eq!(surface.name, "surface");
        assert!(root.group("/obs/surface").is_some());
        assert!(root.group("missing").is_none());
    }

    #[test]
    fn test_variable_path_lookup() {
        let root = sample_group_tree();

        let root_var = root.variable("root_var").unwrap();
        assert_eq!(root_var.name(), "root_var");

        let temperature = root.variable("obs/temperature").unwrap();
        assert_eq!(temperature.dtype(), &NcType::Float);

        let pressure = root.variable("/obs/surface/pressure").unwrap();
        assert_eq!(pressure.dtype(), &NcType::Double);

        // A bare name is only resolved in the group it is asked of.
        assert!(root.variable("pressure").is_none());
    }

    #[test]
    fn test_dimension_and_attribute_path_lookup() {
        let root = sample_group_tree();

        assert_eq!(root.dimension("root_dim").unwrap().size, 2);
        assert_eq!(root.dimension("obs/time").unwrap().size, 3);

        let title = root.attribute("title").unwrap();
        assert_eq!(title.value.as_string().unwrap(), "root");

        let units = root.attribute("obs/surface/units").unwrap();
        assert_eq!(units.value.as_string().unwrap(), "hPa");
    }

    #[test]
    fn test_coordinate_variable_detection_and_lookup() {
        let time_dim = fixed_dim("time", 3);
        let lat_dim = fixed_dim("lat", 2);

        let time = plain_var("time", vec![time_dim.clone()], NcType::Double, 8);
        let temperature = plain_var(
            "temperature",
            vec![time_dim.clone(), lat_dim.clone()],
            NcType::Float,
            4,
        );

        let group = NcGroup {
            name: "/".to_string(),
            dimensions: vec![time_dim, lat_dim],
            variables: vec![time.clone(), temperature],
            attributes: Vec::new(),
            groups: Vec::new(),
        };

        assert!(time.is_coordinate_variable());
        assert_eq!(time.coordinate_dimension().unwrap().name, "time");
        assert_eq!(group.coordinate_variable("time").unwrap().name(), "time");
        assert!(group.coordinate_variable("lat").is_none());

        let names: Vec<&str> = group.coordinate_variables().map(NcVariable::name).collect();
        assert_eq!(names, vec!["time"]);
    }

    #[test]
    fn test_checked_shape_elements_overflow() {
        let err = checked_shape_elements(&[u64::MAX, 2], "test overflow").unwrap_err();
        assert!(matches!(err, crate::Error::InvalidData(_)));
    }
}
652}