// netcdf_reader/types.rs

/// A NetCDF dimension: a named axis with a length.
///
/// Equality compares all three fields, so two dimensions with the same name
/// but different sizes are considered different.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NcDimension {
    /// Name of the dimension.
    pub name: String,
    /// Length of the dimension.
    pub size: u64,
    /// Whether the dimension is flagged as unlimited.
    pub is_unlimited: bool,
}

/// A field within a compound (struct) type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NcCompoundField {
    /// Field name.
    pub name: String,
    /// Offset of the field (presumably in bytes from the start of the
    /// compound value -- confirm against the parser that fills this in).
    pub offset: u64,
    /// Data type of the field.
    pub dtype: NcType,
}

/// NetCDF data types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NcType {
    /// NC_BYTE (i8)
    Byte,
    /// NC_CHAR (u8/char)
    Char,
    /// NC_SHORT (i16)
    Short,
    /// NC_INT (i32)
    Int,
    /// NC_FLOAT (f32)
    Float,
    /// NC_DOUBLE (f64)
    Double,
    /// NC_UBYTE (u8, CDF-5)
    UByte,
    /// NC_USHORT (u16, CDF-5)
    UShort,
    /// NC_UINT (u32, CDF-5)
    UInt,
    /// NC_INT64 (i64, CDF-5)
    Int64,
    /// NC_UINT64 (u64, CDF-5)
    UInt64,
    /// NetCDF-4 only (variable-length string)
    String,
    /// NetCDF-4 compound type (struct with named fields).
    Compound {
        /// Declared size of one compound value in bytes.
        size: u32,
        /// Member fields of the compound type.
        fields: Vec<NcCompoundField>,
    },
    /// NetCDF-4 opaque type (uninterpreted byte blob).
    Opaque {
        /// Declared size of one opaque value in bytes.
        size: u32,
        /// Tag string attached to the opaque type.
        tag: String,
    },
    /// NetCDF-4 array type (fixed-size array of a base type).
    Array {
        /// Element type of the array.
        base: Box<NcType>,
        /// Extent of each array dimension.
        dims: Vec<u64>,
    },
    /// NetCDF-4 variable-length type.
    VLen {
        /// Element type of the variable-length sequence.
        base: Box<NcType>,
    },
}

impl NcType {
    /// Size of a single element in bytes.
    ///
    /// - Fixed-width primitives report their width.
    /// - `String` and `VLen` have no fixed element size and report the size
    ///   of a pointer (`usize`).
    /// - `Compound` and `Opaque` report their declared `size`.
    /// - `Array` reports the base size multiplied by every extent in `dims`.
    ///   The multiplication saturates at `usize::MAX` instead of overflowing,
    ///   so absurd extents cannot panic (debug builds) or silently wrap to a
    ///   small value (release builds).
    pub fn size(&self) -> usize {
        match self {
            NcType::Byte | NcType::Char | NcType::UByte => 1,
            NcType::Short | NcType::UShort => 2,
            NcType::Int | NcType::UInt | NcType::Float => 4,
            NcType::Int64 | NcType::UInt64 | NcType::Double => 8,
            // Variable-length string; no fixed element size, but pointer-sized in memory.
            NcType::String => std::mem::size_of::<usize>(),
            NcType::Compound { size, .. } => *size as usize,
            NcType::Opaque { size, .. } => *size as usize,
            NcType::Array { base, dims } => dims.iter().fold(base.size(), |bytes, &extent| {
                // An extent that does not fit in `usize` is clamped rather
                // than truncated, then the running product saturates.
                let extent = usize::try_from(extent).unwrap_or(usize::MAX);
                bytes.saturating_mul(extent)
            }),
            NcType::VLen { .. } => std::mem::size_of::<usize>(), // pointer-sized
        }
    }

    /// The numeric type code used in CDF-1/2/5 headers.
    ///
    /// Returns `None` for the extended types (`String`, `Compound`, `Opaque`,
    /// `Array`, `VLen`), which are not valid in classic format.
    pub fn classic_type_code(&self) -> Option<u32> {
        match self {
            NcType::Byte => Some(1),
            NcType::Char => Some(2),
            NcType::Short => Some(3),
            NcType::Int => Some(4),
            NcType::Float => Some(5),
            NcType::Double => Some(6),
            NcType::UByte => Some(7),
            NcType::UShort => Some(8),
            NcType::UInt => Some(9),
            NcType::Int64 => Some(10),
            NcType::UInt64 => Some(11),
            // Extended types are not valid in classic format.
            NcType::String
            | NcType::Compound { .. }
            | NcType::Opaque { .. }
            | NcType::Array { .. }
            | NcType::VLen { .. } => None,
        }
    }

    /// Returns true if this is a primitive numeric, char, or string type
    /// (i.e. anything other than `Compound`, `Opaque`, `Array`, or `VLen`).
    pub fn is_primitive(&self) -> bool {
        matches!(
            self,
            NcType::Byte
                | NcType::Char
                | NcType::Short
                | NcType::Int
                | NcType::Float
                | NcType::Double
                | NcType::UByte
                | NcType::UShort
                | NcType::UInt
                | NcType::Int64
                | NcType::UInt64
                | NcType::String
        )
    }
}

/// A NetCDF attribute value.
///
/// Each variant stores the attribute's elements in the matching Rust type.
/// `PartialEq` is derived so values can be compared directly; `Eq` is not,
/// because the float variants do not support it.
#[derive(Debug, Clone, PartialEq)]
pub enum NcAttrValue {
    Bytes(Vec<i8>),
    Chars(String),
    Shorts(Vec<i16>),
    Ints(Vec<i32>),
    Floats(Vec<f32>),
    Doubles(Vec<f64>),
    UBytes(Vec<u8>),
    UShorts(Vec<u16>),
    UInts(Vec<u32>),
    Int64s(Vec<i64>),
    UInt64s(Vec<u64>),
    Strings(Vec<String>),
}

impl NcAttrValue {
    /// Get the value as a string (for Chars or single-element Strings).
    ///
    /// Returns `None` for numeric variants and for `Strings` holding zero or
    /// more than one element.
    pub fn as_string(&self) -> Option<String> {
        match self {
            NcAttrValue::Chars(text) => Some(text.clone()),
            // The slice pattern accepts exactly one element without indexing.
            NcAttrValue::Strings(items) => match items.as_slice() {
                [only] => Some(only.clone()),
                _ => None,
            },
            _ => None,
        }
    }

    /// Get the value as f64 (with numeric promotion from the first element).
    ///
    /// Returns `None` for `Chars`/`Strings` and for empty numeric vectors.
    pub fn as_f64(&self) -> Option<f64> {
        match self {
            NcAttrValue::Bytes(v) => v.first().copied().map(f64::from),
            NcAttrValue::Shorts(v) => v.first().copied().map(f64::from),
            NcAttrValue::Ints(v) => v.first().copied().map(f64::from),
            NcAttrValue::Floats(v) => v.first().copied().map(f64::from),
            NcAttrValue::Doubles(v) => v.first().copied(),
            NcAttrValue::UBytes(v) => v.first().copied().map(f64::from),
            NcAttrValue::UShorts(v) => v.first().copied().map(f64::from),
            NcAttrValue::UInts(v) => v.first().copied().map(f64::from),
            // 64-bit integers have no lossless conversion to f64; magnitudes
            // above 2^53 are rounded to the nearest representable value.
            NcAttrValue::Int64s(v) => v.first().map(|&x| x as f64),
            NcAttrValue::UInt64s(v) => v.first().map(|&x| x as f64),
            NcAttrValue::Chars(_) | NcAttrValue::Strings(_) => None,
        }
    }

    /// Get the value as a vector of f64 (with numeric promotion).
    ///
    /// Returns `None` for `Chars`/`Strings`; an empty numeric vector yields
    /// `Some(vec![])`.
    pub fn as_f64_vec(&self) -> Option<Vec<f64>> {
        match self {
            NcAttrValue::Bytes(v) => Some(v.iter().copied().map(f64::from).collect()),
            NcAttrValue::Shorts(v) => Some(v.iter().copied().map(f64::from).collect()),
            NcAttrValue::Ints(v) => Some(v.iter().copied().map(f64::from).collect()),
            NcAttrValue::Floats(v) => Some(v.iter().copied().map(f64::from).collect()),
            NcAttrValue::Doubles(v) => Some(v.clone()),
            NcAttrValue::UBytes(v) => Some(v.iter().copied().map(f64::from).collect()),
            NcAttrValue::UShorts(v) => Some(v.iter().copied().map(f64::from).collect()),
            NcAttrValue::UInts(v) => Some(v.iter().copied().map(f64::from).collect()),
            // Lossy for magnitudes above 2^53, as in `as_f64`.
            NcAttrValue::Int64s(v) => Some(v.iter().map(|&x| x as f64).collect()),
            NcAttrValue::UInt64s(v) => Some(v.iter().map(|&x| x as f64).collect()),
            NcAttrValue::Chars(_) | NcAttrValue::Strings(_) => None,
        }
    }
}

181/// A NetCDF attribute.
182#[derive(Debug, Clone)]
183pub struct NcAttribute {
184    pub name: String,
185    pub value: NcAttrValue,
186}
187
188/// A NetCDF variable (metadata only -- data is read on demand).
189#[derive(Debug, Clone)]
190pub struct NcVariable {
191    pub name: String,
192    pub dimensions: Vec<NcDimension>,
193    pub dtype: NcType,
194    pub attributes: Vec<NcAttribute>,
195    /// For classic: file byte offset to the start of this variable's data.
196    /// For nc4: HDF5 dataset object header address.
197    pub(crate) data_offset: u64,
198    /// Total data size in bytes (for non-record variables).
199    pub(crate) _data_size: u64,
200    /// Whether this variable uses the unlimited (record) dimension.
201    pub(crate) is_record_var: bool,
202    /// Size of one record slice in bytes (only meaningful for record variables).
203    pub(crate) record_size: u64,
204}
205
206impl NcVariable {
207    /// Variable name.
208    pub fn name(&self) -> &str {
209        &self.name
210    }
211
212    /// Variable dimensions.
213    pub fn dimensions(&self) -> &[NcDimension] {
214        &self.dimensions
215    }
216
217    /// Returns the dimension for a CF/NetCDF coordinate variable.
218    ///
219    /// A coordinate variable is one-dimensional and has the same name as its
220    /// dimension. NetCDF-4 stores these as HDF5 dimension scales, but they are
221    /// exposed here with the same shape as classic NetCDF coordinate variables.
222    pub fn coordinate_dimension(&self) -> Option<&NcDimension> {
223        match self.dimensions.as_slice() {
224            [dim] if dim.name == self.name => Some(dim),
225            _ => None,
226        }
227    }
228
229    /// Returns true when this variable is a CF/NetCDF coordinate variable.
230    pub fn is_coordinate_variable(&self) -> bool {
231        self.coordinate_dimension().is_some()
232    }
233
234    /// Returns true when this variable is the coordinate variable for a named
235    /// dimension.
236    pub fn is_coordinate_variable_for(&self, dimension_name: &str) -> bool {
237        self.coordinate_dimension()
238            .is_some_and(|dim| dim.name == dimension_name)
239    }
240
241    /// Variable data type.
242    pub fn dtype(&self) -> &NcType {
243        &self.dtype
244    }
245
246    /// Shape of the variable as a vector of dimension sizes.
247    pub fn shape(&self) -> Vec<u64> {
248        self.dimensions.iter().map(|d| d.size).collect()
249    }
250
251    /// Variable attributes.
252    pub fn attributes(&self) -> &[NcAttribute] {
253        &self.attributes
254    }
255
256    /// Find an attribute by name.
257    pub fn attribute(&self, name: &str) -> Option<&NcAttribute> {
258        self.attributes.iter().find(|a| a.name == name)
259    }
260
261    /// Number of dimensions.
262    pub fn ndim(&self) -> usize {
263        self.dimensions.len()
264    }
265
266    /// Total number of elements.
267    pub fn num_elements(&self) -> u64 {
268        if self.dimensions.is_empty() {
269            return 1; // scalar
270        }
271        self.dimensions.iter().map(|d| d.size).product()
272    }
273
274    pub(crate) fn checked_num_elements(&self) -> crate::Result<u64> {
275        checked_shape_elements(&self.shape(), "variable element count")
276    }
277}
278
279/// A NetCDF group (NetCDF-4 only; classic files have one implicit root group).
280#[derive(Debug, Clone)]
281pub struct NcGroup {
282    pub name: String,
283    pub dimensions: Vec<NcDimension>,
284    pub variables: Vec<NcVariable>,
285    pub attributes: Vec<NcAttribute>,
286    pub groups: Vec<NcGroup>,
287}
288
289impl NcGroup {
290    /// Find a variable by name in this group.
291    pub fn variable(&self, name: &str) -> Option<&NcVariable> {
292        let (group_path, variable_name) = split_parent_path(name)?;
293        let group = self.group(group_path)?;
294        group.variables.iter().find(|v| v.name == variable_name)
295    }
296
297    /// Find a dimension by name in this group.
298    pub fn dimension(&self, name: &str) -> Option<&NcDimension> {
299        let (group_path, dimension_name) = split_parent_path(name)?;
300        let group = self.group(group_path)?;
301        group.dimensions.iter().find(|d| d.name == dimension_name)
302    }
303
304    /// Find the coordinate variable for a dimension in this group.
305    ///
306    /// `name` may be a local dimension name or a path relative to this group,
307    /// for example `time` or `forecast/time`.
308    pub fn coordinate_variable(&self, name: &str) -> Option<&NcVariable> {
309        let (group_path, dimension_name) = split_parent_path(name)?;
310        let group = self.group(group_path)?;
311        group
312            .variables
313            .iter()
314            .find(|var| var.is_coordinate_variable_for(dimension_name))
315    }
316
317    /// Iterate over coordinate variables declared in this group.
318    pub fn coordinate_variables(&self) -> impl Iterator<Item = &NcVariable> {
319        self.variables
320            .iter()
321            .filter(|var| var.is_coordinate_variable())
322    }
323
324    /// Find an attribute by name in this group.
325    pub fn attribute(&self, name: &str) -> Option<&NcAttribute> {
326        let (group_path, attribute_name) = split_parent_path(name)?;
327        let group = self.group(group_path)?;
328        group.attributes.iter().find(|a| a.name == attribute_name)
329    }
330
331    /// Find a child group by relative path.
332    pub fn group(&self, name: &str) -> Option<&NcGroup> {
333        let trimmed = name.trim_matches('/');
334        if trimmed.is_empty() {
335            return Some(self);
336        }
337
338        let mut group = self;
339        for component in trimmed.split('/').filter(|part| !part.is_empty()) {
340            group = group.groups.iter().find(|child| child.name == component)?;
341        }
342
343        Some(group)
344    }
345}
346
/// Splits a `/`-separated path into `(parent_path, leaf_name)`.
///
/// Leading and trailing slashes are ignored. A path without any remaining
/// `/` yields an empty parent path. Returns `None` when nothing is left after
/// trimming (empty input or only slashes).
fn split_parent_path(path: &str) -> Option<(&str, &str)> {
    let trimmed = path.trim_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    // `trimmed` is non-empty and does not end with '/', so the text after the
    // last '/' can never be empty; no separate empty-leaf case is needed.
    Some(trimmed.rsplit_once('/').unwrap_or(("", trimmed)))
}

360pub(crate) fn checked_usize_from_u64(value: u64, context: &str) -> crate::Result<usize> {
361    usize::try_from(value)
362        .map_err(|_| crate::Error::InvalidData(format!("{context} exceeds platform usize")))
363}
364
365pub(crate) fn checked_mul_u64(lhs: u64, rhs: u64, context: &str) -> crate::Result<u64> {
366    lhs.checked_mul(rhs)
367        .ok_or_else(|| crate::Error::InvalidData(format!("{context} exceeds u64 capacity")))
368}
369
370pub(crate) fn checked_shape_elements(shape: &[u64], context: &str) -> crate::Result<u64> {
371    shape
372        .iter()
373        .try_fold(1u64, |acc, &dim| checked_mul_u64(acc, dim, context))
374}
375
/// Hyperslab selection for reading slices of NetCDF variables.
///
/// Each element corresponds to one dimension of the variable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NcSliceInfo {
    /// One selection per variable dimension.
    pub selections: Vec<NcSliceInfoElem>,
}

/// A single dimension's selection within a hyperslab.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NcSliceInfoElem {
    /// Select a single index (reduces dimensionality).
    Index(u64),
    /// Select a range with stride.
    Slice { start: u64, end: u64, step: u64 },
}

impl NcSliceInfo {
    /// Create a selection that reads everything for an `ndim`-dimensional variable.
    ///
    /// Every dimension gets `Slice { start: 0, end: u64::MAX, step: 1 }`.
    /// NOTE(review): `u64::MAX` acts as an open-ended upper bound here; the
    /// consumer of the selection presumably clamps it to the dimension
    /// length -- confirm against the reader code.
    pub fn all(ndim: usize) -> Self {
        let full_range = NcSliceInfoElem::Slice {
            start: 0,
            end: u64::MAX,
            step: 1,
        };
        NcSliceInfo {
            selections: vec![full_range; ndim],
        }
    }
}

#[cfg(feature = "netcdf4")]
impl NcSliceInfo {
    /// Builds the equivalent `hdf5_reader::SliceInfo` for NC4 delegation,
    /// mapping each selection onto the same-named `hdf5_reader::SliceInfoElem`
    /// variant with its values copied unchanged.
    pub(crate) fn to_hdf5_slice_info(&self) -> hdf5_reader::SliceInfo {
        let convert = |elem: &NcSliceInfoElem| match *elem {
            NcSliceInfoElem::Index(idx) => hdf5_reader::SliceInfoElem::Index(idx),
            NcSliceInfoElem::Slice { start, end, step } => {
                hdf5_reader::SliceInfoElem::Slice { start, end, step }
            }
        };

        hdf5_reader::SliceInfo {
            selections: self.selections.iter().map(convert).collect(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fixed-size (non-unlimited) dimension.
    fn fixed_dim(name: &str, size: u64) -> NcDimension {
        NcDimension {
            name: name.to_string(),
            size,
            is_unlimited: false,
        }
    }

    /// Builds a non-record variable with no attributes and zeroed storage
    /// metadata.
    fn plain_var(
        name: &str,
        dimensions: Vec<NcDimension>,
        dtype: NcType,
        record_size: u64,
    ) -> NcVariable {
        NcVariable {
            name: name.to_string(),
            dimensions,
            dtype,
            attributes: vec![],
            data_offset: 0,
            _data_size: 0,
            is_record_var: false,
            record_size,
        }
    }

    /// Builds a text (`Chars`) attribute.
    fn text_attr(name: &str, text: &str) -> NcAttribute {
        NcAttribute {
            name: name.to_string(),
            value: NcAttrValue::Chars(text.to_string()),
        }
    }

    /// Root group `/` containing `obs`, which in turn contains `surface`.
    fn sample_group_tree() -> NcGroup {
        let surface = NcGroup {
            name: "surface".to_string(),
            dimensions: vec![],
            variables: vec![plain_var("pressure", vec![], NcType::Double, 8)],
            attributes: vec![text_attr("units", "hPa")],
            groups: vec![],
        };

        let obs = NcGroup {
            name: "obs".to_string(),
            dimensions: vec![fixed_dim("time", 3)],
            variables: vec![plain_var("temperature", vec![], NcType::Float, 4)],
            attributes: vec![],
            groups: vec![surface],
        };

        NcGroup {
            name: "/".to_string(),
            dimensions: vec![fixed_dim("root_dim", 2)],
            variables: vec![plain_var("root_var", vec![], NcType::Int, 4)],
            attributes: vec![text_attr("title", "root")],
            groups: vec![obs],
        }
    }

    #[test]
    fn test_group_path_lookup() {
        let root = sample_group_tree();

        let surface = root.group("obs/surface").unwrap();
        assert_eq!(surface.name, "surface");
        assert!(root.group("/obs/surface").is_some());
        assert!(root.group("missing").is_none());
    }

    #[test]
    fn test_variable_path_lookup() {
        let root = sample_group_tree();

        assert_eq!(root.variable("root_var").unwrap().name(), "root_var");
        assert_eq!(
            root.variable("obs/temperature").unwrap().dtype(),
            &NcType::Float
        );
        assert_eq!(
            root.variable("/obs/surface/pressure").unwrap().dtype(),
            &NcType::Double
        );
        // A bare leaf name is only looked up in the starting group.
        assert!(root.variable("pressure").is_none());
    }

    #[test]
    fn test_dimension_and_attribute_path_lookup() {
        let root = sample_group_tree();

        assert_eq!(root.dimension("root_dim").unwrap().size, 2);
        assert_eq!(root.dimension("obs/time").unwrap().size, 3);
        assert_eq!(
            root.attribute("title").unwrap().value.as_string().unwrap(),
            "root"
        );
        assert_eq!(
            root.attribute("obs/surface/units")
                .unwrap()
                .value
                .as_string()
                .unwrap(),
            "hPa"
        );
    }

    #[test]
    fn test_coordinate_variable_detection_and_lookup() {
        let time_dim = fixed_dim("time", 3);
        let lat_dim = fixed_dim("lat", 2);

        // `time(time)` is a coordinate variable; `temperature(time, lat)` is not.
        let time = plain_var("time", vec![time_dim.clone()], NcType::Double, 8);
        let temperature = plain_var(
            "temperature",
            vec![time_dim.clone(), lat_dim.clone()],
            NcType::Float,
            4,
        );
        let group = NcGroup {
            name: "/".to_string(),
            dimensions: vec![time_dim, lat_dim],
            variables: vec![time.clone(), temperature],
            attributes: vec![],
            groups: vec![],
        };

        assert!(time.is_coordinate_variable());
        assert_eq!(time.coordinate_dimension().unwrap().name, "time");
        assert_eq!(group.coordinate_variable("time").unwrap().name(), "time");
        assert!(group.coordinate_variable("lat").is_none());

        let names: Vec<&str> = group.coordinate_variables().map(NcVariable::name).collect();
        assert_eq!(names, vec!["time"]);
    }

    #[test]
    fn test_checked_shape_elements_overflow() {
        let err = checked_shape_elements(&[u64::MAX, 2], "test overflow").unwrap_err();
        assert!(matches!(err, crate::Error::InvalidData(_)));
    }
}