Skip to main content

netcdf_reader/
types.rs

/// A NetCDF dimension.
///
/// Pairs a dimension name with its length. [`NcVariable::shape`] reports the
/// `size` of each of a variable's dimensions in order.
#[derive(Debug, Clone)]
pub struct NcDimension {
    /// Name of the dimension.
    pub name: String,
    /// Length of the dimension (number of entries along it).
    pub size: u64,
    /// Whether this dimension is unlimited (a record dimension).
    // NOTE(review): presumably `size` then holds the current record count -- confirm.
    pub is_unlimited: bool,
}
8
/// A field within a compound (struct) type.
///
/// Instances are stored in the `fields` list of [`NcType::Compound`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NcCompoundField {
    /// Name of the field.
    pub name: String,
    /// Offset of the field within the enclosing compound value.
    // NOTE(review): presumably in bytes, like the compound's `size` -- confirm.
    pub offset: u64,
    /// Data type of the field.
    pub dtype: NcType,
}
16
17/// NetCDF data types.
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum NcType {
20    /// NC_BYTE (i8)
21    Byte,
22    /// NC_CHAR (u8/char)
23    Char,
24    /// NC_SHORT (i16)
25    Short,
26    /// NC_INT (i32)
27    Int,
28    /// NC_FLOAT (f32)
29    Float,
30    /// NC_DOUBLE (f64)
31    Double,
32    /// NC_UBYTE (u8, CDF-5)
33    UByte,
34    /// NC_USHORT (u16, CDF-5)
35    UShort,
36    /// NC_UINT (u32, CDF-5)
37    UInt,
38    /// NC_INT64 (i64, CDF-5)
39    Int64,
40    /// NC_UINT64 (u64, CDF-5)
41    UInt64,
42    /// NetCDF-4 only (variable-length string)
43    String,
44    /// NetCDF-4 compound type (struct with named fields).
45    Compound {
46        size: u32,
47        fields: Vec<NcCompoundField>,
48    },
49    /// NetCDF-4 opaque type (uninterpreted byte blob).
50    Opaque { size: u32, tag: String },
51    /// NetCDF-4 array type (fixed-size array of a base type).
52    Array { base: Box<NcType>, dims: Vec<u64> },
53    /// NetCDF-4 variable-length type.
54    VLen { base: Box<NcType> },
55}
56
57impl NcType {
58    /// Size of a single element in bytes.
59    pub fn size(&self) -> usize {
60        match self {
61            NcType::Byte | NcType::Char | NcType::UByte => 1,
62            NcType::Short | NcType::UShort => 2,
63            NcType::Int | NcType::UInt | NcType::Float => 4,
64            NcType::Int64 | NcType::UInt64 | NcType::Double => 8,
65            // Variable-length string; no fixed element size, but pointer-sized in memory.
66            NcType::String => std::mem::size_of::<usize>(),
67            NcType::Compound { size, .. } => *size as usize,
68            NcType::Opaque { size, .. } => *size as usize,
69            NcType::Array { base, dims } => {
70                base.size() * dims.iter().map(|&d| d as usize).product::<usize>()
71            }
72            NcType::VLen { .. } => std::mem::size_of::<usize>(), // pointer-sized
73        }
74    }
75
76    /// The numeric type code used in CDF-1/2/5 headers.
77    pub fn classic_type_code(&self) -> Option<u32> {
78        match self {
79            NcType::Byte => Some(1),
80            NcType::Char => Some(2),
81            NcType::Short => Some(3),
82            NcType::Int => Some(4),
83            NcType::Float => Some(5),
84            NcType::Double => Some(6),
85            NcType::UByte => Some(7),
86            NcType::UShort => Some(8),
87            NcType::UInt => Some(9),
88            NcType::Int64 => Some(10),
89            NcType::UInt64 => Some(11),
90            // Extended types are not valid in classic format.
91            NcType::String
92            | NcType::Compound { .. }
93            | NcType::Opaque { .. }
94            | NcType::Array { .. }
95            | NcType::VLen { .. } => None,
96        }
97    }
98
99    /// Returns true if this is a primitive numeric or string type.
100    pub fn is_primitive(&self) -> bool {
101        matches!(
102            self,
103            NcType::Byte
104                | NcType::Char
105                | NcType::Short
106                | NcType::Int
107                | NcType::Float
108                | NcType::Double
109                | NcType::UByte
110                | NcType::UShort
111                | NcType::UInt
112                | NcType::Int64
113                | NcType::UInt64
114                | NcType::String
115        )
116    }
117}
118
/// A NetCDF attribute value.
///
/// Each variant holds the attribute's elements in the matching Rust type;
/// `Chars` holds character data as a single `String`.
#[derive(Debug, Clone)]
pub enum NcAttrValue {
    Bytes(Vec<i8>),
    Chars(String),
    Shorts(Vec<i16>),
    Ints(Vec<i32>),
    Floats(Vec<f32>),
    Doubles(Vec<f64>),
    UBytes(Vec<u8>),
    UShorts(Vec<u16>),
    UInts(Vec<u32>),
    Int64s(Vec<i64>),
    UInt64s(Vec<u64>),
    Strings(Vec<String>),
}

impl NcAttrValue {
    /// Returns the attribute as an owned string.
    ///
    /// Yields `Some` for `Chars` and for `Strings` holding exactly one
    /// element; every other value yields `None`.
    pub fn as_string(&self) -> Option<String> {
        match self {
            NcAttrValue::Chars(text) => Some(text.clone()),
            NcAttrValue::Strings(items) => match items.as_slice() {
                [only] => Some(only.clone()),
                _ => None,
            },
            _ => None,
        }
    }

    /// Returns the first element promoted to `f64`.
    ///
    /// Yields `None` for an empty numeric value and for `Chars`/`Strings`.
    pub fn as_f64(&self) -> Option<f64> {
        match self {
            NcAttrValue::Bytes(values) => values.first().copied().map(f64::from),
            NcAttrValue::Shorts(values) => values.first().copied().map(f64::from),
            NcAttrValue::Ints(values) => values.first().copied().map(f64::from),
            NcAttrValue::Floats(values) => values.first().copied().map(f64::from),
            NcAttrValue::Doubles(values) => values.first().copied(),
            NcAttrValue::UBytes(values) => values.first().copied().map(f64::from),
            NcAttrValue::UShorts(values) => values.first().copied().map(f64::from),
            NcAttrValue::UInts(values) => values.first().copied().map(f64::from),
            // 64-bit integers have no lossless conversion to f64; `as` rounds.
            NcAttrValue::Int64s(values) => values.first().map(|&n| n as f64),
            NcAttrValue::UInt64s(values) => values.first().map(|&n| n as f64),
            NcAttrValue::Chars(_) | NcAttrValue::Strings(_) => None,
        }
    }

    /// Returns every element promoted to `f64`.
    ///
    /// Yields `None` for `Chars`/`Strings`; an empty numeric value yields an
    /// empty vector.
    pub fn as_f64_vec(&self) -> Option<Vec<f64>> {
        let promoted = match self {
            NcAttrValue::Bytes(values) => values.iter().copied().map(f64::from).collect(),
            NcAttrValue::Shorts(values) => values.iter().copied().map(f64::from).collect(),
            NcAttrValue::Ints(values) => values.iter().copied().map(f64::from).collect(),
            NcAttrValue::Floats(values) => values.iter().copied().map(f64::from).collect(),
            NcAttrValue::Doubles(values) => values.to_vec(),
            NcAttrValue::UBytes(values) => values.iter().copied().map(f64::from).collect(),
            NcAttrValue::UShorts(values) => values.iter().copied().map(f64::from).collect(),
            NcAttrValue::UInts(values) => values.iter().copied().map(f64::from).collect(),
            // 64-bit integers have no lossless conversion to f64; `as` rounds.
            NcAttrValue::Int64s(values) => values.iter().map(|&n| n as f64).collect(),
            NcAttrValue::UInt64s(values) => values.iter().map(|&n| n as f64).collect(),
            NcAttrValue::Chars(_) | NcAttrValue::Strings(_) => return None,
        };
        Some(promoted)
    }
}
180
/// A NetCDF attribute.
///
/// A named value attached to a variable ([`NcVariable::attributes`]) or to a
/// group (the `attributes` field of [`NcGroup`]).
#[derive(Debug, Clone)]
pub struct NcAttribute {
    /// Name of the attribute.
    pub name: String,
    /// Typed value of the attribute.
    pub value: NcAttrValue,
}
187
188/// A NetCDF variable (metadata only -- data is read on demand).
189#[derive(Debug, Clone)]
190pub struct NcVariable {
191    pub name: String,
192    pub dimensions: Vec<NcDimension>,
193    pub dtype: NcType,
194    pub attributes: Vec<NcAttribute>,
195    /// For classic: file byte offset to the start of this variable's data.
196    /// For nc4: HDF5 dataset object header address.
197    pub(crate) data_offset: u64,
198    /// Total data size in bytes (for non-record variables).
199    pub(crate) _data_size: u64,
200    /// Whether this variable uses the unlimited (record) dimension.
201    pub(crate) is_record_var: bool,
202    /// Size of one record slice in bytes (only meaningful for record variables).
203    pub(crate) record_size: u64,
204}
205
206impl NcVariable {
207    /// Variable name.
208    pub fn name(&self) -> &str {
209        &self.name
210    }
211
212    /// Variable dimensions.
213    pub fn dimensions(&self) -> &[NcDimension] {
214        &self.dimensions
215    }
216
217    /// Variable data type.
218    pub fn dtype(&self) -> &NcType {
219        &self.dtype
220    }
221
222    /// Shape of the variable as a vector of dimension sizes.
223    pub fn shape(&self) -> Vec<u64> {
224        self.dimensions.iter().map(|d| d.size).collect()
225    }
226
227    /// Variable attributes.
228    pub fn attributes(&self) -> &[NcAttribute] {
229        &self.attributes
230    }
231
232    /// Find an attribute by name.
233    pub fn attribute(&self, name: &str) -> Option<&NcAttribute> {
234        self.attributes.iter().find(|a| a.name == name)
235    }
236
237    /// Number of dimensions.
238    pub fn ndim(&self) -> usize {
239        self.dimensions.len()
240    }
241
242    /// Total number of elements.
243    pub fn num_elements(&self) -> u64 {
244        if self.dimensions.is_empty() {
245            return 1; // scalar
246        }
247        self.dimensions.iter().map(|d| d.size).product()
248    }
249
250    pub(crate) fn checked_num_elements(&self) -> crate::Result<u64> {
251        checked_shape_elements(&self.shape(), "variable element count")
252    }
253}
254
255/// A NetCDF group (NetCDF-4 only; classic files have one implicit root group).
256#[derive(Debug, Clone)]
257pub struct NcGroup {
258    pub name: String,
259    pub dimensions: Vec<NcDimension>,
260    pub variables: Vec<NcVariable>,
261    pub attributes: Vec<NcAttribute>,
262    pub groups: Vec<NcGroup>,
263}
264
265impl NcGroup {
266    /// Find a variable by name in this group.
267    pub fn variable(&self, name: &str) -> Option<&NcVariable> {
268        let (group_path, variable_name) = split_parent_path(name)?;
269        let group = self.group(group_path)?;
270        group.variables.iter().find(|v| v.name == variable_name)
271    }
272
273    /// Find a dimension by name in this group.
274    pub fn dimension(&self, name: &str) -> Option<&NcDimension> {
275        let (group_path, dimension_name) = split_parent_path(name)?;
276        let group = self.group(group_path)?;
277        group.dimensions.iter().find(|d| d.name == dimension_name)
278    }
279
280    /// Find an attribute by name in this group.
281    pub fn attribute(&self, name: &str) -> Option<&NcAttribute> {
282        let (group_path, attribute_name) = split_parent_path(name)?;
283        let group = self.group(group_path)?;
284        group.attributes.iter().find(|a| a.name == attribute_name)
285    }
286
287    /// Find a child group by relative path.
288    pub fn group(&self, name: &str) -> Option<&NcGroup> {
289        let trimmed = name.trim_matches('/');
290        if trimmed.is_empty() {
291            return Some(self);
292        }
293
294        let mut group = self;
295        for component in trimmed.split('/').filter(|part| !part.is_empty()) {
296            group = group.groups.iter().find(|child| child.name == component)?;
297        }
298
299        Some(group)
300    }
301}
302
/// Splits a `/`-separated object path into `(parent_group_path, leaf_name)`.
///
/// Leading and trailing slashes are ignored. Returns `None` when nothing is
/// left after trimming (an empty path or only slashes). A path with no
/// interior separator yields an empty parent path.
fn split_parent_path(path: &str) -> Option<(&str, &str)> {
    let trimmed = path.trim_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    // `trimmed` is non-empty and cannot end with '/', so the text after the
    // last separator is never empty and needs no separate empty-leaf check.
    Some(trimmed.rsplit_once('/').unwrap_or(("", trimmed)))
}
315
316pub(crate) fn checked_usize_from_u64(value: u64, context: &str) -> crate::Result<usize> {
317    usize::try_from(value)
318        .map_err(|_| crate::Error::InvalidData(format!("{context} exceeds platform usize")))
319}
320
321pub(crate) fn checked_mul_u64(lhs: u64, rhs: u64, context: &str) -> crate::Result<u64> {
322    lhs.checked_mul(rhs)
323        .ok_or_else(|| crate::Error::InvalidData(format!("{context} exceeds u64 capacity")))
324}
325
326pub(crate) fn checked_shape_elements(shape: &[u64], context: &str) -> crate::Result<u64> {
327    shape
328        .iter()
329        .try_fold(1u64, |acc, &dim| checked_mul_u64(acc, dim, context))
330}
331
/// Hyperslab selection for reading slices of NetCDF variables.
///
/// Each element corresponds to one dimension of the variable.
/// Use [`NcSliceInfo::all`] to build a selection that covers every dimension
/// in full.
#[derive(Debug, Clone)]
pub struct NcSliceInfo {
    /// Per-dimension selections, one entry per variable dimension.
    pub selections: Vec<NcSliceInfoElem>,
}
339
/// A single dimension's selection within a hyperslab.
#[derive(Debug, Clone)]
pub enum NcSliceInfoElem {
    /// Select a single index (reduces dimensionality).
    Index(u64),
    /// Select a range with stride.
    ///
    /// [`NcSliceInfo::all`] uses `start: 0`, `end: u64::MAX`, `step: 1` to
    /// mean "the whole dimension".
    // NOTE(review): presumably `end` is exclusive and clamped to the dimension
    // length by the reader -- confirm against the code that consumes this.
    Slice { start: u64, end: u64, step: u64 },
}
348
349impl NcSliceInfo {
350    /// Create a selection that reads everything for an `ndim`-dimensional variable.
351    pub fn all(ndim: usize) -> Self {
352        NcSliceInfo {
353            selections: vec![
354                NcSliceInfoElem::Slice {
355                    start: 0,
356                    end: u64::MAX,
357                    step: 1,
358                };
359                ndim
360            ],
361        }
362    }
363}
364
#[cfg(feature = "netcdf4")]
impl NcSliceInfo {
    /// Converts this selection to an `hdf5_reader::SliceInfo` for NC4 delegation.
    ///
    /// Each element is mapped one-to-one onto the `hdf5_reader` variant of
    /// the same name, with its values copied unchanged.
    pub(crate) fn to_hdf5_slice_info(&self) -> hdf5_reader::SliceInfo {
        let selections = self
            .selections
            .iter()
            .map(|elem| match *elem {
                NcSliceInfoElem::Index(index) => hdf5_reader::SliceInfoElem::Index(index),
                NcSliceInfoElem::Slice { start, end, step } => {
                    hdf5_reader::SliceInfoElem::Slice { start, end, step }
                }
            })
            .collect();

        hdf5_reader::SliceInfo { selections }
    }
}
387
// Unit tests for path-based lookups on `NcGroup` and for overflow detection
// in `checked_shape_elements`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a three-level fixture:
    ///
    /// ```text
    /// /                dim root_dim=2, var root_var (Int), attr title="root"
    /// └── obs          dim time=3,     var temperature (Float)
    ///     └── surface                  var pressure (Double), attr units="hPa"
    /// ```
    fn sample_group_tree() -> NcGroup {
        NcGroup {
            name: "/".to_string(),
            dimensions: vec![NcDimension {
                name: "root_dim".to_string(),
                size: 2,
                is_unlimited: false,
            }],
            variables: vec![NcVariable {
                name: "root_var".to_string(),
                dimensions: vec![],
                dtype: NcType::Int,
                attributes: vec![],
                data_offset: 0,
                _data_size: 0,
                is_record_var: false,
                record_size: 4,
            }],
            attributes: vec![NcAttribute {
                name: "title".to_string(),
                value: NcAttrValue::Chars("root".to_string()),
            }],
            groups: vec![NcGroup {
                name: "obs".to_string(),
                dimensions: vec![NcDimension {
                    name: "time".to_string(),
                    size: 3,
                    is_unlimited: false,
                }],
                variables: vec![NcVariable {
                    name: "temperature".to_string(),
                    dimensions: vec![],
                    dtype: NcType::Float,
                    attributes: vec![],
                    data_offset: 0,
                    _data_size: 0,
                    is_record_var: false,
                    record_size: 4,
                }],
                attributes: vec![],
                groups: vec![NcGroup {
                    name: "surface".to_string(),
                    dimensions: vec![],
                    variables: vec![NcVariable {
                        name: "pressure".to_string(),
                        dimensions: vec![],
                        dtype: NcType::Double,
                        attributes: vec![],
                        data_offset: 0,
                        _data_size: 0,
                        is_record_var: false,
                        record_size: 8,
                    }],
                    attributes: vec![NcAttribute {
                        name: "units".to_string(),
                        value: NcAttrValue::Chars("hPa".to_string()),
                    }],
                    groups: vec![],
                }],
            }],
        }
    }

    /// Nested groups resolve by relative path, with or without a leading `/`;
    /// an unknown component yields `None`.
    #[test]
    fn test_group_path_lookup() {
        let root = sample_group_tree();

        let surface = root.group("obs/surface").unwrap();
        assert_eq!(surface.name, "surface");
        assert!(root.group("/obs/surface").is_some());
        assert!(root.group("missing").is_none());
    }

    /// Variables resolve by bare name in the starting group and by path in
    /// nested groups; a bare name does not search child groups.
    #[test]
    fn test_variable_path_lookup() {
        let root = sample_group_tree();

        assert_eq!(root.variable("root_var").unwrap().name(), "root_var");
        assert_eq!(
            root.variable("obs/temperature").unwrap().dtype(),
            &NcType::Float
        );
        assert_eq!(
            root.variable("/obs/surface/pressure").unwrap().dtype(),
            &NcType::Double
        );
        // "pressure" lives two levels down, so a bare lookup from root fails.
        assert!(root.variable("pressure").is_none());
    }

    /// Dimensions and attributes follow the same path rules as variables.
    #[test]
    fn test_dimension_and_attribute_path_lookup() {
        let root = sample_group_tree();

        assert_eq!(root.dimension("root_dim").unwrap().size, 2);
        assert_eq!(root.dimension("obs/time").unwrap().size, 3);
        assert_eq!(
            root.attribute("title").unwrap().value.as_string().unwrap(),
            "root"
        );
        assert_eq!(
            root.attribute("obs/surface/units")
                .unwrap()
                .value
                .as_string()
                .unwrap(),
            "hPa"
        );
    }

    /// A shape whose element count exceeds `u64` is reported as `InvalidData`.
    #[test]
    fn test_checked_shape_elements_overflow() {
        let err = checked_shape_elements(&[u64::MAX, 2], "test overflow").unwrap_err();
        assert!(matches!(err, crate::Error::InvalidData(_)));
    }
}
502    fn test_checked_shape_elements_overflow() {
503        let err = checked_shape_elements(&[u64::MAX, 2], "test overflow").unwrap_err();
504        assert!(matches!(err, crate::Error::InvalidData(_)));
505    }
506}