Skip to main content

netcdf_reader/nc4/
mod.rs

1//! NetCDF-4 (HDF5-backed) format support.
2//!
3//! This module maps the HDF5 data model to the NetCDF data model:
4//! - HDF5 groups become NetCDF groups
5//! - HDF5 datasets become NetCDF variables
6//! - HDF5 attributes become NetCDF attributes
7//! - Dimensions are reconstructed from `DIMENSION_LIST` and `_Netcdf4Dimid` attributes
8//!
9//! Requires the `netcdf4` feature (enabled by default).
10
11pub mod attributes;
12pub mod dimensions;
13pub mod groups;
14pub mod types;
15pub mod variables;
16
17use std::path::Path;
18
19use hdf5_reader::datatype_api::H5Type;
20use hdf5_reader::Hdf5File;
21use ndarray::ArrayD;
22#[cfg(feature = "rayon")]
23use rayon::ThreadPool;
24
25use crate::error::{Error, Result};
26use crate::types::{NcGroup, NcType};
27
/// Dispatch on `NcType` to read numeric data and promote every element to `f64`.
///
/// * `$dtype` must be an expression of type `&NcType`.
/// * `|$T| $read_expr` looks like a closure but is not one: in each numeric
///   arm the macro declares a local alias `type $T = <element type>;` and then
///   evaluates `$read_expr`, so the expression can name the element type
///   through the identifier chosen by the caller.
///
/// `$read_expr` must evaluate to a `Result` whose `Ok` value provides `mapv`.
/// The macro applies `?` to it, so the macro can only be expanded inside a
/// function whose error type accepts that error. The expansion itself is a
/// `Result` expression: `Ok(array_of_f64)` for numeric types and
/// `Err(Error::TypeMismatch { .. })` for `Char`, `String` and any other
/// non-numeric type.
///
/// Conversions from `i8`, `i16`, `i32`, `u8`, `u16`, `u32` and `f32` are
/// lossless. `i64` and `u64` values that are not exactly representable as an
/// `f64` are rounded by the `as` cast.
///
/// Usage:
/// ```ignore
/// dispatch_read_as_f64!(&var.dtype, |T| dataset.read_array::<T>())
/// ```
macro_rules! dispatch_read_as_f64 {
    ($dtype:expr, |$T:ident| $read_expr:expr) => {{
        // `$crate` paths keep the expansion independent of what the call site
        // happens to have imported.
        use $crate::types::NcType;
        match $dtype {
            NcType::Byte => {
                type $T = i8;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::Short => {
                type $T = i16;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::Int => {
                type $T = i32;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::Float => {
                type $T = f32;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::Double => {
                // Already `f64`: hand the array back without a copy.
                type $T = f64;
                Ok($read_expr?)
            }
            NcType::UByte => {
                type $T = u8;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::UShort => {
                type $T = u16;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::UInt => {
                type $T = u32;
                let arr = $read_expr?;
                Ok(arr.mapv(f64::from))
            }
            NcType::Int64 => {
                type $T = i64;
                let arr = $read_expr?;
                // No lossless `From<i64> for f64` exists; large magnitudes round.
                Ok(arr.mapv(|v| v as f64))
            }
            NcType::UInt64 => {
                type $T = u64;
                let arr = $read_expr?;
                // No lossless `From<u64> for f64` exists; large magnitudes round.
                Ok(arr.mapv(|v| v as f64))
            }
            NcType::Char => Err($crate::error::Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: "Char".to_string(),
            }),
            NcType::String => Err($crate::error::Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: "String".to_string(),
            }),
            other => Err($crate::error::Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: format!("{:?}", other),
            }),
        }
    }};
}
106
107/// An opened NetCDF-4 file (backed by HDF5).
108pub struct Nc4File {
109    hdf5: Hdf5File,
110    root_group: NcGroup,
111}
112
113impl Nc4File {
114    pub(crate) fn from_hdf5(hdf5: Hdf5File, root_group: NcGroup) -> Self {
115        Nc4File { hdf5, root_group }
116    }
117
118    /// Open a NetCDF-4 file from disk.
119    pub fn open(path: &Path) -> Result<Self> {
120        Self::open_with_options(path, crate::NcOpenOptions::default())
121    }
122
123    /// Open a NetCDF-4 file from disk with custom options.
124    pub fn open_with_options(path: &Path, options: crate::NcOpenOptions) -> Result<Self> {
125        let hdf5 = Hdf5File::open_with_options(
126            path,
127            hdf5_reader::OpenOptions {
128                chunk_cache_bytes: options.chunk_cache_bytes,
129                chunk_cache_slots: options.chunk_cache_slots,
130                filter_registry: options.filter_registry,
131            },
132        )?;
133        let root_group = groups::build_root_group(&hdf5)?;
134        Ok(Nc4File { hdf5, root_group })
135    }
136
137    /// Open a NetCDF-4 file from in-memory bytes.
138    pub fn from_bytes(data: &[u8]) -> Result<Self> {
139        Self::from_bytes_with_options(data, crate::NcOpenOptions::default())
140    }
141
142    /// Open a NetCDF-4 file from in-memory bytes with custom options.
143    pub fn from_bytes_with_options(data: &[u8], options: crate::NcOpenOptions) -> Result<Self> {
144        let hdf5 = Hdf5File::from_bytes_with_options(
145            data,
146            hdf5_reader::OpenOptions {
147                chunk_cache_bytes: options.chunk_cache_bytes,
148                chunk_cache_slots: options.chunk_cache_slots,
149                filter_registry: options.filter_registry,
150            },
151        )?;
152        let root_group = groups::build_root_group(&hdf5)?;
153        Ok(Nc4File { hdf5, root_group })
154    }
155
156    /// The root group.
157    pub fn root_group(&self) -> &NcGroup {
158        &self.root_group
159    }
160
161    /// Check if this file uses the classic data model (`_nc3_strict`).
162    ///
163    /// This checks the raw HDF5 root group attributes (before the internal
164    /// attribute filter removes `_nc3_strict`).
165    pub fn is_classic_model(&self) -> bool {
166        self.hdf5
167            .root_group()
168            .ok()
169            .and_then(|g| g.attribute("_nc3_strict").ok())
170            .is_some()
171    }
172
173    /// Read a variable's data as a typed array.
174    ///
175    /// Looks up the variable by path relative to the root group, then opens the
176    /// matching HDF5 dataset and reads the data.
177    pub fn read_variable<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
178        let normalized = normalize_dataset_path(path)?;
179        let dataset = self.hdf5.dataset(normalized)?;
180        Ok(dataset.read_array::<T>()?)
181    }
182
183    /// Read a string variable as a single string.
184    pub fn read_variable_as_string(&self, path: &str) -> Result<String> {
185        let mut strings = self.read_variable_as_strings(path)?;
186        match strings.len() {
187            1 => Ok(strings.swap_remove(0)),
188            0 => Err(Error::InvalidData(format!(
189                "variable '{}' contains no string elements",
190                path
191            ))),
192            count => Err(Error::InvalidData(format!(
193                "variable '{}' contains {count} string elements; use read_variable_as_strings()",
194                path
195            ))),
196        }
197    }
198
199    /// Read a string variable as a flat vector of strings.
200    pub fn read_variable_as_strings(&self, path: &str) -> Result<Vec<String>> {
201        let normalized = normalize_dataset_path(path)?;
202        let var = self
203            .root_group
204            .variable(normalized)
205            .ok_or_else(|| Error::VariableNotFound(path.to_string()))?;
206        if var.dtype != NcType::String {
207            return Err(Error::TypeMismatch {
208                expected: "String".to_string(),
209                actual: format!("{:?}", var.dtype),
210            });
211        }
212
213        let dataset = self.hdf5.dataset(normalized)?;
214        Ok(dataset.read_strings()?)
215    }
216
217    #[cfg(feature = "rayon")]
218    pub fn read_variable_parallel<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
219        let normalized = normalize_dataset_path(path)?;
220        let dataset = self.hdf5.dataset(normalized)?;
221        Ok(dataset.read_array_parallel::<T>()?)
222    }
223
224    #[cfg(feature = "rayon")]
225    pub fn read_variable_in_pool<T: H5Type>(
226        &self,
227        path: &str,
228        pool: &ThreadPool,
229    ) -> Result<ArrayD<T>> {
230        let normalized = normalize_dataset_path(path)?;
231        let dataset = self.hdf5.dataset(normalized)?;
232        Ok(dataset.read_array_in_pool::<T>(pool)?)
233    }
234}
235
236impl Nc4File {
237    /// Read a variable with automatic type promotion to f64.
238    ///
239    /// Reads in the native HDF5 type and promotes to f64 via `mapv`.
240    pub fn read_variable_as_f64(&self, path: &str) -> Result<ArrayD<f64>> {
241        let normalized = normalize_dataset_path(path)?;
242        let var = self
243            .root_group
244            .variable(normalized)
245            .ok_or_else(|| Error::VariableNotFound(path.to_string()))?;
246        let dataset = self.hdf5.dataset(normalized)?;
247
248        debug_assert_eq!(dataset.shape(), &var.shape()[..]);
249
250        dispatch_read_as_f64!(&var.dtype, |T| dataset.read_array::<T>())
251    }
252
253    /// Read a slice of a variable with automatic type promotion to f64.
254    pub fn read_variable_slice_as_f64(
255        &self,
256        path: &str,
257        selection: &crate::types::NcSliceInfo,
258    ) -> Result<ArrayD<f64>> {
259        let normalized = normalize_dataset_path(path)?;
260        let var = self
261            .root_group
262            .variable(normalized)
263            .ok_or_else(|| Error::VariableNotFound(path.to_string()))?;
264        let dataset = self.hdf5.dataset(normalized)?;
265        let hdf5_sel = selection.to_hdf5_slice_info();
266
267        dispatch_read_as_f64!(&var.dtype, |T| dataset.read_slice::<T>(&hdf5_sel))
268    }
269
270    /// Read a typed slice of a variable (NC4 delegation).
271    pub fn read_variable_slice<T: H5Type>(
272        &self,
273        path: &str,
274        selection: &crate::types::NcSliceInfo,
275    ) -> Result<ArrayD<T>> {
276        let normalized = normalize_dataset_path(path)?;
277        let dataset = self.hdf5.dataset(normalized)?;
278        let hdf5_sel = selection.to_hdf5_slice_info();
279        Ok(dataset.read_slice::<T>(&hdf5_sel)?)
280    }
281
282    /// Read a typed slice of a variable using chunk-level parallelism.
283    ///
284    /// Chunked datasets decompress overlapping chunks in parallel via Rayon.
285    /// Non-chunked layouts fall back to `read_variable_slice`.
286    #[cfg(feature = "rayon")]
287    pub fn read_variable_slice_parallel<T: H5Type>(
288        &self,
289        path: &str,
290        selection: &crate::types::NcSliceInfo,
291    ) -> Result<ArrayD<T>> {
292        let normalized = normalize_dataset_path(path)?;
293        let dataset = self.hdf5.dataset(normalized)?;
294        let hdf5_sel = selection.to_hdf5_slice_info();
295        Ok(dataset.read_slice_parallel::<T>(&hdf5_sel)?)
296    }
297}
298
299fn normalize_dataset_path(path: &str) -> Result<&str> {
300    let trimmed = path.trim_matches('/');
301    if trimmed.is_empty() {
302        return Err(Error::VariableNotFound(path.to_string()));
303    }
304    Ok(trimmed)
305}