Skip to main content

netcdf_reader/nc4/
mod.rs

1//! NetCDF-4 (HDF5-backed) format support.
2//!
3//! This module maps the HDF5 data model to the NetCDF data model:
4//! - HDF5 groups become NetCDF groups
5//! - HDF5 datasets become NetCDF variables
6//! - HDF5 attributes become NetCDF attributes
7//! - Dimensions are reconstructed from `DIMENSION_LIST` and `_Netcdf4Dimid` attributes
8//!
9//! Requires the `netcdf4` feature (enabled by default).
10
11pub mod attributes;
12pub mod dimensions;
13pub mod groups;
14pub mod types;
15pub mod variables;
16
17use std::path::Path;
18use std::sync::OnceLock;
19
20use hdf5_reader::datatype_api::H5Type;
21use hdf5_reader::storage::DynStorage;
22use hdf5_reader::Hdf5File;
23use ndarray::ArrayD;
24#[cfg(feature = "rayon")]
25use rayon::ThreadPool;
26
27use crate::error::{Error, Result};
28use crate::types::{NcGroup, NcType};
29
/// Read data in its native numeric type and widen every element to `f64`.
///
/// * `$dtype` must be an expression of type `&NcType`; it selects the native
///   element type.
/// * `|T| expr` is a closure-like callback. In each numeric arm the identifier
///   given as `T` is declared as a type alias for the native Rust type, and
///   `expr` is evaluated with that alias in scope.
///
/// The expansion applies `?` to `expr`, so the macro can only be used inside a
/// function whose return type accepts the error produced by `expr`. Non-numeric
/// types (`Char`, `String`, and anything else) produce `Error::TypeMismatch`.
///
/// Usage:
/// ```ignore
/// dispatch_read_as_f64!(&var.dtype, |T| dataset.read_array::<T>())
/// ```
macro_rules! dispatch_read_as_f64 {
    // Internal: alias `$T` to the native type, run the read, widen element-wise.
    (@widen $T:ident = $native:ty, $read_expr:expr) => {{
        type $T = $native;
        let arr = $read_expr?;
        Ok(arr.mapv(|v| v as f64))
    }};
    // Internal: build the mismatch error reported for a non-numeric type.
    (@not_numeric $actual:expr) => {
        Err(Error::TypeMismatch {
            expected: "numeric type".to_string(),
            actual: $actual,
        })
    };
    // Public entry point.
    ($dtype:expr, |$T:ident| $read_expr:expr) => {{
        use crate::types::NcType;
        match $dtype {
            NcType::Byte => dispatch_read_as_f64!(@widen $T = i8, $read_expr),
            NcType::Short => dispatch_read_as_f64!(@widen $T = i16, $read_expr),
            NcType::Int => dispatch_read_as_f64!(@widen $T = i32, $read_expr),
            NcType::Float => dispatch_read_as_f64!(@widen $T = f32, $read_expr),
            NcType::Double => {
                // Already `f64`: hand back the array as read, with no `mapv` pass.
                type $T = f64;
                Ok($read_expr?)
            }
            NcType::UByte => dispatch_read_as_f64!(@widen $T = u8, $read_expr),
            NcType::UShort => dispatch_read_as_f64!(@widen $T = u16, $read_expr),
            NcType::UInt => dispatch_read_as_f64!(@widen $T = u32, $read_expr),
            NcType::Int64 => dispatch_read_as_f64!(@widen $T = i64, $read_expr),
            NcType::UInt64 => dispatch_read_as_f64!(@widen $T = u64, $read_expr),
            NcType::Char => dispatch_read_as_f64!(@not_numeric "Char".to_string()),
            NcType::String => dispatch_read_as_f64!(@not_numeric "String".to_string()),
            other => dispatch_read_as_f64!(@not_numeric format!("{:?}", other)),
        }
    }};
}
108
109/// An opened NetCDF-4 file (backed by HDF5).
110pub struct Nc4File {
111    hdf5: Hdf5File,
112    metadata_mode: crate::NcMetadataMode,
113    root_metadata: OnceLock<NcGroup>,
114    metadata_tree: OnceLock<NcGroup>,
115}
116
117impl Nc4File {
118    pub(crate) fn from_hdf5(hdf5: Hdf5File, metadata_mode: crate::NcMetadataMode) -> Result<Self> {
119        Ok(Nc4File {
120            hdf5,
121            metadata_mode,
122            root_metadata: OnceLock::new(),
123            metadata_tree: OnceLock::new(),
124        })
125    }
126
127    /// Open a NetCDF-4 file from disk.
128    pub fn open(path: &Path) -> Result<Self> {
129        Self::open_with_options(path, crate::NcOpenOptions::default())
130    }
131
132    /// Open a NetCDF-4 file from disk with custom options.
133    pub fn open_with_options(path: &Path, options: crate::NcOpenOptions) -> Result<Self> {
134        let hdf5 = Hdf5File::open_with_options(
135            path,
136            hdf5_reader::OpenOptions {
137                chunk_cache_bytes: options.chunk_cache_bytes,
138                chunk_cache_slots: options.chunk_cache_slots,
139                filter_registry: options.filter_registry,
140            },
141        )?;
142        Nc4File::from_hdf5(hdf5, options.metadata_mode)
143    }
144
145    /// Open a NetCDF-4 file from in-memory bytes.
146    pub fn from_bytes(data: &[u8]) -> Result<Self> {
147        Self::from_bytes_with_options(data, crate::NcOpenOptions::default())
148    }
149
150    /// Open a NetCDF-4 file from in-memory bytes with custom options.
151    pub fn from_bytes_with_options(data: &[u8], options: crate::NcOpenOptions) -> Result<Self> {
152        let hdf5 = Hdf5File::from_bytes_with_options(
153            data,
154            hdf5_reader::OpenOptions {
155                chunk_cache_bytes: options.chunk_cache_bytes,
156                chunk_cache_slots: options.chunk_cache_slots,
157                filter_registry: options.filter_registry,
158            },
159        )?;
160        Nc4File::from_hdf5(hdf5, options.metadata_mode)
161    }
162
163    /// Open a NetCDF-4 file from a custom random-access storage backend.
164    pub fn from_storage(storage: DynStorage) -> Result<Self> {
165        Self::from_storage_with_options(storage, crate::NcOpenOptions::default())
166    }
167
168    /// Open a NetCDF-4 file from a custom random-access storage backend with custom options.
169    pub fn from_storage_with_options(
170        storage: DynStorage,
171        options: crate::NcOpenOptions,
172    ) -> Result<Self> {
173        let hdf5 = Hdf5File::from_storage_with_options(
174            storage,
175            hdf5_reader::OpenOptions {
176                chunk_cache_bytes: options.chunk_cache_bytes,
177                chunk_cache_slots: options.chunk_cache_slots,
178                filter_registry: options.filter_registry,
179            },
180        )?;
181        Nc4File::from_hdf5(hdf5, options.metadata_mode)
182    }
183
184    /// The root group.
185    pub fn root_group(&self) -> Result<&NcGroup> {
186        if let Some(group) = self.metadata_tree.get() {
187            return Ok(group);
188        }
189        let metadata_tree = groups::build_root_group(&self.hdf5, self.metadata_mode)?;
190        let _ = self.metadata_tree.set(metadata_tree);
191        Ok(self
192            .metadata_tree
193            .get()
194            .expect("metadata tree must be initialized after successful build"))
195    }
196
197    /// Check if this file uses the classic data model (`_nc3_strict`).
198    ///
199    /// This checks the raw HDF5 root group attributes (before the internal
200    /// attribute filter removes `_nc3_strict`).
201    pub fn is_classic_model(&self) -> bool {
202        self.hdf5
203            .root_group()
204            .ok()
205            .and_then(|g| g.attribute("_nc3_strict").ok())
206            .is_some()
207    }
208
209    pub fn dimensions(&self) -> Result<&[crate::types::NcDimension]> {
210        Ok(&self.root_metadata()?.dimensions)
211    }
212
213    pub fn variables(&self) -> Result<&[crate::types::NcVariable]> {
214        Ok(&self.root_metadata()?.variables)
215    }
216
217    pub fn global_attributes(&self) -> Result<&[crate::types::NcAttribute]> {
218        Ok(&self.root_metadata()?.attributes)
219    }
220
221    pub fn group(&self, path: &str) -> Result<&NcGroup> {
222        let normalized = normalize_group_path(path)?;
223        let root = self.root_group()?;
224        if normalized.is_empty() {
225            return Ok(root);
226        }
227        root.group(normalized)
228            .ok_or_else(|| Error::GroupNotFound(path.to_string()))
229    }
230
231    pub fn variable(&self, path: &str) -> Result<&crate::types::NcVariable> {
232        self.root_group()?
233            .variable(path)
234            .ok_or_else(|| Error::VariableNotFound(path.to_string()))
235    }
236
237    pub fn dimension(&self, path: &str) -> Result<&crate::types::NcDimension> {
238        self.root_group()?
239            .dimension(path)
240            .ok_or_else(|| Error::DimensionNotFound(path.to_string()))
241    }
242
243    pub fn global_attribute(&self, path: &str) -> Result<&crate::types::NcAttribute> {
244        self.root_group()?
245            .attribute(path)
246            .ok_or_else(|| Error::AttributeNotFound(path.to_string()))
247    }
248
249    /// Read a variable's data as a typed array.
250    ///
251    /// Looks up the variable by path relative to the root group, then opens the
252    /// matching HDF5 dataset and reads the data.
253    pub fn read_variable<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
254        let normalized = normalize_dataset_path(path)?;
255        let dataset = self.hdf5.dataset(normalized)?;
256        Ok(dataset.read_array::<T>()?)
257    }
258
259    /// Read a string variable as a single string.
260    pub fn read_variable_as_string(&self, path: &str) -> Result<String> {
261        let mut strings = self.read_variable_as_strings(path)?;
262        match strings.len() {
263            1 => Ok(strings.swap_remove(0)),
264            0 => Err(Error::InvalidData(format!(
265                "variable '{}' contains no string elements",
266                path
267            ))),
268            count => Err(Error::InvalidData(format!(
269                "variable '{}' contains {count} string elements; use read_variable_as_strings()",
270                path
271            ))),
272        }
273    }
274
275    /// Read a string variable as a flat vector of strings.
276    pub fn read_variable_as_strings(&self, path: &str) -> Result<Vec<String>> {
277        let normalized = normalize_dataset_path(path)?;
278        let dataset = self.hdf5.dataset(normalized)?;
279        let dtype = dataset_nc_type(&dataset)?;
280        if dtype != NcType::String {
281            return Err(Error::TypeMismatch {
282                expected: "String".to_string(),
283                actual: format!("{dtype:?}"),
284            });
285        }
286        Ok(dataset.read_strings()?)
287    }
288
289    #[cfg(feature = "rayon")]
290    pub fn read_variable_parallel<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
291        let normalized = normalize_dataset_path(path)?;
292        let dataset = self.hdf5.dataset(normalized)?;
293        Ok(dataset.read_array_parallel::<T>()?)
294    }
295
296    #[cfg(feature = "rayon")]
297    pub fn read_variable_in_pool<T: H5Type>(
298        &self,
299        path: &str,
300        pool: &ThreadPool,
301    ) -> Result<ArrayD<T>> {
302        let normalized = normalize_dataset_path(path)?;
303        let dataset = self.hdf5.dataset(normalized)?;
304        Ok(dataset.read_array_in_pool::<T>(pool)?)
305    }
306}
307
308impl Nc4File {
309    fn root_metadata(&self) -> Result<&NcGroup> {
310        if let Some(group) = self.root_metadata.get() {
311            return Ok(group);
312        }
313        let root_metadata = groups::build_root_group_metadata(&self.hdf5, self.metadata_mode)?;
314        let _ = self.root_metadata.set(root_metadata);
315        Ok(self
316            .root_metadata
317            .get()
318            .expect("root metadata must be initialized after successful build"))
319    }
320}
321
322impl Nc4File {
323    /// Read a variable with automatic type promotion to f64.
324    ///
325    /// Reads in the native HDF5 type and promotes to f64 via `mapv`.
326    pub fn read_variable_as_f64(&self, path: &str) -> Result<ArrayD<f64>> {
327        let normalized = normalize_dataset_path(path)?;
328        let dataset = self.hdf5.dataset(normalized)?;
329        let dtype = dataset_nc_type(&dataset)?;
330        dispatch_read_as_f64!(&dtype, |T| dataset.read_array::<T>())
331    }
332
333    /// Read a slice of a variable with automatic type promotion to f64.
334    pub fn read_variable_slice_as_f64(
335        &self,
336        path: &str,
337        selection: &crate::types::NcSliceInfo,
338    ) -> Result<ArrayD<f64>> {
339        let normalized = normalize_dataset_path(path)?;
340        let dataset = self.hdf5.dataset(normalized)?;
341        let hdf5_sel = selection.to_hdf5_slice_info();
342        let dtype = dataset_nc_type(&dataset)?;
343        dispatch_read_as_f64!(&dtype, |T| dataset.read_slice::<T>(&hdf5_sel))
344    }
345
346    /// Read a typed slice of a variable (NC4 delegation).
347    pub fn read_variable_slice<T: H5Type>(
348        &self,
349        path: &str,
350        selection: &crate::types::NcSliceInfo,
351    ) -> Result<ArrayD<T>> {
352        let normalized = normalize_dataset_path(path)?;
353        let dataset = self.hdf5.dataset(normalized)?;
354        let hdf5_sel = selection.to_hdf5_slice_info();
355        Ok(dataset.read_slice::<T>(&hdf5_sel)?)
356    }
357
358    /// Read a typed slice of a variable using chunk-level parallelism.
359    ///
360    /// Chunked datasets decompress overlapping chunks in parallel via Rayon.
361    /// Non-chunked layouts fall back to `read_variable_slice`.
362    #[cfg(feature = "rayon")]
363    pub fn read_variable_slice_parallel<T: H5Type>(
364        &self,
365        path: &str,
366        selection: &crate::types::NcSliceInfo,
367    ) -> Result<ArrayD<T>> {
368        let normalized = normalize_dataset_path(path)?;
369        let dataset = self.hdf5.dataset(normalized)?;
370        let hdf5_sel = selection.to_hdf5_slice_info();
371        Ok(dataset.read_slice_parallel::<T>(&hdf5_sel)?)
372    }
373}
374
375fn normalize_dataset_path(path: &str) -> Result<&str> {
376    let trimmed = path.trim_matches('/');
377    if trimmed.is_empty() {
378        return Err(Error::VariableNotFound(path.to_string()));
379    }
380    Ok(trimmed)
381}
382
383fn normalize_group_path(path: &str) -> Result<&str> {
384    Ok(path.trim_matches('/'))
385}
386
387fn dataset_nc_type(dataset: &hdf5_reader::Dataset) -> Result<NcType> {
388    self::types::hdf5_to_nc_type(dataset.dtype()).map_err(|err| {
389        Error::InvalidData(format!(
390            "dataset '{}' cannot be mapped to a NetCDF-4 type: {err}",
391            dataset.name()
392        ))
393    })
394}