Skip to main content

netcdf_reader/nc4/
mod.rs

1//! NetCDF-4 (HDF5-backed) format support.
2//!
3//! This module maps the HDF5 data model to the NetCDF data model:
4//! - HDF5 groups become NetCDF groups
5//! - HDF5 datasets become NetCDF variables
6//! - HDF5 attributes become NetCDF attributes
7//! - Dimensions are reconstructed from `DIMENSION_LIST` and `_Netcdf4Dimid` attributes
8//!
9//! Requires the `netcdf4` feature (enabled by default).
10
11pub mod attributes;
12pub mod dimensions;
13pub mod groups;
14pub mod types;
15pub mod variables;
16
17use std::path::Path;
18use std::sync::OnceLock;
19
20use hdf5_reader::datatype_api::H5Type;
21use hdf5_reader::storage::DynStorage;
22use hdf5_reader::Hdf5File;
23use ndarray::ArrayD;
24#[cfg(feature = "rayon")]
25use rayon::ThreadPool;
26
27use crate::error::{Error, Result};
28use crate::types::{NcGroup, NcType};
29
/// Dispatch on `NcType`, read the data in its native element type, and promote to `f64`.
///
/// * `$dtype` must be an expression of type `&NcType`.
/// * `|$T| $read_expr` is a closure-like callback. `$T` is the identifier the
///   caller uses for the element type inside `$read_expr`; for every numeric
///   `NcType` the macro declares a local `type $T = <native type>;` alias and
///   then evaluates `$read_expr`, which must yield a `Result` whose `Ok` value
///   supports `mapv`.
///
/// The expansion applies `?` to `$read_expr`, so it can only be used inside a
/// function whose error type the read error converts into. The whole expansion
/// evaluates to a `Result`; `Char`, `String`, and any other non-numeric type
/// produce `Error::TypeMismatch`.
///
/// Promotion uses `as f64`, so `Int64`/`UInt64` values whose magnitude exceeds
/// 2^53 are rounded to the nearest representable `f64`.
///
/// Usage:
/// ```ignore
/// dispatch_read_as_f64!(&var.dtype, |T| dataset.read_array::<T>())
/// ```
macro_rules! dispatch_read_as_f64 {
    // Internal rule: bind `$T` to one native element type, run the read, and
    // widen every element to `f64`. Must stay ahead of the public rule so the
    // leading `@promote` token is never handed to the `$dtype:expr` matcher.
    (@promote $T:ident = $native:ty, $read_expr:expr) => {{
        type $T = $native;
        let arr = $read_expr?;
        Ok(arr.mapv(|v| v as f64))
    }};
    ($dtype:expr, |$T:ident| $read_expr:expr) => {{
        // Resolve through `$crate` so the expansion does not depend on which
        // names the call site happens to have imported.
        use $crate::error::Error;
        use $crate::types::NcType;
        match $dtype {
            NcType::Byte => dispatch_read_as_f64!(@promote $T = i8, $read_expr),
            NcType::Short => dispatch_read_as_f64!(@promote $T = i16, $read_expr),
            NcType::Int => dispatch_read_as_f64!(@promote $T = i32, $read_expr),
            NcType::Float => dispatch_read_as_f64!(@promote $T = f32, $read_expr),
            NcType::Double => {
                // Already `f64`: hand the array back without an extra copy.
                type $T = f64;
                Ok($read_expr?)
            }
            NcType::UByte => dispatch_read_as_f64!(@promote $T = u8, $read_expr),
            NcType::UShort => dispatch_read_as_f64!(@promote $T = u16, $read_expr),
            NcType::UInt => dispatch_read_as_f64!(@promote $T = u32, $read_expr),
            NcType::Int64 => dispatch_read_as_f64!(@promote $T = i64, $read_expr),
            NcType::UInt64 => dispatch_read_as_f64!(@promote $T = u64, $read_expr),
            NcType::Char => Err(Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: "Char".to_string(),
            }),
            NcType::String => Err(Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: "String".to_string(),
            }),
            other => Err(Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: format!("{:?}", other),
            }),
        }
    }};
}
108
/// An opened NetCDF-4 file (backed by HDF5).
///
/// NetCDF metadata is not built when the file is opened: the two `OnceLock`
/// fields start empty and are filled the first time an accessor needs them.
pub struct Nc4File {
    /// Underlying HDF5 file; every dataset and raw attribute lookup goes through it.
    hdf5: Hdf5File,
    /// Metadata mode chosen at open time and forwarded to the builders in `groups`.
    metadata_mode: crate::NcMetadataMode,
    /// Cached result of `groups::build_root_group_metadata`; backs `dimensions()`,
    /// `variables()` and `global_attributes()`.
    root_metadata: OnceLock<NcGroup>,
    /// Cached result of `groups::build_root_group`; backs `root_group()` and the
    /// path-based lookups.
    metadata_tree: OnceLock<NcGroup>,
}
116
117impl Nc4File {
118    pub(crate) fn from_hdf5(hdf5: Hdf5File, metadata_mode: crate::NcMetadataMode) -> Result<Self> {
119        Ok(Nc4File {
120            hdf5,
121            metadata_mode,
122            root_metadata: OnceLock::new(),
123            metadata_tree: OnceLock::new(),
124        })
125    }
126
127    /// Open a NetCDF-4 file from disk.
128    pub fn open(path: &Path) -> Result<Self> {
129        Self::open_with_options(path, crate::NcOpenOptions::default())
130    }
131
132    /// Open a NetCDF-4 file from disk with custom options.
133    pub fn open_with_options(path: &Path, options: crate::NcOpenOptions) -> Result<Self> {
134        let hdf5 = Hdf5File::open_with_options(
135            path,
136            hdf5_reader::OpenOptions {
137                chunk_cache_bytes: options.chunk_cache_bytes,
138                chunk_cache_slots: options.chunk_cache_slots,
139                filter_registry: options.filter_registry,
140                ..Default::default()
141            },
142        )?;
143        Nc4File::from_hdf5(hdf5, options.metadata_mode)
144    }
145
146    /// Open a NetCDF-4 file from in-memory bytes.
147    pub fn from_bytes(data: &[u8]) -> Result<Self> {
148        Self::from_bytes_with_options(data, crate::NcOpenOptions::default())
149    }
150
151    /// Open a NetCDF-4 file from in-memory bytes with custom options.
152    pub fn from_bytes_with_options(data: &[u8], options: crate::NcOpenOptions) -> Result<Self> {
153        let hdf5 = Hdf5File::from_bytes_with_options(
154            data,
155            hdf5_reader::OpenOptions {
156                chunk_cache_bytes: options.chunk_cache_bytes,
157                chunk_cache_slots: options.chunk_cache_slots,
158                filter_registry: options.filter_registry,
159                ..Default::default()
160            },
161        )?;
162        Nc4File::from_hdf5(hdf5, options.metadata_mode)
163    }
164
165    /// Open a NetCDF-4 file from a custom random-access storage backend.
166    pub fn from_storage(storage: DynStorage) -> Result<Self> {
167        Self::from_storage_with_options(storage, crate::NcOpenOptions::default())
168    }
169
170    /// Open a NetCDF-4 file from a custom random-access storage backend with custom options.
171    pub fn from_storage_with_options(
172        storage: DynStorage,
173        options: crate::NcOpenOptions,
174    ) -> Result<Self> {
175        let hdf5 = Hdf5File::from_storage_with_options(
176            storage,
177            hdf5_reader::OpenOptions {
178                chunk_cache_bytes: options.chunk_cache_bytes,
179                chunk_cache_slots: options.chunk_cache_slots,
180                filter_registry: options.filter_registry,
181                ..Default::default()
182            },
183        )?;
184        Nc4File::from_hdf5(hdf5, options.metadata_mode)
185    }
186
187    /// The root group.
188    pub fn root_group(&self) -> Result<&NcGroup> {
189        if let Some(group) = self.metadata_tree.get() {
190            return Ok(group);
191        }
192        let metadata_tree = groups::build_root_group(&self.hdf5, self.metadata_mode)?;
193        let _ = self.metadata_tree.set(metadata_tree);
194        Ok(self
195            .metadata_tree
196            .get()
197            .expect("metadata tree must be initialized after successful build"))
198    }
199
200    /// Check if this file uses the classic data model (`_nc3_strict`).
201    ///
202    /// This checks the raw HDF5 root group attributes (before the internal
203    /// attribute filter removes `_nc3_strict`).
204    pub fn is_classic_model(&self) -> bool {
205        self.hdf5
206            .root_group()
207            .ok()
208            .and_then(|g| g.attribute("_nc3_strict").ok())
209            .is_some()
210    }
211
212    pub fn dimensions(&self) -> Result<&[crate::types::NcDimension]> {
213        Ok(&self.root_metadata()?.dimensions)
214    }
215
216    pub fn variables(&self) -> Result<&[crate::types::NcVariable]> {
217        Ok(&self.root_metadata()?.variables)
218    }
219
220    pub fn global_attributes(&self) -> Result<&[crate::types::NcAttribute]> {
221        Ok(&self.root_metadata()?.attributes)
222    }
223
224    pub fn group(&self, path: &str) -> Result<&NcGroup> {
225        let normalized = normalize_group_path(path)?;
226        let root = self.root_group()?;
227        if normalized.is_empty() {
228            return Ok(root);
229        }
230        root.group(normalized)
231            .ok_or_else(|| Error::GroupNotFound(path.to_string()))
232    }
233
234    pub fn variable(&self, path: &str) -> Result<&crate::types::NcVariable> {
235        self.root_group()?
236            .variable(path)
237            .ok_or_else(|| Error::VariableNotFound(path.to_string()))
238    }
239
240    pub fn dimension(&self, path: &str) -> Result<&crate::types::NcDimension> {
241        self.root_group()?
242            .dimension(path)
243            .ok_or_else(|| Error::DimensionNotFound(path.to_string()))
244    }
245
246    pub fn global_attribute(&self, path: &str) -> Result<&crate::types::NcAttribute> {
247        self.root_group()?
248            .attribute(path)
249            .ok_or_else(|| Error::AttributeNotFound(path.to_string()))
250    }
251
252    /// Read a variable's data as a typed array.
253    ///
254    /// Looks up the variable by path relative to the root group, then opens the
255    /// matching HDF5 dataset and reads the data.
256    pub fn read_variable<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
257        let normalized = normalize_dataset_path(path)?;
258        let dataset = self.hdf5.dataset(normalized)?;
259        Ok(dataset.read_array::<T>()?)
260    }
261
262    /// Read a string variable as a single string.
263    pub fn read_variable_as_string(&self, path: &str) -> Result<String> {
264        let mut strings = self.read_variable_as_strings(path)?;
265        match strings.len() {
266            1 => Ok(strings.swap_remove(0)),
267            0 => Err(Error::InvalidData(format!(
268                "variable '{}' contains no string elements",
269                path
270            ))),
271            count => Err(Error::InvalidData(format!(
272                "variable '{}' contains {count} string elements; use read_variable_as_strings()",
273                path
274            ))),
275        }
276    }
277
278    /// Read a string variable as a flat vector of strings.
279    pub fn read_variable_as_strings(&self, path: &str) -> Result<Vec<String>> {
280        let normalized = normalize_dataset_path(path)?;
281        let dataset = self.hdf5.dataset(normalized)?;
282        let dtype = dataset_nc_type(&dataset)?;
283        if dtype != NcType::String {
284            return Err(Error::TypeMismatch {
285                expected: "String".to_string(),
286                actual: format!("{dtype:?}"),
287            });
288        }
289        Ok(dataset.read_strings()?)
290    }
291
292    #[cfg(feature = "rayon")]
293    pub fn read_variable_parallel<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
294        let normalized = normalize_dataset_path(path)?;
295        let dataset = self.hdf5.dataset(normalized)?;
296        Ok(dataset.read_array_parallel::<T>()?)
297    }
298
299    #[cfg(feature = "rayon")]
300    pub fn read_variable_in_pool<T: H5Type>(
301        &self,
302        path: &str,
303        pool: &ThreadPool,
304    ) -> Result<ArrayD<T>> {
305        let normalized = normalize_dataset_path(path)?;
306        let dataset = self.hdf5.dataset(normalized)?;
307        Ok(dataset.read_array_in_pool::<T>(pool)?)
308    }
309}
310
311impl Nc4File {
312    fn root_metadata(&self) -> Result<&NcGroup> {
313        if let Some(group) = self.root_metadata.get() {
314            return Ok(group);
315        }
316        let root_metadata = groups::build_root_group_metadata(&self.hdf5, self.metadata_mode)?;
317        let _ = self.root_metadata.set(root_metadata);
318        Ok(self
319            .root_metadata
320            .get()
321            .expect("root metadata must be initialized after successful build"))
322    }
323}
324
325impl Nc4File {
326    /// Read a variable with automatic type promotion to f64.
327    ///
328    /// Reads in the native HDF5 type and promotes to f64 via `mapv`.
329    pub fn read_variable_as_f64(&self, path: &str) -> Result<ArrayD<f64>> {
330        let normalized = normalize_dataset_path(path)?;
331        let dataset = self.hdf5.dataset(normalized)?;
332        let dtype = dataset_nc_type(&dataset)?;
333        dispatch_read_as_f64!(&dtype, |T| dataset.read_array::<T>())
334    }
335
336    /// Read a slice of a variable with automatic type promotion to f64.
337    pub fn read_variable_slice_as_f64(
338        &self,
339        path: &str,
340        selection: &crate::types::NcSliceInfo,
341    ) -> Result<ArrayD<f64>> {
342        let normalized = normalize_dataset_path(path)?;
343        let dataset = self.hdf5.dataset(normalized)?;
344        let hdf5_sel = selection.to_hdf5_slice_info();
345        let dtype = dataset_nc_type(&dataset)?;
346        dispatch_read_as_f64!(&dtype, |T| dataset.read_slice::<T>(&hdf5_sel))
347    }
348
349    /// Read a typed slice of a variable (NC4 delegation).
350    pub fn read_variable_slice<T: H5Type>(
351        &self,
352        path: &str,
353        selection: &crate::types::NcSliceInfo,
354    ) -> Result<ArrayD<T>> {
355        let normalized = normalize_dataset_path(path)?;
356        let dataset = self.hdf5.dataset(normalized)?;
357        let hdf5_sel = selection.to_hdf5_slice_info();
358        Ok(dataset.read_slice::<T>(&hdf5_sel)?)
359    }
360
361    /// Read a typed slice of a variable using chunk-level parallelism.
362    ///
363    /// Chunked datasets decompress overlapping chunks in parallel via Rayon.
364    /// Non-chunked layouts fall back to `read_variable_slice`.
365    #[cfg(feature = "rayon")]
366    pub fn read_variable_slice_parallel<T: H5Type>(
367        &self,
368        path: &str,
369        selection: &crate::types::NcSliceInfo,
370    ) -> Result<ArrayD<T>> {
371        let normalized = normalize_dataset_path(path)?;
372        let dataset = self.hdf5.dataset(normalized)?;
373        let hdf5_sel = selection.to_hdf5_slice_info();
374        Ok(dataset.read_slice_parallel::<T>(&hdf5_sel)?)
375    }
376}
377
378fn normalize_dataset_path(path: &str) -> Result<&str> {
379    let trimmed = path.trim_matches('/');
380    if trimmed.is_empty() {
381        return Err(Error::VariableNotFound(path.to_string()));
382    }
383    Ok(trimmed)
384}
385
386fn normalize_group_path(path: &str) -> Result<&str> {
387    Ok(path.trim_matches('/'))
388}
389
390fn dataset_nc_type(dataset: &hdf5_reader::Dataset) -> Result<NcType> {
391    self::types::hdf5_to_nc_type(dataset.dtype()).map_err(|err| {
392        Error::InvalidData(format!(
393            "dataset '{}' cannot be mapped to a NetCDF-4 type: {err}",
394            dataset.name()
395        ))
396    })
397}