Skip to main content

netcdf_reader/nc4/
mod.rs

1//! NetCDF-4 (HDF5-backed) format support.
2//!
3//! This module maps the HDF5 data model to the NetCDF data model:
4//! - HDF5 groups become NetCDF groups
5//! - HDF5 datasets become NetCDF variables
6//! - HDF5 attributes become NetCDF attributes
7//! - Dimensions are reconstructed from `DIMENSION_LIST` and `_Netcdf4Dimid` attributes
8//!
9//! Requires the `netcdf4` feature (enabled by default).
10
11pub mod attributes;
12pub mod dimensions;
13pub mod groups;
14pub mod types;
15pub mod variables;
16
17use std::path::Path;
18use std::sync::OnceLock;
19
20use hdf5_reader::datatype_api::H5Type;
21use hdf5_reader::storage::DynStorage;
22use hdf5_reader::Hdf5File;
23use ndarray::ArrayD;
24#[cfg(feature = "rayon")]
25use rayon::ThreadPool;
26
27use crate::error::{Error, Result};
28use crate::types::{NcGroup, NcType};
29
/// Dispatch on `NcType`, read the data in its native numeric type, and promote it to `f64`.
///
/// - `$dtype` is the value that gets matched against the `NcType` variants
///   (callers in this module pass a `&NcType`).
/// - `|$T| $read_expr`: `$T` is the identifier of a type alias that the macro
///   defines once per numeric variant (`i8`, `i16`, `i32`, `f32`, `f64`, `u8`,
///   `u16`, `u32`, `i64`, `u64`). `$read_expr` is evaluated with that alias in
///   scope and must yield a `Result` whose `Ok` value offers `mapv`.
///
/// `$read_expr` is followed by `?`, so a failed read returns early from the
/// enclosing function. `Char`, `String`, and every other non-numeric type
/// produce `Error::TypeMismatch`.
///
/// Every non-`f64` element is converted with `as f64`; for 64-bit integers
/// this rounds values that are not exactly representable.
///
/// `NcType` and `Error` are referenced through `$crate`, so the call site does
/// not have to import either of them.
///
/// Usage (the call site writes a plain identifier, not `$T`):
/// ```ignore
/// dispatch_read_as_f64!(&var.dtype, |T| dataset.read_array::<T>())
/// ```
macro_rules! dispatch_read_as_f64 {
    // Internal rule, listed first so the public rule never tries to parse `@`
    // as an expression: alias `$T` to `$native`, read, then widen each element.
    (@widen $T:ident = $native:ty, $read_expr:expr) => {{
        type $T = $native;
        let arr = $read_expr?;
        Ok(arr.mapv(|v| v as f64))
    }};
    ($dtype:expr, |$T:ident| $read_expr:expr) => {{
        use $crate::error::Error;
        use $crate::types::NcType;
        match $dtype {
            NcType::Byte => dispatch_read_as_f64!(@widen $T = i8, $read_expr),
            NcType::Short => dispatch_read_as_f64!(@widen $T = i16, $read_expr),
            NcType::Int => dispatch_read_as_f64!(@widen $T = i32, $read_expr),
            NcType::Float => dispatch_read_as_f64!(@widen $T = f32, $read_expr),
            NcType::Double => {
                // Already `f64`: hand the array back without a conversion pass.
                type $T = f64;
                Ok($read_expr?)
            }
            NcType::UByte => dispatch_read_as_f64!(@widen $T = u8, $read_expr),
            NcType::UShort => dispatch_read_as_f64!(@widen $T = u16, $read_expr),
            NcType::UInt => dispatch_read_as_f64!(@widen $T = u32, $read_expr),
            NcType::Int64 => dispatch_read_as_f64!(@widen $T = i64, $read_expr),
            NcType::UInt64 => dispatch_read_as_f64!(@widen $T = u64, $read_expr),
            NcType::Char => Err(Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: "Char".to_string(),
            }),
            NcType::String => Err(Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: "String".to_string(),
            }),
            other => Err(Error::TypeMismatch {
                expected: "numeric type".to_string(),
                actual: format!("{:?}", other),
            }),
        }
    }};
}
108
109/// An opened NetCDF-4 file (backed by HDF5).
110pub struct Nc4File {
111    hdf5: Hdf5File,
112    metadata_mode: crate::NcMetadataMode,
113    root_metadata: OnceLock<NcGroup>,
114    metadata_tree: OnceLock<NcGroup>,
115}
116
117impl Nc4File {
118    pub(crate) fn from_hdf5(hdf5: Hdf5File, metadata_mode: crate::NcMetadataMode) -> Result<Self> {
119        Ok(Nc4File {
120            hdf5,
121            metadata_mode,
122            root_metadata: OnceLock::new(),
123            metadata_tree: OnceLock::new(),
124        })
125    }
126
127    /// Open a NetCDF-4 file from disk.
128    pub fn open(path: &Path) -> Result<Self> {
129        Self::open_with_options(path, crate::NcOpenOptions::default())
130    }
131
132    /// Open a NetCDF-4 file from disk with custom options.
133    pub fn open_with_options(path: &Path, options: crate::NcOpenOptions) -> Result<Self> {
134        let metadata_mode = options.metadata_mode;
135        let hdf5 = Hdf5File::open_with_options(
136            path,
137            hdf5_reader::OpenOptions {
138                chunk_cache_bytes: options.chunk_cache_bytes,
139                chunk_cache_slots: options.chunk_cache_slots,
140                filter_registry: options.filter_registry,
141                external_file_resolver: options.external_file_resolver,
142                external_link_resolver: options.external_link_resolver,
143            },
144        )?;
145        Nc4File::from_hdf5(hdf5, metadata_mode)
146    }
147
148    /// Open a NetCDF-4 file from in-memory bytes.
149    pub fn from_bytes(data: &[u8]) -> Result<Self> {
150        Self::from_bytes_with_options(data, crate::NcOpenOptions::default())
151    }
152
153    /// Open a NetCDF-4 file from in-memory bytes with custom options.
154    pub fn from_bytes_with_options(data: &[u8], options: crate::NcOpenOptions) -> Result<Self> {
155        let metadata_mode = options.metadata_mode;
156        let hdf5 = Hdf5File::from_bytes_with_options(
157            data,
158            hdf5_reader::OpenOptions {
159                chunk_cache_bytes: options.chunk_cache_bytes,
160                chunk_cache_slots: options.chunk_cache_slots,
161                filter_registry: options.filter_registry,
162                external_file_resolver: options.external_file_resolver,
163                external_link_resolver: options.external_link_resolver,
164            },
165        )?;
166        Nc4File::from_hdf5(hdf5, metadata_mode)
167    }
168
169    /// Open a NetCDF-4 file from a custom random-access storage backend.
170    pub fn from_storage(storage: DynStorage) -> Result<Self> {
171        Self::from_storage_with_options(storage, crate::NcOpenOptions::default())
172    }
173
174    /// Open a NetCDF-4 file from a custom random-access storage backend with custom options.
175    pub fn from_storage_with_options(
176        storage: DynStorage,
177        options: crate::NcOpenOptions,
178    ) -> Result<Self> {
179        let metadata_mode = options.metadata_mode;
180        let hdf5 = Hdf5File::from_storage_with_options(
181            storage,
182            hdf5_reader::OpenOptions {
183                chunk_cache_bytes: options.chunk_cache_bytes,
184                chunk_cache_slots: options.chunk_cache_slots,
185                filter_registry: options.filter_registry,
186                external_file_resolver: options.external_file_resolver,
187                external_link_resolver: options.external_link_resolver,
188            },
189        )?;
190        Nc4File::from_hdf5(hdf5, metadata_mode)
191    }
192
193    /// The root group.
194    pub fn root_group(&self) -> Result<&NcGroup> {
195        if let Some(group) = self.metadata_tree.get() {
196            return Ok(group);
197        }
198        let metadata_tree = groups::build_root_group(&self.hdf5, self.metadata_mode)?;
199        let _ = self.metadata_tree.set(metadata_tree);
200        Ok(self
201            .metadata_tree
202            .get()
203            .expect("metadata tree must be initialized after successful build"))
204    }
205
206    /// Check if this file uses the classic data model (`_nc3_strict`).
207    ///
208    /// This checks the raw HDF5 root group attributes (before the internal
209    /// attribute filter removes `_nc3_strict`).
210    pub fn is_classic_model(&self) -> bool {
211        self.hdf5
212            .root_group()
213            .ok()
214            .and_then(|g| g.attribute("_nc3_strict").ok())
215            .is_some()
216    }
217
218    pub fn dimensions(&self) -> Result<&[crate::types::NcDimension]> {
219        Ok(&self.root_metadata()?.dimensions)
220    }
221
222    pub fn variables(&self) -> Result<&[crate::types::NcVariable]> {
223        Ok(&self.root_metadata()?.variables)
224    }
225
226    pub fn global_attributes(&self) -> Result<&[crate::types::NcAttribute]> {
227        Ok(&self.root_metadata()?.attributes)
228    }
229
230    pub fn group(&self, path: &str) -> Result<&NcGroup> {
231        let normalized = normalize_group_path(path)?;
232        let root = self.root_group()?;
233        if normalized.is_empty() {
234            return Ok(root);
235        }
236        root.group(normalized)
237            .ok_or_else(|| Error::GroupNotFound(path.to_string()))
238    }
239
240    pub fn variable(&self, path: &str) -> Result<&crate::types::NcVariable> {
241        self.root_group()?
242            .variable(path)
243            .ok_or_else(|| Error::VariableNotFound(path.to_string()))
244    }
245
246    pub fn dimension(&self, path: &str) -> Result<&crate::types::NcDimension> {
247        self.root_group()?
248            .dimension(path)
249            .ok_or_else(|| Error::DimensionNotFound(path.to_string()))
250    }
251
252    pub fn global_attribute(&self, path: &str) -> Result<&crate::types::NcAttribute> {
253        self.root_group()?
254            .attribute(path)
255            .ok_or_else(|| Error::AttributeNotFound(path.to_string()))
256    }
257
258    /// Read a variable's data as a typed array.
259    ///
260    /// Looks up the variable by path relative to the root group, then opens the
261    /// matching HDF5 dataset and reads the data.
262    pub fn read_variable<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
263        let normalized = normalize_dataset_path(path)?;
264        let dataset = self.hdf5.dataset(normalized)?;
265        Ok(dataset.read_array::<T>()?)
266    }
267
268    /// Read a variable into a caller-provided typed buffer.
269    pub fn read_variable_into<T: H5Type>(&self, path: &str, dst: &mut [T]) -> Result<()> {
270        let normalized = normalize_dataset_path(path)?;
271        let dataset = self.hdf5.dataset(normalized)?;
272        Ok(dataset.read_into::<T>(dst)?)
273    }
274
275    /// Read a variable as logical raw bytes in HDF5 datatype byte order.
276    pub fn read_variable_raw_bytes(&self, path: &str) -> Result<Vec<u8>> {
277        let normalized = normalize_dataset_path(path)?;
278        let dataset = self.hdf5.dataset(normalized)?;
279        Ok(dataset.read_raw_bytes()?)
280    }
281
282    /// Read logical raw bytes into a caller-provided buffer.
283    pub fn read_variable_raw_bytes_into(&self, path: &str, dst: &mut [u8]) -> Result<()> {
284        let normalized = normalize_dataset_path(path)?;
285        let dataset = self.hdf5.dataset(normalized)?;
286        Ok(dataset.read_raw_bytes_into(dst)?)
287    }
288
289    /// Read a variable as logical raw bytes with numeric fields in native endian.
290    pub fn read_variable_native_bytes(&self, path: &str) -> Result<Vec<u8>> {
291        let normalized = normalize_dataset_path(path)?;
292        let dataset = self.hdf5.dataset(normalized)?;
293        Ok(dataset.read_native_bytes()?)
294    }
295
296    /// Read native-endian logical raw bytes into a caller-provided buffer.
297    pub fn read_variable_native_bytes_into(&self, path: &str, dst: &mut [u8]) -> Result<()> {
298        let normalized = normalize_dataset_path(path)?;
299        let dataset = self.hdf5.dataset(normalized)?;
300        Ok(dataset.read_native_bytes_into(dst)?)
301    }
302
303    /// Iterate decoded HDF5 chunks for a chunked variable.
304    pub fn iter_variable_chunks(&self, path: &str) -> Result<hdf5_reader::DatasetChunkIterator> {
305        let normalized = normalize_dataset_path(path)?;
306        let dataset = self.hdf5.dataset(normalized)?;
307        Ok(dataset.iter_chunks()?)
308    }
309
310    /// Return current chunk-cache statistics.
311    pub fn chunk_cache_stats(&self) -> hdf5_reader::ChunkCacheStats {
312        self.hdf5.chunk_cache_stats()
313    }
314
315    /// Read a string variable as a single string.
316    pub fn read_variable_as_string(&self, path: &str) -> Result<String> {
317        let mut strings = self.read_variable_as_strings(path)?;
318        match strings.len() {
319            1 => Ok(strings.swap_remove(0)),
320            0 => Err(Error::InvalidData(format!(
321                "variable '{}' contains no string elements",
322                path
323            ))),
324            count => Err(Error::InvalidData(format!(
325                "variable '{}' contains {count} string elements; use read_variable_as_strings()",
326                path
327            ))),
328        }
329    }
330
331    /// Read a string variable as a flat vector of strings.
332    pub fn read_variable_as_strings(&self, path: &str) -> Result<Vec<String>> {
333        let normalized = normalize_dataset_path(path)?;
334        let dataset = self.hdf5.dataset(normalized)?;
335        let dtype = dataset_nc_type(&dataset)?;
336        if dtype != NcType::String {
337            return Err(Error::TypeMismatch {
338                expected: "String".to_string(),
339                actual: format!("{dtype:?}"),
340            });
341        }
342        Ok(dataset.read_strings()?)
343    }
344
345    /// Read a variable containing NetCDF-4 user-defined values.
346    pub fn read_variable_user_defined(
347        &self,
348        path: &str,
349    ) -> Result<ArrayD<crate::user_defined::NcValue>> {
350        let normalized = normalize_dataset_path(path)?;
351        let dataset = self.hdf5.dataset(normalized)?;
352        crate::user_defined::read_dataset_values(&dataset)
353    }
354
355    /// Read a NetCDF-4 user-defined variable through a custom decoder.
356    pub fn read_variable_user_defined_with<T, F>(&self, path: &str, decoder: F) -> Result<ArrayD<T>>
357    where
358        F: FnMut(crate::user_defined::NcValueView<'_>) -> Result<T>,
359    {
360        let normalized = normalize_dataset_path(path)?;
361        let dataset = self.hdf5.dataset(normalized)?;
362        crate::user_defined::read_dataset_with_decoder(&dataset, decoder)
363    }
364
365    #[cfg(feature = "rayon")]
366    pub fn read_variable_parallel<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
367        let normalized = normalize_dataset_path(path)?;
368        let dataset = self.hdf5.dataset(normalized)?;
369        Ok(dataset.read_array_parallel::<T>()?)
370    }
371
372    #[cfg(feature = "rayon")]
373    pub fn read_variable_in_pool<T: H5Type>(
374        &self,
375        path: &str,
376        pool: &ThreadPool,
377    ) -> Result<ArrayD<T>> {
378        let normalized = normalize_dataset_path(path)?;
379        let dataset = self.hdf5.dataset(normalized)?;
380        Ok(dataset.read_array_in_pool::<T>(pool)?)
381    }
382}
383
384impl Nc4File {
385    fn root_metadata(&self) -> Result<&NcGroup> {
386        if let Some(group) = self.root_metadata.get() {
387            return Ok(group);
388        }
389        let root_metadata = groups::build_root_group_metadata(&self.hdf5, self.metadata_mode)?;
390        let _ = self.root_metadata.set(root_metadata);
391        Ok(self
392            .root_metadata
393            .get()
394            .expect("root metadata must be initialized after successful build"))
395    }
396}
397
398impl Nc4File {
399    /// Read a variable with automatic type promotion to f64.
400    ///
401    /// Reads in the native HDF5 type and promotes to f64 via `mapv`.
402    pub fn read_variable_as_f64(&self, path: &str) -> Result<ArrayD<f64>> {
403        let normalized = normalize_dataset_path(path)?;
404        let dataset = self.hdf5.dataset(normalized)?;
405        let dtype = dataset_nc_type(&dataset)?;
406        dispatch_read_as_f64!(&dtype, |T| dataset.read_array::<T>())
407    }
408
409    /// Read a slice of a variable with automatic type promotion to f64.
410    pub fn read_variable_slice_as_f64(
411        &self,
412        path: &str,
413        selection: &crate::types::NcSliceInfo,
414    ) -> Result<ArrayD<f64>> {
415        let normalized = normalize_dataset_path(path)?;
416        let dataset = self.hdf5.dataset(normalized)?;
417        let hdf5_sel = selection.to_hdf5_slice_info();
418        let dtype = dataset_nc_type(&dataset)?;
419        dispatch_read_as_f64!(&dtype, |T| dataset.read_slice::<T>(&hdf5_sel))
420    }
421
422    /// Read a typed slice of a variable (NC4 delegation).
423    pub fn read_variable_slice<T: H5Type>(
424        &self,
425        path: &str,
426        selection: &crate::types::NcSliceInfo,
427    ) -> Result<ArrayD<T>> {
428        let normalized = normalize_dataset_path(path)?;
429        let dataset = self.hdf5.dataset(normalized)?;
430        let hdf5_sel = selection.to_hdf5_slice_info();
431        Ok(dataset.read_slice::<T>(&hdf5_sel)?)
432    }
433
434    /// Read a typed slice of a variable using chunk-level parallelism.
435    ///
436    /// Chunked datasets decompress overlapping chunks in parallel via Rayon.
437    /// Non-chunked layouts fall back to `read_variable_slice`.
438    #[cfg(feature = "rayon")]
439    pub fn read_variable_slice_parallel<T: H5Type>(
440        &self,
441        path: &str,
442        selection: &crate::types::NcSliceInfo,
443    ) -> Result<ArrayD<T>> {
444        let normalized = normalize_dataset_path(path)?;
445        let dataset = self.hdf5.dataset(normalized)?;
446        let hdf5_sel = selection.to_hdf5_slice_info();
447        Ok(dataset.read_slice_parallel::<T>(&hdf5_sel)?)
448    }
449}
450
451fn normalize_dataset_path(path: &str) -> Result<&str> {
452    let trimmed = path.trim_matches('/');
453    if trimmed.is_empty() {
454        return Err(Error::VariableNotFound(path.to_string()));
455    }
456    Ok(trimmed)
457}
458
459fn normalize_group_path(path: &str) -> Result<&str> {
460    Ok(path.trim_matches('/'))
461}
462
463fn dataset_nc_type(dataset: &hdf5_reader::Dataset) -> Result<NcType> {
464    self::types::hdf5_to_nc_type(dataset.dtype()).map_err(|err| {
465        Error::InvalidData(format!(
466            "dataset '{}' cannot be mapped to a NetCDF-4 type: {err}",
467            dataset.name()
468        ))
469    })
470}