Skip to main content

netcdf_reader/nc4/
dimensions.rs

1//! Reconstruct NetCDF-4 dimensions from HDF5 metadata.
2//!
3//! NetCDF-4 stores dimension information as:
4//! - Scale datasets (one per dimension) with `CLASS=DIMENSION_SCALE` attribute
5//! - `DIMENSION_LIST` attribute on each variable dataset referencing the scales
6//! - `REFERENCE_LIST` attribute on scale datasets (back-references)
7//! - `_Netcdf4Dimid` attribute to assign stable dimension IDs
8//! - `NAME` attribute on scale datasets gives the dimension name
9//!
10//! Unlimited dimensions are represented by chunked datasets whose maximum
11//! dimension in the dataspace is `H5S_UNLIMITED`.
12
13use std::collections::HashMap;
14
15use hdf5_reader::group::Group;
16
17use crate::error::{Error, Result};
18use crate::types::NcDimension;
19
/// Return the last `/`-separated component of `name`.
///
/// A name that contains no `/` is returned unchanged, and a name that ends
/// in `/` yields the empty string.
fn leaf_name(name: &str) -> &str {
    name.rsplit_once('/').map_or(name, |(_, leaf)| leaf)
}
23
24/// Extract dimensions from an HDF5 group.
25///
26/// Returns a tuple of:
27/// - The list of dimensions (sorted by `_Netcdf4Dimid` if available)
28/// - A map from dataset object-header address to the corresponding dimension
29///
30/// The address map is used by `extract_variables` to resolve `DIMENSION_LIST`
31/// references back to the correct dimension by address rather than by size.
32pub fn extract_dimensions(
33    group: &Group,
34    metadata_mode: crate::NcMetadataMode,
35) -> Result<(Vec<NcDimension>, HashMap<u64, NcDimension>)> {
36    let datasets = group.datasets()?;
37    extract_dimensions_from_datasets(&datasets, metadata_mode)
38}
39
40pub fn extract_dimensions_from_datasets(
41    datasets: &[hdf5_reader::Dataset],
42    metadata_mode: crate::NcMetadataMode,
43) -> Result<(Vec<NcDimension>, HashMap<u64, NcDimension>)> {
44    let mut dims: Vec<(Option<i64>, NcDimension, u64)> = Vec::new();
45
46    for ds in datasets {
47        if let Some((dimid, dim, address)) = extract_dimension_entry(ds, metadata_mode)? {
48            dims.push((dimid, dim, address));
49        }
50    }
51
52    // Sort by _Netcdf4Dimid if available, otherwise preserve order
53    dims.sort_by_key(|(id, _, _)| id.unwrap_or(i64::MAX));
54
55    let addr_map: HashMap<u64, NcDimension> =
56        dims.iter().map(|(_, d, addr)| (*addr, d.clone())).collect();
57
58    let dim_list: Vec<NcDimension> = dims.into_iter().map(|(_, d, _)| d).collect();
59
60    Ok((dim_list, addr_map))
61}
62
63fn extract_dimension_entry(
64    ds: &hdf5_reader::Dataset,
65    metadata_mode: crate::NcMetadataMode,
66) -> Result<Option<(Option<i64>, NcDimension, u64)>> {
67    let strict = metadata_mode == crate::NcMetadataMode::Strict;
68
69    let is_dim_scale = match ds.attribute("CLASS") {
70        Ok(attr) => match attr.read_string() {
71            Ok(value) => value == "DIMENSION_SCALE",
72            Err(err) if strict => {
73                return Err(Error::InvalidData(format!(
74                    "dataset '{}' has unreadable CLASS attribute: {err}",
75                    ds.name()
76                )))
77            }
78            Err(_) => false,
79        },
80        Err(_) => false,
81    };
82
83    if !is_dim_scale {
84        return Ok(None);
85    }
86
87    let name = match ds.attribute("NAME") {
88        Ok(attr) => match attr.read_string() {
89            Ok(value) => {
90                if value.starts_with("This is a netCDF dimension but not a netCDF variable") {
91                    leaf_name(ds.name()).to_string()
92                } else {
93                    value
94                }
95            }
96            Err(err) if strict => {
97                return Err(Error::InvalidData(format!(
98                    "dimension scale '{}' has unreadable NAME attribute: {err}",
99                    ds.name()
100                )))
101            }
102            Err(_) => leaf_name(ds.name()).to_string(),
103        },
104        Err(_) => leaf_name(ds.name()).to_string(),
105    };
106
107    let shape = ds.shape();
108    let size = if shape.is_empty() { 0 } else { shape[0] };
109    let is_unlimited = ds
110        .max_dims()
111        .is_some_and(|md| !md.is_empty() && md[0] == u64::MAX);
112    let dimid = match ds.attribute("_Netcdf4Dimid") {
113        Ok(attr) => match attr.read_scalar::<i32>() {
114            Ok(id) => Some(id as i64),
115            Err(err) if strict => {
116                return Err(Error::InvalidData(format!(
117                    "dimension scale '{}' has unreadable _Netcdf4Dimid attribute: {err}",
118                    ds.name()
119                )))
120            }
121            Err(_) => None,
122        },
123        Err(_) => None,
124    };
125
126    Ok(Some((
127        dimid,
128        NcDimension {
129            name,
130            size,
131            is_unlimited,
132        },
133        ds.address(),
134    )))
135}