Skip to main content

netcdf_reader/nc4/
dimensions.rs

1//! Reconstruct NetCDF-4 dimensions from HDF5 metadata.
2//!
3//! NetCDF-4 stores dimension information as:
4//! - Scale datasets (one per dimension) with `CLASS=DIMENSION_SCALE` attribute
5//! - `DIMENSION_LIST` attribute on each variable dataset referencing the scales
6//! - `REFERENCE_LIST` attribute on scale datasets (back-references)
7//! - `_Netcdf4Dimid` attribute to assign stable dimension IDs
8//! - `NAME` attribute on scale datasets gives the dimension name
9//!
10//! Unlimited dimensions are represented by chunked datasets whose maximum
11//! dimension in the dataspace is `H5S_UNLIMITED`.
12
13use std::collections::HashMap;
14
15use hdf5_reader::group::Group;
16
17use crate::error::Result;
18use crate::types::NcDimension;
19
/// Return the portion of `name` after its last `/`.
///
/// A name without any `/` is returned unchanged; a name ending in `/`
/// yields the empty string.
fn leaf_name(name: &str) -> &str {
    name.rsplit_once('/').map_or(name, |(_, leaf)| leaf)
}
23
24/// Extract dimensions from an HDF5 group.
25///
26/// Returns a tuple of:
27/// - The list of dimensions (sorted by `_Netcdf4Dimid` if available)
28/// - A map from dataset object-header address to the corresponding dimension
29///
30/// The address map is used by `extract_variables` to resolve `DIMENSION_LIST`
31/// references back to the correct dimension by address rather than by size.
32pub fn extract_dimensions(
33    group: &Group<'_>,
34) -> Result<(Vec<NcDimension>, HashMap<u64, NcDimension>)> {
35    let datasets = match group.datasets() {
36        Ok(ds) => ds,
37        Err(_) => return Ok((Vec::new(), HashMap::new())),
38    };
39    extract_dimensions_from_datasets(&datasets)
40}
41
42pub fn extract_dimensions_from_datasets(
43    datasets: &[hdf5_reader::Dataset<'_>],
44) -> Result<(Vec<NcDimension>, HashMap<u64, NcDimension>)> {
45    let mut dims: Vec<(Option<i64>, NcDimension, u64)> = Vec::new();
46
47    for ds in datasets {
48        // Check for CLASS=DIMENSION_SCALE attribute
49        let is_dim_scale = ds
50            .attribute("CLASS")
51            .ok()
52            .and_then(|attr| attr.read_string().ok())
53            .map(|s| s == "DIMENSION_SCALE")
54            .unwrap_or(false);
55
56        if !is_dim_scale {
57            continue;
58        }
59
60        // Get dimension name from NAME attribute, falling back to dataset name
61        let name = ds
62            .attribute("NAME")
63            .ok()
64            .and_then(|attr| attr.read_string().ok())
65            .map(|s| {
66                // NetCDF-4 uses "This is a netCDF dimension but not a netCDF variable."
67                // as a sentinel for anonymous dimensions. In that case, use the dataset name.
68                if s.starts_with("This is a netCDF dimension but not a netCDF variable") {
69                    leaf_name(ds.name()).to_string()
70                } else {
71                    s
72                }
73            })
74            .unwrap_or_else(|| leaf_name(ds.name()).to_string());
75
76        // Get current size from dataspace
77        let shape = ds.shape();
78        let size = if shape.is_empty() { 0 } else { shape[0] };
79
80        // Check max dims for unlimited
81        let is_unlimited = ds
82            .max_dims()
83            .is_some_and(|md| !md.is_empty() && md[0] == u64::MAX);
84
85        // Get stable ordering from _Netcdf4Dimid
86        let dimid = ds
87            .attribute("_Netcdf4Dimid")
88            .ok()
89            .and_then(|attr| attr.read_scalar::<i32>().ok())
90            .map(|id| id as i64);
91
92        let address = ds.address();
93
94        dims.push((
95            dimid,
96            NcDimension {
97                name,
98                size,
99                is_unlimited,
100            },
101            address,
102        ));
103    }
104
105    // Sort by _Netcdf4Dimid if available, otherwise preserve order
106    dims.sort_by_key(|(id, _, _)| id.unwrap_or(i64::MAX));
107
108    let addr_map: HashMap<u64, NcDimension> =
109        dims.iter().map(|(_, d, addr)| (*addr, d.clone())).collect();
110
111    let dim_list: Vec<NcDimension> = dims.into_iter().map(|(_, d, _)| d).collect();
112
113    Ok((dim_list, addr_map))
114}