netcdf_reader/nc4/dimensions.rs
1//! Reconstruct NetCDF-4 dimensions from HDF5 metadata.
2//!
3//! NetCDF-4 stores dimension information as:
4//! - Scale datasets (one per dimension) with `CLASS=DIMENSION_SCALE` attribute
5//! - `DIMENSION_LIST` attribute on each variable dataset referencing the scales
6//! - `REFERENCE_LIST` attribute on scale datasets (back-references)
7//! - `_Netcdf4Dimid` attribute to assign stable dimension IDs
8//! - `NAME` attribute on scale datasets gives the dimension name
9//!
10//! Unlimited dimensions are represented by chunked datasets whose maximum
11//! dimension in the dataspace is `H5S_UNLIMITED`.
12
13use std::collections::HashMap;
14
15use hdf5_reader::group::Group;
16
17use crate::error::Result;
18use crate::types::NcDimension;
19
/// Return the final `/`-separated component of `name`.
///
/// A `name` without any `/` is returned unchanged; a `name` ending in `/`
/// yields the empty string.
fn leaf_name(name: &str) -> &str {
    name.rsplit_once('/').map_or(name, |(_, leaf)| leaf)
}
23
24/// Extract dimensions from an HDF5 group.
25///
26/// Returns a tuple of:
27/// - The list of dimensions (sorted by `_Netcdf4Dimid` if available)
28/// - A map from dataset object-header address to the corresponding dimension
29///
30/// The address map is used by `extract_variables` to resolve `DIMENSION_LIST`
31/// references back to the correct dimension by address rather than by size.
32pub fn extract_dimensions(
33 group: &Group<'_>,
34) -> Result<(Vec<NcDimension>, HashMap<u64, NcDimension>)> {
35 let datasets = match group.datasets() {
36 Ok(ds) => ds,
37 Err(_) => return Ok((Vec::new(), HashMap::new())),
38 };
39 extract_dimensions_from_datasets(&datasets)
40}
41
42pub fn extract_dimensions_from_datasets(
43 datasets: &[hdf5_reader::Dataset<'_>],
44) -> Result<(Vec<NcDimension>, HashMap<u64, NcDimension>)> {
45 let mut dims: Vec<(Option<i64>, NcDimension, u64)> = Vec::new();
46
47 for ds in datasets {
48 // Check for CLASS=DIMENSION_SCALE attribute
49 let is_dim_scale = ds
50 .attribute("CLASS")
51 .ok()
52 .and_then(|attr| attr.read_string().ok())
53 .map(|s| s == "DIMENSION_SCALE")
54 .unwrap_or(false);
55
56 if !is_dim_scale {
57 continue;
58 }
59
60 // Get dimension name from NAME attribute, falling back to dataset name
61 let name = ds
62 .attribute("NAME")
63 .ok()
64 .and_then(|attr| attr.read_string().ok())
65 .map(|s| {
66 // NetCDF-4 uses "This is a netCDF dimension but not a netCDF variable."
67 // as a sentinel for anonymous dimensions. In that case, use the dataset name.
68 if s.starts_with("This is a netCDF dimension but not a netCDF variable") {
69 leaf_name(ds.name()).to_string()
70 } else {
71 s
72 }
73 })
74 .unwrap_or_else(|| leaf_name(ds.name()).to_string());
75
76 // Get current size from dataspace
77 let shape = ds.shape();
78 let size = if shape.is_empty() { 0 } else { shape[0] };
79
80 // Check max dims for unlimited
81 let is_unlimited = ds
82 .max_dims()
83 .is_some_and(|md| !md.is_empty() && md[0] == u64::MAX);
84
85 // Get stable ordering from _Netcdf4Dimid
86 let dimid = ds
87 .attribute("_Netcdf4Dimid")
88 .ok()
89 .and_then(|attr| attr.read_scalar::<i32>().ok())
90 .map(|id| id as i64);
91
92 let address = ds.address();
93
94 dims.push((
95 dimid,
96 NcDimension {
97 name,
98 size,
99 is_unlimited,
100 },
101 address,
102 ));
103 }
104
105 // Sort by _Netcdf4Dimid if available, otherwise preserve order
106 dims.sort_by_key(|(id, _, _)| id.unwrap_or(i64::MAX));
107
108 let addr_map: HashMap<u64, NcDimension> =
109 dims.iter().map(|(_, d, addr)| (*addr, d.clone())).collect();
110
111 let dim_list: Vec<NcDimension> = dims.into_iter().map(|(_, d, _)| d).collect();
112
113 Ok((dim_list, addr_map))
114}