// netcdf_reader/nc4/variables.rs

use std::collections::HashMap;

use hdf5_reader::group::Group;

use crate::error::Result;
use crate::types::{NcDimension, NcVariable};

use super::attributes;
use super::types::hdf5_to_nc_type;
/// Returns the final component of a slash-separated HDF5 object path.
///
/// `/grp/var` yields `var`; a name containing no `/` is returned as-is.
fn leaf_name(name: &str) -> &str {
    match name.rfind('/') {
        Some(pos) => &name[pos + 1..],
        None => name,
    }
}
20
21pub fn extract_variables(
29 group: &Group<'_>,
30 dimensions: &[NcDimension],
31 dim_addr_map: &HashMap<u64, NcDimension>,
32) -> Result<Vec<NcVariable>> {
33 let datasets = match group.datasets() {
34 Ok(ds) => ds,
35 Err(_) => return Ok(Vec::new()),
36 };
37 extract_variables_from_datasets(&datasets, group, dimensions, dim_addr_map)
38}
39
40pub fn extract_variables_from_datasets(
41 datasets: &[hdf5_reader::Dataset<'_>],
42 group: &Group<'_>,
43 dimensions: &[NcDimension],
44 dim_addr_map: &HashMap<u64, NcDimension>,
45) -> Result<Vec<NcVariable>> {
46 let mut variables = Vec::new();
47
48 for ds in datasets {
49 let is_dim_scale = ds
51 .attribute("CLASS")
52 .ok()
53 .and_then(|attr| attr.read_string().ok())
54 .map(|s| s == "DIMENSION_SCALE")
55 .unwrap_or(false);
56
57 if is_dim_scale {
58 continue;
59 }
60
61 let nc_type = match hdf5_to_nc_type(ds.dtype()) {
63 Ok(t) => t,
64 Err(_) => continue, };
66
67 let var_dims = resolve_variable_dimensions_from_dimlist(ds, group, dim_addr_map)
69 .unwrap_or_else(|| resolve_variable_dimensions_by_size(ds, dimensions));
70
71 let is_unlimited = var_dims.iter().any(|d| d.is_unlimited);
73
74 let shape = ds.shape();
75 let (data_size, record_size) =
76 compute_storage_sizes(shape, nc_type.size() as u64, is_unlimited)?;
77
78 let var_attrs = attributes::extract_variable_attributes(ds)?;
80
81 variables.push(NcVariable {
82 name: leaf_name(ds.name()).to_string(),
83 dimensions: var_dims,
84 dtype: nc_type,
85 attributes: var_attrs,
86 data_offset: ds.address(),
87 _data_size: data_size,
88 is_record_var: is_unlimited,
89 record_size,
90 });
91 }
92
93 Ok(variables)
94}
95
/// Resolves a dataset's dimensions from its `DIMENSION_LIST` attribute.
///
/// `DIMENSION_LIST` is a variable-length attribute with one sequence per
/// dataset axis; each sequence entry points into a global heap collection
/// that stores object references to the dimension-scale datasets. Each
/// referenced file address is looked up in `dim_addr_map` to recover the
/// corresponding `NcDimension`.
///
/// Returns `None` when the attribute is missing or malformed, or when any
/// referenced dimension address is unknown — callers then fall back to
/// size-based matching.
fn resolve_variable_dimensions_from_dimlist(
    ds: &hdf5_reader::Dataset<'_>,
    group: &Group<'_>,
    dim_addr_map: &HashMap<u64, NcDimension>,
) -> Option<Vec<NcDimension>> {
    let attr = ds.attribute("DIMENSION_LIST").ok()?;
    let raw_data = &attr.raw_data;
    let ndim = ds.ndim();
    let offset_size = group.offset_size();
    let file_data = group.file_data();

    if raw_data.is_empty() || ndim == 0 {
        return None;
    }

    // One entry per axis: 4-byte sequence length, a file offset of
    // `offset_size` bytes to the global heap collection, and a 4-byte
    // heap object index.
    let entry_size = 4 + offset_size as usize + 4;
    if raw_data.len() < ndim * entry_size {
        return None;
    }

    let mut var_dims = Vec::with_capacity(ndim);
    let mut cursor = hdf5_reader::io::Cursor::new(raw_data);

    for _ in 0..ndim {
        let seq_len = cursor.read_u32_le().ok()? as usize;
        let heap_addr = cursor.read_offset(offset_size).ok()?;
        // Stored as 4 bytes but used as a 16-bit heap index; assumed to
        // fit — NOTE(review): confirm indices never exceed u16::MAX.
        let heap_idx = cursor.read_u32_le().ok()? as u16;

        // An empty sequence or undefined heap address means the dimension
        // list cannot be trusted; abort and let the caller fall back.
        if seq_len == 0 || hdf5_reader::io::Cursor::is_undefined_offset(heap_addr, offset_size) {
            return None;
        }

        // Parse the global heap collection that holds the reference data.
        let mut heap_cursor = hdf5_reader::io::Cursor::new(file_data);
        heap_cursor.set_position(heap_addr);
        let collection = hdf5_reader::global_heap::GlobalHeapCollection::parse(
            &mut heap_cursor,
            offset_size,
            group.length_size(),
        )
        .ok()?;

        let heap_obj = collection.get_object(heap_idx)?;

        // The heap object's payload is a run of object references
        // (file addresses of the dimension-scale datasets).
        let refs =
            hdf5_reader::reference::read_object_references(&heap_obj.data, offset_size).ok()?;

        if refs.is_empty() {
            return None;
        }

        // Only the first reference is used: one dimension scale per axis.
        let dim_addr = refs[0];
        if let Some(dim) = dim_addr_map.get(&dim_addr) {
            var_dims.push(dim.clone());
        } else {
            // Reference to a dimension we never catalogued — give up so
            // the size-based fallback can run instead.
            return None;
        }
    }

    // Axes whose maximum extent is u64::MAX (HDF5 "unlimited") are
    // flagged as NetCDF record dimensions.
    if let Some(max_dims) = ds.max_dims() {
        for (i, md) in max_dims.iter().enumerate() {
            if *md == u64::MAX && i < var_dims.len() {
                var_dims[i].is_unlimited = true;
            }
        }
    }

    Some(var_dims)
}
182
183fn compute_storage_sizes(shape: &[u64], elem_size: u64, is_unlimited: bool) -> Result<(u64, u64)> {
184 let total_elements =
185 crate::types::checked_shape_elements(shape, "NetCDF-4 variable element count")?;
186 let data_size = crate::types::checked_mul_u64(
187 total_elements,
188 elem_size,
189 "NetCDF-4 variable size in bytes",
190 )?;
191
192 let record_elements = if is_unlimited && shape.len() > 1 {
193 crate::types::checked_shape_elements(&shape[1..], "NetCDF-4 record element count")?
194 } else {
195 1
196 };
197 let record_size =
198 crate::types::checked_mul_u64(record_elements, elem_size, "NetCDF-4 record size in bytes")?;
199
200 Ok((data_size, record_size))
201}
202
203fn resolve_variable_dimensions_by_size(
206 ds: &hdf5_reader::Dataset<'_>,
207 dimensions: &[NcDimension],
208) -> Vec<NcDimension> {
209 let shape = ds.shape();
210
211 let mut var_dims = Vec::with_capacity(shape.len());
214 let mut used = vec![false; dimensions.len()];
215
216 for &dim_size in shape {
217 let mut matched = false;
218 for (i, dim) in dimensions.iter().enumerate() {
219 if !used[i] && dim.size == dim_size {
220 var_dims.push(dim.clone());
221 used[i] = true;
222 matched = true;
223 break;
224 }
225 }
226 if !matched {
227 var_dims.push(NcDimension {
229 name: format!("dim_{}", dim_size),
230 size: dim_size,
231 is_unlimited: false,
232 });
233 }
234 }
235
236 if let Some(max_dims) = ds.max_dims() {
238 for (i, md) in max_dims.iter().enumerate() {
239 if *md == u64::MAX && i < var_dims.len() {
240 var_dims[i].is_unlimited = true;
241 }
242 }
243 }
244
245 var_dims
246}
247
#[cfg(test)]
mod tests {
    use super::compute_storage_sizes;

    #[test]
    fn test_compute_storage_sizes_detects_overflow() {
        // u64::MAX * 2 elements overflows the element-count product.
        let result = compute_storage_sizes(&[u64::MAX, 2], 8, false);
        assert!(matches!(result, Err(crate::Error::InvalidData(_))));
    }

    #[test]
    fn test_compute_storage_sizes_record_dims() {
        // 10 records of 3*4 elements at 4 bytes each:
        // total = 10*3*4*4 = 480, per record = 3*4*4 = 48.
        let sizes = compute_storage_sizes(&[10, 3, 4], 4, true).unwrap();
        assert_eq!(sizes, (480, 48));
    }
}