1use std::collections::HashMap;
10
11use hdf5_reader::group::Group;
12
13use crate::error::{Error, Result};
14use crate::types::{NcDimension, NcVariable};
15
16use super::attributes;
17use super::types::hdf5_to_nc_type;
18
/// Returns the final path component of an HDF5 object name.
///
/// For a name like `/group/var` this yields `var`; a name without any
/// `/` separator is returned unchanged.
fn leaf_name(name: &str) -> &str {
    match name.rfind('/') {
        Some(idx) => &name[idx + 1..],
        None => name,
    }
}
22
23pub fn extract_variables(
31 group: &Group,
32 dimensions: &[NcDimension],
33 dim_addr_map: &HashMap<u64, NcDimension>,
34 metadata_mode: crate::NcMetadataMode,
35) -> Result<Vec<NcVariable>> {
36 let datasets = group.datasets()?;
37 extract_variables_from_datasets(&datasets, group, dimensions, dim_addr_map, metadata_mode)
38}
39
40pub fn extract_variables_from_datasets(
41 datasets: &[hdf5_reader::Dataset],
42 group: &Group,
43 dimensions: &[NcDimension],
44 dim_addr_map: &HashMap<u64, NcDimension>,
45 metadata_mode: crate::NcMetadataMode,
46) -> Result<Vec<NcVariable>> {
47 let mut variables = Vec::new();
48
49 for ds in datasets {
50 if let Some(variable) =
51 extract_variable(ds, group, dimensions, dim_addr_map, metadata_mode)?
52 {
53 variables.push(variable);
54 }
55 }
56
57 Ok(variables)
58}
59
/// Builds an [`NcVariable`] from one HDF5 dataset, or returns `Ok(None)`
/// when the dataset should not surface as a NetCDF variable.
///
/// A dataset is skipped when it is a dimension scale with no backing
/// variable, or (in lossy mode) when its HDF5 type has no NetCDF-4
/// mapping. In `Strict` mode, unreadable attributes and unsupported types
/// are reported as `Error::InvalidData` instead of being skipped.
pub fn extract_variable(
    ds: &hdf5_reader::Dataset,
    group: &Group,
    dimensions: &[NcDimension],
    dim_addr_map: &HashMap<u64, NcDimension>,
    metadata_mode: crate::NcMetadataMode,
) -> Result<Option<NcVariable>> {
    let strict = metadata_mode == crate::NcMetadataMode::Strict;

    // A CLASS attribute equal to "DIMENSION_SCALE" marks the dataset as an
    // HDF5 dimension scale. A missing attribute is never an error; an
    // unreadable one is fatal only in strict mode.
    let is_dim_scale = match ds.attribute("CLASS") {
        Ok(attr) => match attr.read_string() {
            Ok(value) => value == "DIMENSION_SCALE",
            Err(err) if strict => {
                return Err(Error::InvalidData(format!(
                    "dataset '{}' has unreadable CLASS attribute: {err}",
                    ds.name()
                )))
            }
            Err(_) => false,
        },
        Err(_) => false,
    };

    // Dimension scales without an associated variable (per their NAME
    // attribute) are pure dimensions — not variables.
    if is_dim_scale && is_dimension_only_scale(ds, strict)? {
        return Ok(None);
    }

    // Unsupported types: error in strict mode, silently skipped otherwise.
    let nc_type = match hdf5_to_nc_type(ds.dtype()) {
        Ok(t) => t,
        Err(err) if strict => {
            return Err(Error::InvalidData(format!(
                "dataset '{}' uses unsupported NetCDF-4 type: {err}",
                ds.name()
            )))
        }
        Err(_) => return Ok(None),
    };

    // Coordinate variables (dimension scales) resolve to their own single
    // dimension; data variables resolve via DIMENSION_LIST (with a lossy
    // size-matching fallback).
    let var_dims = if is_dim_scale {
        resolve_coordinate_variable_dimensions(ds, dim_addr_map, metadata_mode)?
    } else {
        resolve_variable_dimensions(ds, group, dimensions, dim_addr_map, metadata_mode)?
    };
    // Any unlimited dimension makes this a record variable.
    let is_unlimited = var_dims.iter().any(|d| d.is_unlimited);
    let shape = ds.shape();
    let (data_size, record_size) =
        compute_storage_sizes(shape, nc_type.size() as u64, is_unlimited)?;
    let var_attrs = attributes::extract_variable_attributes(ds, metadata_mode)?;

    Ok(Some(NcVariable {
        name: leaf_name(ds.name()).to_string(),
        dimensions: var_dims,
        dtype: nc_type,
        attributes: var_attrs,
        data_offset: ds.address(),
        _data_size: data_size,
        is_record_var: is_unlimited,
        record_size,
    }))
}
120
121fn is_dimension_only_scale(ds: &hdf5_reader::Dataset, strict: bool) -> Result<bool> {
122 match ds.attribute("NAME") {
123 Ok(attr) => match attr.read_string() {
124 Ok(value) => Ok(super::dimensions::is_dimension_without_variable_name(
125 &value,
126 )),
127 Err(err) if strict => Err(Error::InvalidData(format!(
128 "dimension scale '{}' has unreadable NAME attribute: {err}",
129 ds.name()
130 ))),
131 Err(_) => Ok(false),
132 },
133 Err(_) => Ok(false),
134 }
135}
136
137fn resolve_coordinate_variable_dimensions(
138 ds: &hdf5_reader::Dataset,
139 dim_addr_map: &HashMap<u64, NcDimension>,
140 metadata_mode: crate::NcMetadataMode,
141) -> Result<Vec<NcDimension>> {
142 if let Some(dim) = dim_addr_map.get(&ds.address()) {
143 Ok(vec![dim.clone()])
144 } else if metadata_mode == crate::NcMetadataMode::Lossy {
145 Ok(vec![NcDimension {
146 name: leaf_name(ds.name()).to_string(),
147 size: ds.shape().first().copied().unwrap_or(0),
148 is_unlimited: ds
149 .max_dims()
150 .is_some_and(|md| !md.is_empty() && md[0] == u64::MAX),
151 }])
152 } else {
153 Err(Error::InvalidData(format!(
154 "coordinate variable '{}' is not registered as a dimension scale",
155 ds.name()
156 )))
157 }
158}
159
160fn resolve_variable_dimensions(
167 ds: &hdf5_reader::Dataset,
168 group: &Group,
169 dimensions: &[NcDimension],
170 dim_addr_map: &HashMap<u64, NcDimension>,
171 metadata_mode: crate::NcMetadataMode,
172) -> Result<Vec<NcDimension>> {
173 resolve_variable_dimensions_with_mode(ds, group, dimensions, dim_addr_map, metadata_mode)
174}
175
176fn resolve_variable_dimensions_with_mode(
177 ds: &hdf5_reader::Dataset,
178 group: &Group,
179 dimensions: &[NcDimension],
180 dim_addr_map: &HashMap<u64, NcDimension>,
181 metadata_mode: crate::NcMetadataMode,
182) -> Result<Vec<NcDimension>> {
183 match resolve_variable_dimensions_from_dimlist(ds, group, dim_addr_map) {
184 Ok(dims) => Ok(dims),
185 Err(_err) if metadata_mode == crate::NcMetadataMode::Lossy => {
186 Ok(resolve_variable_dimensions_by_size(ds, dimensions))
187 }
188 Err(err) => Err(err),
189 }
190}
191
192fn resolve_variable_dimensions_from_dimlist(
194 ds: &hdf5_reader::Dataset,
195 group: &Group,
196 dim_addr_map: &HashMap<u64, NcDimension>,
197) -> Result<Vec<NcDimension>> {
198 let dim_addrs = resolve_dimension_scale_addresses(ds, group)?;
199 let mut var_dims = Vec::with_capacity(dim_addrs.len());
200 for dim_addr in dim_addrs {
201 if let Some(dim) = dim_addr_map.get(&dim_addr) {
202 var_dims.push(dim.clone());
203 } else {
204 return Err(Error::InvalidData(format!(
205 "dataset '{}' references unknown dimension scale address {dim_addr:#x}",
206 ds.name()
207 )));
208 }
209 }
210
211 if let Some(max_dims) = ds.max_dims() {
213 for (i, md) in max_dims.iter().enumerate() {
214 if *md == u64::MAX && i < var_dims.len() {
215 var_dims[i].is_unlimited = true;
216 }
217 }
218 }
219
220 Ok(var_dims)
221}
222
223pub(crate) fn resolve_dimension_scale_addresses(
224 ds: &hdf5_reader::Dataset,
225 group: &Group,
226) -> Result<Vec<u64>> {
227 let attr = ds.attribute("DIMENSION_LIST").map_err(|_| {
228 Error::InvalidData(format!(
229 "dataset '{}' is missing required DIMENSION_LIST metadata",
230 ds.name()
231 ))
232 })?;
233 let raw_data = &attr.raw_data;
234 let ndim = ds.ndim();
235 let offset_size = group.offset_size();
236
237 if ndim == 0 {
238 return Ok(Vec::new());
239 }
240 if raw_data.is_empty() {
241 return Err(Error::InvalidData(format!(
242 "dataset '{}' has empty DIMENSION_LIST metadata",
243 ds.name()
244 )));
245 }
246
247 let entry_size = 4 + usize::from(offset_size) + 4;
248 if raw_data.len() < ndim * entry_size {
249 return Err(Error::InvalidData(format!(
250 "dataset '{}' has truncated DIMENSION_LIST metadata",
251 ds.name()
252 )));
253 }
254
255 let mut dim_addrs = Vec::with_capacity(ndim);
256 let mut cursor = hdf5_reader::io::Cursor::new(raw_data);
257
258 for _ in 0..ndim {
259 let seq_len = cursor.read_u32_le().map_err(|err| {
260 Error::InvalidData(format!(
261 "dataset '{}' has invalid DIMENSION_LIST entry count: {err}",
262 ds.name()
263 ))
264 })? as usize;
265 let heap_addr = cursor.read_offset(offset_size).map_err(|err| {
266 Error::InvalidData(format!(
267 "dataset '{}' has invalid DIMENSION_LIST heap address: {err}",
268 ds.name()
269 ))
270 })?;
271 let heap_idx = cursor.read_u32_le().map_err(|err| {
272 Error::InvalidData(format!(
273 "dataset '{}' has invalid DIMENSION_LIST heap index: {err}",
274 ds.name()
275 ))
276 })? as u16;
277
278 if seq_len == 0 || hdf5_reader::io::Cursor::is_undefined_offset(heap_addr, offset_size) {
279 return Err(Error::InvalidData(format!(
280 "dataset '{}' has an unresolved DIMENSION_LIST reference",
281 ds.name()
282 )));
283 }
284
285 let collection = hdf5_reader::global_heap::GlobalHeapCollection::parse_at_storage(
286 group.storage(),
287 heap_addr,
288 offset_size,
289 group.length_size(),
290 )
291 .map_err(|err| {
292 Error::InvalidData(format!(
293 "dataset '{}' has unreadable DIMENSION_LIST heap object: {err}",
294 ds.name()
295 ))
296 })?;
297
298 let heap_obj = collection.get_object(heap_idx).ok_or_else(|| {
299 Error::InvalidData(format!(
300 "dataset '{}' references missing DIMENSION_LIST heap object {}",
301 ds.name(),
302 heap_idx
303 ))
304 })?;
305
306 let refs = hdf5_reader::reference::read_object_references(&heap_obj.data, offset_size)
307 .map_err(|err| {
308 Error::InvalidData(format!(
309 "dataset '{}' has invalid DIMENSION_LIST references: {err}",
310 ds.name()
311 ))
312 })?;
313
314 if refs.is_empty() {
315 return Err(Error::InvalidData(format!(
316 "dataset '{}' has empty DIMENSION_LIST references",
317 ds.name()
318 )));
319 }
320
321 dim_addrs.push(refs[0]);
322 }
323
324 Ok(dim_addrs)
325}
326
327fn compute_storage_sizes(shape: &[u64], elem_size: u64, is_unlimited: bool) -> Result<(u64, u64)> {
328 let total_elements =
329 crate::types::checked_shape_elements(shape, "NetCDF-4 variable element count")?;
330 let data_size = crate::types::checked_mul_u64(
331 total_elements,
332 elem_size,
333 "NetCDF-4 variable size in bytes",
334 )?;
335
336 let record_elements = if is_unlimited && shape.len() > 1 {
337 crate::types::checked_shape_elements(&shape[1..], "NetCDF-4 record element count")?
338 } else {
339 1
340 };
341 let record_size =
342 crate::types::checked_mul_u64(record_elements, elem_size, "NetCDF-4 record size in bytes")?;
343
344 Ok((data_size, record_size))
345}
346
347fn resolve_variable_dimensions_by_size(
350 ds: &hdf5_reader::Dataset,
351 dimensions: &[NcDimension],
352) -> Vec<NcDimension> {
353 let shape = ds.shape();
354
355 let mut var_dims = Vec::with_capacity(shape.len());
358 let mut used = vec![false; dimensions.len()];
359
360 for &dim_size in shape {
361 let mut matched = false;
362 for (i, dim) in dimensions.iter().enumerate() {
363 if !used[i] && dim.size == dim_size {
364 var_dims.push(dim.clone());
365 used[i] = true;
366 matched = true;
367 break;
368 }
369 }
370 if !matched {
371 var_dims.push(NcDimension {
373 name: format!("dim_{}", dim_size),
374 size: dim_size,
375 is_unlimited: false,
376 });
377 }
378 }
379
380 if let Some(max_dims) = ds.max_dims() {
382 for (i, md) in max_dims.iter().enumerate() {
383 if *md == u64::MAX && i < var_dims.len() {
384 var_dims[i].is_unlimited = true;
385 }
386 }
387 }
388
389 var_dims
390}
391
#[cfg(test)]
mod tests {
    use super::compute_storage_sizes;

    // Multiplying the shape product by the element size must surface an
    // InvalidData error instead of wrapping on u64 overflow.
    #[test]
    fn test_compute_storage_sizes_detects_overflow() {
        let err = compute_storage_sizes(&[u64::MAX, 2], 8, false).unwrap_err();
        assert!(matches!(err, crate::Error::InvalidData(_)));
    }

    // For a record variable, `record_size` covers one slice along the
    // unlimited (first) dimension: 3 * 4 elements * 4 bytes = 48, while
    // `data_size` covers the full 10 * 3 * 4 * 4 = 480 bytes.
    #[test]
    fn test_compute_storage_sizes_record_dims() {
        let (data_size, record_size) = compute_storage_sizes(&[10, 3, 4], 4, true).unwrap();
        assert_eq!(data_size, 480);
        assert_eq!(record_size, 48);
    }
}
408}