1use std::collections::HashMap;
10
11use hdf5_reader::group::Group;
12
13use crate::error::{Error, Result};
14use crate::types::{NcDimension, NcVariable};
15
16use super::attributes;
17use super::types::hdf5_to_nc_type;
18
/// Returns the portion of `name` after its last `/`.
///
/// A name without any `/` is returned unchanged; a name ending in `/` yields an empty string.
fn leaf_name(name: &str) -> &str {
    name.rsplit_once('/').map_or(name, |(_, leaf)| leaf)
}
22
23pub fn extract_variables(
31 group: &Group,
32 dimensions: &[NcDimension],
33 dim_addr_map: &HashMap<u64, NcDimension>,
34 metadata_mode: crate::NcMetadataMode,
35) -> Result<Vec<NcVariable>> {
36 let datasets = group.datasets()?;
37 extract_variables_from_datasets(&datasets, group, dimensions, dim_addr_map, metadata_mode)
38}
39
40pub fn extract_variables_from_datasets(
41 datasets: &[hdf5_reader::Dataset],
42 group: &Group,
43 dimensions: &[NcDimension],
44 dim_addr_map: &HashMap<u64, NcDimension>,
45 metadata_mode: crate::NcMetadataMode,
46) -> Result<Vec<NcVariable>> {
47 let mut variables = Vec::new();
48
49 for ds in datasets {
50 if let Some(variable) =
51 extract_variable(ds, group, dimensions, dim_addr_map, metadata_mode)?
52 {
53 variables.push(variable);
54 }
55 }
56
57 Ok(variables)
58}
59
60pub fn extract_variable(
61 ds: &hdf5_reader::Dataset,
62 group: &Group,
63 dimensions: &[NcDimension],
64 dim_addr_map: &HashMap<u64, NcDimension>,
65 metadata_mode: crate::NcMetadataMode,
66) -> Result<Option<NcVariable>> {
67 let strict = metadata_mode == crate::NcMetadataMode::Strict;
68
69 let is_dim_scale = match ds.attribute("CLASS") {
70 Ok(attr) => match attr.read_string() {
71 Ok(value) => value == "DIMENSION_SCALE",
72 Err(err) if strict => {
73 return Err(Error::InvalidData(format!(
74 "dataset '{}' has unreadable CLASS attribute: {err}",
75 ds.name()
76 )))
77 }
78 Err(_) => false,
79 },
80 Err(_) => false,
81 };
82
83 if is_dim_scale && is_dimension_only_scale(ds, strict)? {
84 return Ok(None);
85 }
86
87 let nc_type = match hdf5_to_nc_type(ds.dtype()) {
88 Ok(t) => t,
89 Err(err) if strict => {
90 return Err(Error::InvalidData(format!(
91 "dataset '{}' uses unsupported NetCDF-4 type: {err}",
92 ds.name()
93 )))
94 }
95 Err(_) => return Ok(None),
96 };
97
98 let var_dims = if is_dim_scale {
99 resolve_coordinate_variable_dimensions(ds, dim_addr_map, metadata_mode)?
100 } else {
101 resolve_variable_dimensions(ds, group, dimensions, dim_addr_map, metadata_mode)?
102 };
103 let is_unlimited = var_dims.iter().any(|d| d.is_unlimited);
104 let shape = ds.shape();
105 let elem_size = u64::try_from(nc_type.size()?)
106 .map_err(|_| Error::InvalidData("NetCDF-4 type size exceeds u64 capacity".to_string()))?;
107 let (data_size, record_size) = compute_storage_sizes(shape, elem_size, is_unlimited)?;
108 let var_attrs = attributes::extract_variable_attributes(ds, metadata_mode)?;
109
110 Ok(Some(NcVariable {
111 name: leaf_name(ds.name()).to_string(),
112 dimensions: var_dims,
113 dtype: nc_type,
114 attributes: var_attrs,
115 data_offset: ds.address(),
116 _data_size: data_size,
117 is_record_var: is_unlimited,
118 record_size,
119 }))
120}
121
122fn is_dimension_only_scale(ds: &hdf5_reader::Dataset, strict: bool) -> Result<bool> {
123 match ds.attribute("NAME") {
124 Ok(attr) => match attr.read_string() {
125 Ok(value) => Ok(super::dimensions::is_dimension_without_variable_name(
126 &value,
127 )),
128 Err(err) if strict => Err(Error::InvalidData(format!(
129 "dimension scale '{}' has unreadable NAME attribute: {err}",
130 ds.name()
131 ))),
132 Err(_) => Ok(false),
133 },
134 Err(_) => Ok(false),
135 }
136}
137
138fn resolve_coordinate_variable_dimensions(
139 ds: &hdf5_reader::Dataset,
140 dim_addr_map: &HashMap<u64, NcDimension>,
141 metadata_mode: crate::NcMetadataMode,
142) -> Result<Vec<NcDimension>> {
143 if let Some(dim) = dim_addr_map.get(&ds.address()) {
144 Ok(vec![dim.clone()])
145 } else if metadata_mode == crate::NcMetadataMode::Lossy {
146 Ok(vec![NcDimension {
147 name: leaf_name(ds.name()).to_string(),
148 size: ds.shape().first().copied().unwrap_or(0),
149 is_unlimited: ds
150 .max_dims()
151 .is_some_and(|md| !md.is_empty() && md[0] == u64::MAX),
152 }])
153 } else {
154 Err(Error::InvalidData(format!(
155 "coordinate variable '{}' is not registered as a dimension scale",
156 ds.name()
157 )))
158 }
159}
160
161fn resolve_variable_dimensions(
168 ds: &hdf5_reader::Dataset,
169 group: &Group,
170 dimensions: &[NcDimension],
171 dim_addr_map: &HashMap<u64, NcDimension>,
172 metadata_mode: crate::NcMetadataMode,
173) -> Result<Vec<NcDimension>> {
174 resolve_variable_dimensions_with_mode(ds, group, dimensions, dim_addr_map, metadata_mode)
175}
176
177fn resolve_variable_dimensions_with_mode(
178 ds: &hdf5_reader::Dataset,
179 group: &Group,
180 dimensions: &[NcDimension],
181 dim_addr_map: &HashMap<u64, NcDimension>,
182 metadata_mode: crate::NcMetadataMode,
183) -> Result<Vec<NcDimension>> {
184 match resolve_variable_dimensions_from_dimlist(ds, group, dim_addr_map) {
185 Ok(dims) => Ok(dims),
186 Err(_err) if metadata_mode == crate::NcMetadataMode::Lossy => {
187 Ok(resolve_variable_dimensions_by_size(ds, dimensions))
188 }
189 Err(err) => Err(err),
190 }
191}
192
193fn resolve_variable_dimensions_from_dimlist(
195 ds: &hdf5_reader::Dataset,
196 group: &Group,
197 dim_addr_map: &HashMap<u64, NcDimension>,
198) -> Result<Vec<NcDimension>> {
199 let dim_addrs = resolve_dimension_scale_addresses(ds, group)?;
200 let mut var_dims = Vec::with_capacity(dim_addrs.len());
201 for dim_addr in dim_addrs {
202 if let Some(dim) = dim_addr_map.get(&dim_addr) {
203 var_dims.push(dim.clone());
204 } else {
205 return Err(Error::InvalidData(format!(
206 "dataset '{}' references unknown dimension scale address {dim_addr:#x}",
207 ds.name()
208 )));
209 }
210 }
211
212 if let Some(max_dims) = ds.max_dims() {
214 for (i, md) in max_dims.iter().enumerate() {
215 if *md == u64::MAX && i < var_dims.len() {
216 var_dims[i].is_unlimited = true;
217 }
218 }
219 }
220
221 Ok(var_dims)
222}
223
224pub(crate) fn resolve_dimension_scale_addresses(
225 ds: &hdf5_reader::Dataset,
226 group: &Group,
227) -> Result<Vec<u64>> {
228 let attr = ds.attribute("DIMENSION_LIST").map_err(|_| {
229 Error::InvalidData(format!(
230 "dataset '{}' is missing required DIMENSION_LIST metadata",
231 ds.name()
232 ))
233 })?;
234 let raw_data = &attr.raw_data;
235 let ndim = ds.ndim();
236 let offset_size = group.offset_size();
237
238 if ndim == 0 {
239 return Ok(Vec::new());
240 }
241 if raw_data.is_empty() {
242 return Err(Error::InvalidData(format!(
243 "dataset '{}' has empty DIMENSION_LIST metadata",
244 ds.name()
245 )));
246 }
247
248 let entry_size = 4 + usize::from(offset_size) + 4;
249 if raw_data.len() < ndim * entry_size {
250 return Err(Error::InvalidData(format!(
251 "dataset '{}' has truncated DIMENSION_LIST metadata",
252 ds.name()
253 )));
254 }
255
256 let mut dim_addrs = Vec::with_capacity(ndim);
257 let mut cursor = hdf5_reader::io::Cursor::new(raw_data);
258
259 for _ in 0..ndim {
260 let seq_len = cursor.read_u32_le().map_err(|err| {
261 Error::InvalidData(format!(
262 "dataset '{}' has invalid DIMENSION_LIST entry count: {err}",
263 ds.name()
264 ))
265 })? as usize;
266 let heap_addr = cursor.read_offset(offset_size).map_err(|err| {
267 Error::InvalidData(format!(
268 "dataset '{}' has invalid DIMENSION_LIST heap address: {err}",
269 ds.name()
270 ))
271 })?;
272 let heap_idx = cursor.read_u32_le().map_err(|err| {
273 Error::InvalidData(format!(
274 "dataset '{}' has invalid DIMENSION_LIST heap index: {err}",
275 ds.name()
276 ))
277 })? as u16;
278
279 if seq_len == 0 || hdf5_reader::io::Cursor::is_undefined_offset(heap_addr, offset_size) {
280 return Err(Error::InvalidData(format!(
281 "dataset '{}' has an unresolved DIMENSION_LIST reference",
282 ds.name()
283 )));
284 }
285
286 let collection = hdf5_reader::global_heap::GlobalHeapCollection::parse_at_storage(
287 group.storage(),
288 heap_addr,
289 offset_size,
290 group.length_size(),
291 )
292 .map_err(|err| {
293 Error::InvalidData(format!(
294 "dataset '{}' has unreadable DIMENSION_LIST heap object: {err}",
295 ds.name()
296 ))
297 })?;
298
299 let heap_obj = collection.get_object(heap_idx).ok_or_else(|| {
300 Error::InvalidData(format!(
301 "dataset '{}' references missing DIMENSION_LIST heap object {}",
302 ds.name(),
303 heap_idx
304 ))
305 })?;
306
307 let refs = hdf5_reader::reference::read_object_references(&heap_obj.data, offset_size)
308 .map_err(|err| {
309 Error::InvalidData(format!(
310 "dataset '{}' has invalid DIMENSION_LIST references: {err}",
311 ds.name()
312 ))
313 })?;
314
315 if refs.is_empty() {
316 return Err(Error::InvalidData(format!(
317 "dataset '{}' has empty DIMENSION_LIST references",
318 ds.name()
319 )));
320 }
321
322 dim_addrs.push(refs[0]);
323 }
324
325 Ok(dim_addrs)
326}
327
328fn compute_storage_sizes(shape: &[u64], elem_size: u64, is_unlimited: bool) -> Result<(u64, u64)> {
329 let total_elements =
330 crate::types::checked_shape_elements(shape, "NetCDF-4 variable element count")?;
331 let data_size = crate::types::checked_mul_u64(
332 total_elements,
333 elem_size,
334 "NetCDF-4 variable size in bytes",
335 )?;
336
337 let record_elements = if is_unlimited && shape.len() > 1 {
338 crate::types::checked_shape_elements(&shape[1..], "NetCDF-4 record element count")?
339 } else {
340 1
341 };
342 let record_size =
343 crate::types::checked_mul_u64(record_elements, elem_size, "NetCDF-4 record size in bytes")?;
344
345 Ok((data_size, record_size))
346}
347
348fn resolve_variable_dimensions_by_size(
351 ds: &hdf5_reader::Dataset,
352 dimensions: &[NcDimension],
353) -> Vec<NcDimension> {
354 let shape = ds.shape();
355
356 let mut var_dims = Vec::with_capacity(shape.len());
359 let mut used = vec![false; dimensions.len()];
360
361 for &dim_size in shape {
362 let mut matched = false;
363 for (i, dim) in dimensions.iter().enumerate() {
364 if !used[i] && dim.size == dim_size {
365 var_dims.push(dim.clone());
366 used[i] = true;
367 matched = true;
368 break;
369 }
370 }
371 if !matched {
372 var_dims.push(NcDimension {
374 name: format!("dim_{}", dim_size),
375 size: dim_size,
376 is_unlimited: false,
377 });
378 }
379 }
380
381 if let Some(max_dims) = ds.max_dims() {
383 for (i, md) in max_dims.iter().enumerate() {
384 if *md == u64::MAX && i < var_dims.len() {
385 var_dims[i].is_unlimited = true;
386 }
387 }
388 }
389
390 var_dims
391}
392
#[cfg(test)]
mod tests {
    use super::compute_storage_sizes;

    /// An element count that does not fit in `u64` must surface as `InvalidData`.
    #[test]
    fn compute_storage_sizes_detects_overflow() {
        let outcome = compute_storage_sizes(&[u64::MAX, 2], 8, false);
        assert!(matches!(outcome, Err(crate::Error::InvalidData(_))));
    }

    /// For an unlimited 10x3x4 variable of 4-byte elements, the whole variable is
    /// 10 * 3 * 4 * 4 bytes and one record is 3 * 4 * 4 bytes.
    #[test]
    fn compute_storage_sizes_record_dims() {
        let sizes = compute_storage_sizes(&[10, 3, 4], 4, true).unwrap();
        assert_eq!(sizes, (480, 48));
    }
}