1use std::collections::HashMap;
8
9use hdf5_reader::group::Group;
10
11use crate::error::{Error, Result};
12use crate::types::{NcDimension, NcVariable};
13
14use super::attributes;
15use super::types::hdf5_to_nc_type;
16
/// Returns the final component of a slash-separated HDF5 object name.
///
/// `"/group/var"` yields `"var"`; a name containing no `/` is returned
/// unchanged.
fn leaf_name(name: &str) -> &str {
    match name.rfind('/') {
        Some(idx) => &name[idx + 1..],
        None => name,
    }
}
20
21pub fn extract_variables(
29 group: &Group,
30 dimensions: &[NcDimension],
31 dim_addr_map: &HashMap<u64, NcDimension>,
32 metadata_mode: crate::NcMetadataMode,
33) -> Result<Vec<NcVariable>> {
34 let datasets = group.datasets()?;
35 extract_variables_from_datasets(&datasets, group, dimensions, dim_addr_map, metadata_mode)
36}
37
38pub fn extract_variables_from_datasets(
39 datasets: &[hdf5_reader::Dataset],
40 group: &Group,
41 dimensions: &[NcDimension],
42 dim_addr_map: &HashMap<u64, NcDimension>,
43 metadata_mode: crate::NcMetadataMode,
44) -> Result<Vec<NcVariable>> {
45 let mut variables = Vec::new();
46
47 for ds in datasets {
48 if let Some(variable) =
49 extract_variable(ds, group, dimensions, dim_addr_map, metadata_mode)?
50 {
51 variables.push(variable);
52 }
53 }
54
55 Ok(variables)
56}
57
58pub fn extract_variable(
59 ds: &hdf5_reader::Dataset,
60 group: &Group,
61 dimensions: &[NcDimension],
62 dim_addr_map: &HashMap<u64, NcDimension>,
63 metadata_mode: crate::NcMetadataMode,
64) -> Result<Option<NcVariable>> {
65 let strict = metadata_mode == crate::NcMetadataMode::Strict;
66
67 let is_dim_scale = match ds.attribute("CLASS") {
68 Ok(attr) => match attr.read_string() {
69 Ok(value) => value == "DIMENSION_SCALE",
70 Err(err) if strict => {
71 return Err(Error::InvalidData(format!(
72 "dataset '{}' has unreadable CLASS attribute: {err}",
73 ds.name()
74 )))
75 }
76 Err(_) => false,
77 },
78 Err(_) => false,
79 };
80
81 if is_dim_scale {
82 return Ok(None);
83 }
84
85 let nc_type = match hdf5_to_nc_type(ds.dtype()) {
86 Ok(t) => t,
87 Err(err) if strict => {
88 return Err(Error::InvalidData(format!(
89 "dataset '{}' uses unsupported NetCDF-4 type: {err}",
90 ds.name()
91 )))
92 }
93 Err(_) => return Ok(None),
94 };
95
96 let var_dims = resolve_variable_dimensions(ds, group, dimensions, dim_addr_map, metadata_mode)?;
97 let is_unlimited = var_dims.iter().any(|d| d.is_unlimited);
98 let shape = ds.shape();
99 let (data_size, record_size) =
100 compute_storage_sizes(shape, nc_type.size() as u64, is_unlimited)?;
101 let var_attrs = attributes::extract_variable_attributes(ds, metadata_mode)?;
102
103 Ok(Some(NcVariable {
104 name: leaf_name(ds.name()).to_string(),
105 dimensions: var_dims,
106 dtype: nc_type,
107 attributes: var_attrs,
108 data_offset: ds.address(),
109 _data_size: data_size,
110 is_record_var: is_unlimited,
111 record_size,
112 }))
113}
114
/// Resolves the NetCDF dimensions attached to dataset `ds`.
///
/// Pure forwarder to [`resolve_variable_dimensions_with_mode`] — presumably
/// kept as a stable internal entry point. NOTE(review): the two functions
/// are identical in signature; consider collapsing them if no other
/// callers depend on this name.
fn resolve_variable_dimensions(
    ds: &hdf5_reader::Dataset,
    group: &Group,
    dimensions: &[NcDimension],
    dim_addr_map: &HashMap<u64, NcDimension>,
    metadata_mode: crate::NcMetadataMode,
) -> Result<Vec<NcDimension>> {
    resolve_variable_dimensions_with_mode(ds, group, dimensions, dim_addr_map, metadata_mode)
}
130
131fn resolve_variable_dimensions_with_mode(
132 ds: &hdf5_reader::Dataset,
133 group: &Group,
134 dimensions: &[NcDimension],
135 dim_addr_map: &HashMap<u64, NcDimension>,
136 metadata_mode: crate::NcMetadataMode,
137) -> Result<Vec<NcDimension>> {
138 match resolve_variable_dimensions_from_dimlist(ds, group, dim_addr_map) {
139 Ok(dims) => Ok(dims),
140 Err(_err) if metadata_mode == crate::NcMetadataMode::Lossy => {
141 Ok(resolve_variable_dimensions_by_size(ds, dimensions))
142 }
143 Err(err) => Err(err),
144 }
145}
146
147fn resolve_variable_dimensions_from_dimlist(
149 ds: &hdf5_reader::Dataset,
150 group: &Group,
151 dim_addr_map: &HashMap<u64, NcDimension>,
152) -> Result<Vec<NcDimension>> {
153 let dim_addrs = resolve_dimension_scale_addresses(ds, group)?;
154 let mut var_dims = Vec::with_capacity(dim_addrs.len());
155 for dim_addr in dim_addrs {
156 if let Some(dim) = dim_addr_map.get(&dim_addr) {
157 var_dims.push(dim.clone());
158 } else {
159 return Err(Error::InvalidData(format!(
160 "dataset '{}' references unknown dimension scale address {dim_addr:#x}",
161 ds.name()
162 )));
163 }
164 }
165
166 if let Some(max_dims) = ds.max_dims() {
168 for (i, md) in max_dims.iter().enumerate() {
169 if *md == u64::MAX && i < var_dims.len() {
170 var_dims[i].is_unlimited = true;
171 }
172 }
173 }
174
175 Ok(var_dims)
176}
177
178pub(crate) fn resolve_dimension_scale_addresses(
179 ds: &hdf5_reader::Dataset,
180 group: &Group,
181) -> Result<Vec<u64>> {
182 let attr = ds.attribute("DIMENSION_LIST").map_err(|_| {
183 Error::InvalidData(format!(
184 "dataset '{}' is missing required DIMENSION_LIST metadata",
185 ds.name()
186 ))
187 })?;
188 let raw_data = &attr.raw_data;
189 let ndim = ds.ndim();
190 let offset_size = group.offset_size();
191
192 if ndim == 0 {
193 return Ok(Vec::new());
194 }
195 if raw_data.is_empty() {
196 return Err(Error::InvalidData(format!(
197 "dataset '{}' has empty DIMENSION_LIST metadata",
198 ds.name()
199 )));
200 }
201
202 let entry_size = 4 + usize::from(offset_size) + 4;
203 if raw_data.len() < ndim * entry_size {
204 return Err(Error::InvalidData(format!(
205 "dataset '{}' has truncated DIMENSION_LIST metadata",
206 ds.name()
207 )));
208 }
209
210 let mut dim_addrs = Vec::with_capacity(ndim);
211 let mut cursor = hdf5_reader::io::Cursor::new(raw_data);
212
213 for _ in 0..ndim {
214 let seq_len = cursor.read_u32_le().map_err(|err| {
215 Error::InvalidData(format!(
216 "dataset '{}' has invalid DIMENSION_LIST entry count: {err}",
217 ds.name()
218 ))
219 })? as usize;
220 let heap_addr = cursor.read_offset(offset_size).map_err(|err| {
221 Error::InvalidData(format!(
222 "dataset '{}' has invalid DIMENSION_LIST heap address: {err}",
223 ds.name()
224 ))
225 })?;
226 let heap_idx = cursor.read_u32_le().map_err(|err| {
227 Error::InvalidData(format!(
228 "dataset '{}' has invalid DIMENSION_LIST heap index: {err}",
229 ds.name()
230 ))
231 })? as u16;
232
233 if seq_len == 0 || hdf5_reader::io::Cursor::is_undefined_offset(heap_addr, offset_size) {
234 return Err(Error::InvalidData(format!(
235 "dataset '{}' has an unresolved DIMENSION_LIST reference",
236 ds.name()
237 )));
238 }
239
240 let collection = hdf5_reader::global_heap::GlobalHeapCollection::parse_at_storage(
241 group.storage(),
242 heap_addr,
243 offset_size,
244 group.length_size(),
245 )
246 .map_err(|err| {
247 Error::InvalidData(format!(
248 "dataset '{}' has unreadable DIMENSION_LIST heap object: {err}",
249 ds.name()
250 ))
251 })?;
252
253 let heap_obj = collection.get_object(heap_idx).ok_or_else(|| {
254 Error::InvalidData(format!(
255 "dataset '{}' references missing DIMENSION_LIST heap object {}",
256 ds.name(),
257 heap_idx
258 ))
259 })?;
260
261 let refs = hdf5_reader::reference::read_object_references(&heap_obj.data, offset_size)
262 .map_err(|err| {
263 Error::InvalidData(format!(
264 "dataset '{}' has invalid DIMENSION_LIST references: {err}",
265 ds.name()
266 ))
267 })?;
268
269 if refs.is_empty() {
270 return Err(Error::InvalidData(format!(
271 "dataset '{}' has empty DIMENSION_LIST references",
272 ds.name()
273 )));
274 }
275
276 dim_addrs.push(refs[0]);
277 }
278
279 Ok(dim_addrs)
280}
281
282fn compute_storage_sizes(shape: &[u64], elem_size: u64, is_unlimited: bool) -> Result<(u64, u64)> {
283 let total_elements =
284 crate::types::checked_shape_elements(shape, "NetCDF-4 variable element count")?;
285 let data_size = crate::types::checked_mul_u64(
286 total_elements,
287 elem_size,
288 "NetCDF-4 variable size in bytes",
289 )?;
290
291 let record_elements = if is_unlimited && shape.len() > 1 {
292 crate::types::checked_shape_elements(&shape[1..], "NetCDF-4 record element count")?
293 } else {
294 1
295 };
296 let record_size =
297 crate::types::checked_mul_u64(record_elements, elem_size, "NetCDF-4 record size in bytes")?;
298
299 Ok((data_size, record_size))
300}
301
302fn resolve_variable_dimensions_by_size(
305 ds: &hdf5_reader::Dataset,
306 dimensions: &[NcDimension],
307) -> Vec<NcDimension> {
308 let shape = ds.shape();
309
310 let mut var_dims = Vec::with_capacity(shape.len());
313 let mut used = vec![false; dimensions.len()];
314
315 for &dim_size in shape {
316 let mut matched = false;
317 for (i, dim) in dimensions.iter().enumerate() {
318 if !used[i] && dim.size == dim_size {
319 var_dims.push(dim.clone());
320 used[i] = true;
321 matched = true;
322 break;
323 }
324 }
325 if !matched {
326 var_dims.push(NcDimension {
328 name: format!("dim_{}", dim_size),
329 size: dim_size,
330 is_unlimited: false,
331 });
332 }
333 }
334
335 if let Some(max_dims) = ds.max_dims() {
337 for (i, md) in max_dims.iter().enumerate() {
338 if *md == u64::MAX && i < var_dims.len() {
339 var_dims[i].is_unlimited = true;
340 }
341 }
342 }
343
344 var_dims
345}
346
#[cfg(test)]
mod tests {
    use super::compute_storage_sizes;

    #[test]
    fn test_compute_storage_sizes_detects_overflow() {
        // u64::MAX elements along one axis times 2 overflows the count.
        let result = compute_storage_sizes(&[u64::MAX, 2], 8, false);
        assert!(matches!(result, Err(crate::Error::InvalidData(_))));
    }

    #[test]
    fn test_compute_storage_sizes_record_dims() {
        // 10 records, each 3 * 4 elements of 4 bytes: 480 total, 48/record.
        let sizes = compute_storage_sizes(&[10, 3, 4], 4, true).unwrap();
        assert_eq!(sizes, (480, 48));
    }
}
363}