use std::collections::HashMap;
use hdf5_reader::group::Group;
use crate::error::Result;
use crate::types::{NcDimension, NcVariable};
use super::attributes;
use super::types::hdf5_to_nc_type;
/// Returns the final path component of an HDF5 object name.
///
/// `"a/b/c"` yields `"c"`; a name with no `'/'` is returned unchanged.
fn leaf_name(name: &str) -> &str {
    match name.rfind('/') {
        Some(slash) => &name[slash + 1..],
        None => name,
    }
}
/// Collects netCDF variables from every dataset directly inside `group`.
///
/// A group whose dataset listing cannot be read contributes no variables —
/// the error is deliberately swallowed so one unreadable group does not
/// abort the whole scan.
pub fn extract_variables(
    group: &Group<'_>,
    dimensions: &[NcDimension],
    dim_addr_map: &HashMap<u64, NcDimension>,
) -> Result<Vec<NcVariable>> {
    match group.datasets() {
        Ok(datasets) => {
            extract_variables_from_datasets(&datasets, group, dimensions, dim_addr_map)
        }
        Err(_) => Ok(Vec::new()),
    }
}
/// Builds an `NcVariable` for each plain dataset in `datasets`.
///
/// Two kinds of dataset are skipped, by design and without error:
/// - datasets whose `CLASS` attribute reads `"DIMENSION_SCALE"` — these
///   represent dimensions rather than variables (per netCDF-4's HDF5
///   mapping), and
/// - datasets whose HDF5 datatype has no netCDF equivalent
///   (`hdf5_to_nc_type` fails).
///
/// Dimension resolution first tries the dataset's `DIMENSION_LIST`
/// attribute; if that is absent or unparsable it falls back to greedy
/// matching of `dimensions` by size.
///
/// Errors are propagated only from size computation (overflow) and
/// attribute extraction.
pub fn extract_variables_from_datasets(
    datasets: &[hdf5_reader::Dataset<'_>],
    group: &Group<'_>,
    dimensions: &[NcDimension],
    dim_addr_map: &HashMap<u64, NcDimension>,
) -> Result<Vec<NcVariable>> {
    let mut variables = Vec::new();
    for ds in datasets {
        // A missing or unreadable CLASS attribute counts as "not a scale".
        let is_dim_scale = ds
            .attribute("CLASS")
            .ok()
            .and_then(|attr| attr.read_string().ok())
            .map(|s| s == "DIMENSION_SCALE")
            .unwrap_or(false);
        if is_dim_scale {
            continue;
        }
        // Best-effort: unsupported datatypes are silently dropped.
        let nc_type = match hdf5_to_nc_type(ds.dtype()) {
            Ok(t) => t,
            Err(_) => continue,
        };
        let var_dims = resolve_variable_dimensions_from_dimlist(ds, group, dim_addr_map)
            .unwrap_or_else(|| resolve_variable_dimensions_by_size(ds, dimensions));
        // Any unlimited dimension makes this a record variable.
        let is_unlimited = var_dims.iter().any(|d| d.is_unlimited);
        let shape = ds.shape();
        let (data_size, record_size) =
            compute_storage_sizes(shape, nc_type.size() as u64, is_unlimited)?;
        let var_attrs = attributes::extract_variable_attributes(ds)?;
        variables.push(NcVariable {
            name: leaf_name(ds.name()).to_string(),
            dimensions: var_dims,
            dtype: nc_type,
            attributes: var_attrs,
            data_offset: ds.address(),
            _data_size: data_size,
            is_record_var: is_unlimited,
            record_size,
        });
    }
    Ok(variables)
}
/// Resolves a dataset's dimensions from its HDF5 `DIMENSION_LIST` attribute.
///
/// The attribute's raw bytes are decoded as `ndim` fixed-size entries, each:
/// a 4-byte little-endian sequence length, a file-offset-sized global-heap
/// collection address, and a 4-byte heap object index. The referenced heap
/// object contains object references; the first reference is the file
/// address of the dimension-scale dataset, which is looked up in
/// `dim_addr_map`.
///
/// Returns `None` on any gap — missing attribute, empty payload, zero rank,
/// short buffer, zero-length sequence, undefined heap address, parse
/// failure, or an address absent from `dim_addr_map` — so the caller can
/// fall back to size-based matching.
fn resolve_variable_dimensions_from_dimlist(
    ds: &hdf5_reader::Dataset<'_>,
    group: &Group<'_>,
    dim_addr_map: &HashMap<u64, NcDimension>,
) -> Option<Vec<NcDimension>> {
    let attr = ds.attribute("DIMENSION_LIST").ok()?;
    let raw_data = &attr.raw_data;
    let ndim = ds.ndim();
    let offset_size = group.offset_size();
    let file_data = group.file_data();
    if raw_data.is_empty() || ndim == 0 {
        return None;
    }
    // Per-entry layout: u32 sequence length + offset + u32 heap index.
    let entry_size = 4 + offset_size as usize + 4;
    if raw_data.len() < ndim * entry_size {
        return None;
    }
    let mut var_dims = Vec::with_capacity(ndim);
    let mut cursor = hdf5_reader::io::Cursor::new(raw_data);
    for _ in 0..ndim {
        let seq_len = cursor.read_u32_le().ok()? as usize;
        let heap_addr = cursor.read_offset(offset_size).ok()?;
        // NOTE(review): the on-disk index is 4 bytes but is truncated to
        // u16 for `get_object` — assumes heap indices fit in 16 bits;
        // confirm against the GlobalHeapCollection API.
        let heap_idx = cursor.read_u32_le().ok()? as u16;
        if seq_len == 0 || hdf5_reader::io::Cursor::is_undefined_offset(heap_addr, offset_size) {
            return None;
        }
        // Parse the global-heap collection the entry points into.
        let mut heap_cursor = hdf5_reader::io::Cursor::new(file_data);
        heap_cursor.set_position(heap_addr);
        let collection = hdf5_reader::global_heap::GlobalHeapCollection::parse(
            &mut heap_cursor,
            offset_size,
            group.length_size(),
        )
        .ok()?;
        let heap_obj = collection.get_object(heap_idx)?;
        let refs =
            hdf5_reader::reference::read_object_references(&heap_obj.data, offset_size).ok()?;
        if refs.is_empty() {
            return None;
        }
        // Only the first reference per axis is consulted; additional scales
        // attached to the same axis, if any, are ignored.
        let dim_addr = refs[0];
        if let Some(dim) = dim_addr_map.get(&dim_addr) {
            var_dims.push(dim.clone());
        } else {
            return None;
        }
    }
    // A max extent of u64::MAX is treated as the "unlimited" sentinel
    // (presumably HDF5's H5S_UNLIMITED — confirm against hdf5_reader).
    if let Some(max_dims) = ds.max_dims() {
        for (i, md) in max_dims.iter().enumerate() {
            if *md == u64::MAX && i < var_dims.len() {
                var_dims[i].is_unlimited = true;
            }
        }
    }
    Some(var_dims)
}
/// Computes the total byte size and the per-record byte size for a variable.
///
/// `data_size` is the overflow-checked product of every extent in `shape`
/// times `elem_size`. For a record variable (`is_unlimited`) with rank > 1,
/// one record spans all dimensions except the leading one; otherwise a
/// record is a single element, so `record_size == elem_size`.
///
/// Returns an error if any multiplication would overflow `u64`.
fn compute_storage_sizes(shape: &[u64], elem_size: u64, is_unlimited: bool) -> Result<(u64, u64)> {
    let total_elements =
        crate::types::checked_shape_elements(shape, "NetCDF-4 variable element count")?;
    let data_size = crate::types::checked_mul_u64(
        total_elements,
        elem_size,
        "NetCDF-4 variable size in bytes",
    )?;
    let record_elements = match shape {
        // Drop the leading (unlimited) dimension; the rest form one record.
        [_, rest @ ..] if is_unlimited && !rest.is_empty() => {
            crate::types::checked_shape_elements(rest, "NetCDF-4 record element count")?
        }
        _ => 1,
    };
    let record_size =
        crate::types::checked_mul_u64(record_elements, elem_size, "NetCDF-4 record size in bytes")?;
    Ok((data_size, record_size))
}
/// Fallback dimension resolution: greedily match each axis extent against
/// the known `dimensions` by size.
///
/// Each known dimension is consumed at most once (first fit, in order).
/// An extent with no remaining match gets a synthetic placeholder named
/// `dim_<size>`. Axes whose max extent is `u64::MAX` are flagged unlimited.
fn resolve_variable_dimensions_by_size(
    ds: &hdf5_reader::Dataset<'_>,
    dimensions: &[NcDimension],
) -> Vec<NcDimension> {
    let shape = ds.shape();
    let mut consumed = vec![false; dimensions.len()];
    let mut resolved: Vec<NcDimension> = shape
        .iter()
        .map(|&extent| {
            let hit = dimensions
                .iter()
                .enumerate()
                .find(|(idx, dim)| !consumed[*idx] && dim.size == extent);
            match hit {
                Some((idx, dim)) => {
                    consumed[idx] = true;
                    dim.clone()
                }
                None => NcDimension {
                    name: format!("dim_{}", extent),
                    size: extent,
                    is_unlimited: false,
                },
            }
        })
        .collect();
    if let Some(max_dims) = ds.max_dims() {
        for (axis, &max_extent) in max_dims.iter().enumerate() {
            // u64::MAX max extent marks an unlimited axis.
            if max_extent == u64::MAX {
                if let Some(dim) = resolved.get_mut(axis) {
                    dim.is_unlimited = true;
                }
            }
        }
    }
    resolved
}
#[cfg(test)]
mod tests {
    use super::compute_storage_sizes;

    /// An overflowing shape must surface as `InvalidData`, not wrap.
    #[test]
    fn test_compute_storage_sizes_detects_overflow() {
        let result = compute_storage_sizes(&[u64::MAX, 2], 8, false);
        assert!(matches!(result, Err(crate::Error::InvalidData(_))));
    }

    /// A record variable's record spans every dimension but the first.
    #[test]
    fn test_compute_storage_sizes_record_dims() {
        let sizes = compute_storage_sizes(&[10, 3, 4], 4, true).unwrap();
        // 10*3*4 elements * 4 bytes total; 3*4 elements * 4 bytes per record.
        assert_eq!(sizes, (480, 48));
    }
}