pub mod attributes;
pub mod dimensions;
pub mod groups;
pub mod types;
pub mod variables;
use std::path::Path;
use std::sync::OnceLock;
use hdf5_reader::datatype_api::H5Type;
use hdf5_reader::storage::DynStorage;
use hdf5_reader::Hdf5File;
use ndarray::ArrayD;
#[cfg(feature = "rayon")]
use rayon::ThreadPool;
use crate::error::{Error, Result};
use crate::types::{NcGroup, NcType};
/// Dispatches a typed read over a runtime [`NcType`] and widens the result to `f64`.
///
/// The caller supplies a read expression parameterized by an identifier `$T`
/// (e.g. `|T| dataset.read_array::<T>()`). Each numeric arm binds `$T` to the
/// matching Rust element type via a local `type` alias, evaluates the
/// expression, and converts element-wise with `as f64`; the `Double` arm
/// returns the array unchanged. `Char`, `String`, and any other non-numeric
/// variant produce `Error::TypeMismatch`.
macro_rules! dispatch_read_as_f64 {
($dtype:expr, |$T:ident| $read_expr:expr) => {{
// Local import so callers don't need `NcType` in scope at the call site.
use crate::types::NcType;
match $dtype {
NcType::Byte => {
// Alias the macro's type parameter to the concrete element type,
// then evaluate the caller-provided read expression with it.
type $T = i8;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::Short => {
type $T = i16;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::Int => {
type $T = i32;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::Float => {
type $T = f32;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::Double => {
// Already f64 — no element-wise conversion needed; `?` still
// converts the reader's error type into this crate's `Error`.
type $T = f64;
Ok($read_expr?)
}
NcType::UByte => {
type $T = u8;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::UShort => {
type $T = u16;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::UInt => {
type $T = u32;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::Int64 => {
// NOTE: i64/u64 -> f64 casts lose precision above 2^53; this
// mirrors standard NetCDF numeric widening behavior.
type $T = i64;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::UInt64 => {
type $T = u64;
let arr = $read_expr?;
Ok(arr.mapv(|v| v as f64))
}
NcType::Char => Err(Error::TypeMismatch {
expected: "numeric type".to_string(),
actual: "Char".to_string(),
}),
NcType::String => Err(Error::TypeMismatch {
expected: "numeric type".to_string(),
actual: "String".to_string(),
}),
// Catch-all for any remaining non-numeric variants of NcType.
other => Err(Error::TypeMismatch {
expected: "numeric type".to_string(),
actual: format!("{:?}", other),
}),
}
}};
}
/// A read-only NetCDF-4 file backed by an HDF5 reader.
pub struct Nc4File {
    // Underlying HDF5 file handle; all reads go through it.
    hdf5: Hdf5File,
    // Metadata-building mode forwarded to `groups::build_*` — see
    // `crate::NcMetadataMode` for the exact semantics.
    metadata_mode: crate::NcMetadataMode,
    // Lazily-built metadata for the root group (used by `root_metadata()`).
    root_metadata: OnceLock<NcGroup>,
    // Lazily-built full group tree (used by `root_group()`).
    metadata_tree: OnceLock<NcGroup>,
}
impl Nc4File {
    /// Wraps an already-opened HDF5 file as a NetCDF-4 file.
    pub(crate) fn from_hdf5(hdf5: Hdf5File, metadata_mode: crate::NcMetadataMode) -> Result<Self> {
        Ok(Nc4File {
            hdf5,
            metadata_mode,
            root_metadata: OnceLock::new(),
            metadata_tree: OnceLock::new(),
        })
    }

    /// Splits NetCDF open options into the underlying HDF5 reader options and
    /// the metadata mode that `Nc4File` keeps for itself.
    ///
    /// Shared by all `*_with_options` constructors so the option mapping is
    /// maintained in exactly one place.
    fn split_open_options(
        options: crate::NcOpenOptions,
    ) -> (hdf5_reader::OpenOptions, crate::NcMetadataMode) {
        let hdf5_options = hdf5_reader::OpenOptions {
            chunk_cache_bytes: options.chunk_cache_bytes,
            chunk_cache_slots: options.chunk_cache_slots,
            filter_registry: options.filter_registry,
            ..Default::default()
        };
        (hdf5_options, options.metadata_mode)
    }

    /// Opens a NetCDF-4 file from a filesystem path with default options.
    pub fn open(path: &Path) -> Result<Self> {
        Self::open_with_options(path, crate::NcOpenOptions::default())
    }

    /// Opens a NetCDF-4 file from a filesystem path with explicit options.
    pub fn open_with_options(path: &Path, options: crate::NcOpenOptions) -> Result<Self> {
        let (hdf5_options, metadata_mode) = Self::split_open_options(options);
        let hdf5 = Hdf5File::open_with_options(path, hdf5_options)?;
        Nc4File::from_hdf5(hdf5, metadata_mode)
    }

    /// Opens a NetCDF-4 file from an in-memory byte buffer with default options.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        Self::from_bytes_with_options(data, crate::NcOpenOptions::default())
    }

    /// Opens a NetCDF-4 file from an in-memory byte buffer with explicit options.
    pub fn from_bytes_with_options(data: &[u8], options: crate::NcOpenOptions) -> Result<Self> {
        let (hdf5_options, metadata_mode) = Self::split_open_options(options);
        let hdf5 = Hdf5File::from_bytes_with_options(data, hdf5_options)?;
        Nc4File::from_hdf5(hdf5, metadata_mode)
    }

    /// Opens a NetCDF-4 file from an arbitrary storage backend with default options.
    pub fn from_storage(storage: DynStorage) -> Result<Self> {
        Self::from_storage_with_options(storage, crate::NcOpenOptions::default())
    }

    /// Opens a NetCDF-4 file from an arbitrary storage backend with explicit options.
    pub fn from_storage_with_options(
        storage: DynStorage,
        options: crate::NcOpenOptions,
    ) -> Result<Self> {
        let (hdf5_options, metadata_mode) = Self::split_open_options(options);
        let hdf5 = Hdf5File::from_storage_with_options(storage, hdf5_options)?;
        Nc4File::from_hdf5(hdf5, metadata_mode)
    }

    /// Returns the lazily-built group tree rooted at `/`.
    ///
    /// The tree is built at most once per file handle; if concurrent callers
    /// race to build it, `OnceLock::set` keeps only the first result.
    pub fn root_group(&self) -> Result<&NcGroup> {
        if let Some(group) = self.metadata_tree.get() {
            return Ok(group);
        }
        let metadata_tree = groups::build_root_group(&self.hdf5, self.metadata_mode)?;
        // A losing `set` is fine: another caller stored an equivalent tree
        // built from the same file.
        let _ = self.metadata_tree.set(metadata_tree);
        Ok(self
            .metadata_tree
            .get()
            .expect("metadata tree must be initialized after successful build"))
    }

    /// Reports whether the file carries the `_nc3_strict` attribute on the
    /// HDF5 root group, which marks classic-model (NetCDF-3 compatible) files.
    pub fn is_classic_model(&self) -> bool {
        self.hdf5
            .root_group()
            .ok()
            .and_then(|g| g.attribute("_nc3_strict").ok())
            .is_some()
    }

    /// Dimensions defined in the root group.
    pub fn dimensions(&self) -> Result<&[crate::types::NcDimension]> {
        Ok(&self.root_metadata()?.dimensions)
    }

    /// Variables defined in the root group.
    pub fn variables(&self) -> Result<&[crate::types::NcVariable]> {
        Ok(&self.root_metadata()?.variables)
    }

    /// Global (root-group) attributes.
    pub fn global_attributes(&self) -> Result<&[crate::types::NcAttribute]> {
        Ok(&self.root_metadata()?.attributes)
    }

    /// Looks up a group by slash-separated path; `""` or `"/"` yields the root.
    pub fn group(&self, path: &str) -> Result<&NcGroup> {
        let normalized = normalize_group_path(path)?;
        let root = self.root_group()?;
        if normalized.is_empty() {
            return Ok(root);
        }
        root.group(normalized)
            .ok_or_else(|| Error::GroupNotFound(path.to_string()))
    }

    /// Looks up a variable by slash-separated path from the root group.
    pub fn variable(&self, path: &str) -> Result<&crate::types::NcVariable> {
        self.root_group()?
            .variable(path)
            .ok_or_else(|| Error::VariableNotFound(path.to_string()))
    }

    /// Looks up a dimension by slash-separated path from the root group.
    pub fn dimension(&self, path: &str) -> Result<&crate::types::NcDimension> {
        self.root_group()?
            .dimension(path)
            .ok_or_else(|| Error::DimensionNotFound(path.to_string()))
    }

    /// Looks up a global attribute by path from the root group.
    pub fn global_attribute(&self, path: &str) -> Result<&crate::types::NcAttribute> {
        self.root_group()?
            .attribute(path)
            .ok_or_else(|| Error::AttributeNotFound(path.to_string()))
    }

    /// Reads the entire variable at `path` into a dynamic-dimensional array
    /// of `T`.
    pub fn read_variable<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
        let normalized = normalize_dataset_path(path)?;
        let dataset = self.hdf5.dataset(normalized)?;
        Ok(dataset.read_array::<T>()?)
    }

    /// Reads a string variable expected to hold exactly one element.
    ///
    /// # Errors
    /// Returns `Error::InvalidData` when the variable holds zero or more than
    /// one string element.
    pub fn read_variable_as_string(&self, path: &str) -> Result<String> {
        let mut strings = self.read_variable_as_strings(path)?;
        match strings.len() {
            1 => Ok(strings.swap_remove(0)),
            0 => Err(Error::InvalidData(format!(
                "variable '{}' contains no string elements",
                path
            ))),
            count => Err(Error::InvalidData(format!(
                "variable '{}' contains {count} string elements; use read_variable_as_strings()",
                path
            ))),
        }
    }

    /// Reads a `String`-typed variable as a flat vector of strings.
    ///
    /// # Errors
    /// Returns `Error::TypeMismatch` if the variable's NetCDF type is not
    /// `String`.
    pub fn read_variable_as_strings(&self, path: &str) -> Result<Vec<String>> {
        let normalized = normalize_dataset_path(path)?;
        let dataset = self.hdf5.dataset(normalized)?;
        let dtype = dataset_nc_type(&dataset)?;
        if dtype != NcType::String {
            return Err(Error::TypeMismatch {
                expected: "String".to_string(),
                actual: format!("{dtype:?}"),
            });
        }
        Ok(dataset.read_strings()?)
    }

    /// Parallel variant of [`Nc4File::read_variable`]; delegates to the HDF5
    /// reader's `read_array_parallel`.
    #[cfg(feature = "rayon")]
    pub fn read_variable_parallel<T: H5Type>(&self, path: &str) -> Result<ArrayD<T>> {
        let normalized = normalize_dataset_path(path)?;
        let dataset = self.hdf5.dataset(normalized)?;
        Ok(dataset.read_array_parallel::<T>()?)
    }

    /// Like [`Nc4File::read_variable_parallel`], but runs on the
    /// caller-supplied rayon pool instead of the global one.
    #[cfg(feature = "rayon")]
    pub fn read_variable_in_pool<T: H5Type>(
        &self,
        path: &str,
        pool: &ThreadPool,
    ) -> Result<ArrayD<T>> {
        let normalized = normalize_dataset_path(path)?;
        let dataset = self.hdf5.dataset(normalized)?;
        Ok(dataset.read_array_in_pool::<T>(pool)?)
    }
}
impl Nc4File {
    /// Lazily builds and caches the root-group metadata, returning a
    /// reference into the cache on every call.
    fn root_metadata(&self) -> Result<&NcGroup> {
        match self.root_metadata.get() {
            Some(cached) => Ok(cached),
            None => {
                let built = groups::build_root_group_metadata(&self.hdf5, self.metadata_mode)?;
                // If a concurrent caller stored a value first, `set` fails and
                // we simply read back the winner — both are equivalent.
                let _ = self.root_metadata.set(built);
                Ok(self
                    .root_metadata
                    .get()
                    .expect("root metadata must be initialized after successful build"))
            }
        }
    }
}
impl Nc4File {
    /// Reads an entire numeric variable, widening every element to `f64`.
    pub fn read_variable_as_f64(&self, path: &str) -> Result<ArrayD<f64>> {
        let ds = self.hdf5.dataset(normalize_dataset_path(path)?)?;
        let dtype = dataset_nc_type(&ds)?;
        dispatch_read_as_f64!(&dtype, |T| ds.read_array::<T>())
    }

    /// Reads a hyperslab of a numeric variable, widening every element to `f64`.
    pub fn read_variable_slice_as_f64(
        &self,
        path: &str,
        selection: &crate::types::NcSliceInfo,
    ) -> Result<ArrayD<f64>> {
        let ds = self.hdf5.dataset(normalize_dataset_path(path)?)?;
        let dtype = dataset_nc_type(&ds)?;
        let slice_sel = selection.to_hdf5_slice_info();
        dispatch_read_as_f64!(&dtype, |T| ds.read_slice::<T>(&slice_sel))
    }

    /// Reads a hyperslab of a variable as elements of `T`.
    pub fn read_variable_slice<T: H5Type>(
        &self,
        path: &str,
        selection: &crate::types::NcSliceInfo,
    ) -> Result<ArrayD<T>> {
        let ds = self.hdf5.dataset(normalize_dataset_path(path)?)?;
        let slice_sel = selection.to_hdf5_slice_info();
        Ok(ds.read_slice::<T>(&slice_sel)?)
    }

    /// Parallel variant of [`Nc4File::read_variable_slice`]; delegates to the
    /// HDF5 reader's `read_slice_parallel`.
    #[cfg(feature = "rayon")]
    pub fn read_variable_slice_parallel<T: H5Type>(
        &self,
        path: &str,
        selection: &crate::types::NcSliceInfo,
    ) -> Result<ArrayD<T>> {
        let ds = self.hdf5.dataset(normalize_dataset_path(path)?)?;
        let slice_sel = selection.to_hdf5_slice_info();
        Ok(ds.read_slice_parallel::<T>(&slice_sel)?)
    }
}
fn normalize_dataset_path(path: &str) -> Result<&str> {
let trimmed = path.trim_matches('/');
if trimmed.is_empty() {
return Err(Error::VariableNotFound(path.to_string()));
}
Ok(trimmed)
}
/// Strips leading/trailing `/` from a group path. An empty result is valid —
/// it denotes the root group — so this never fails; the `Result` return type
/// is kept for signature symmetry with `normalize_dataset_path`.
fn normalize_group_path(path: &str) -> Result<&str> {
    Ok(path.trim_start_matches('/').trim_end_matches('/'))
}
fn dataset_nc_type(dataset: &hdf5_reader::Dataset) -> Result<NcType> {
self::types::hdf5_to_nc_type(dataset.dtype()).map_err(|err| {
Error::InvalidData(format!(
"dataset '{}' cannot be mapped to a NetCDF-4 type: {err}",
dataset.name()
))
})
}