use std::path::Path;
use std::sync::{Arc, Mutex};
/// Resolve an HDF5 test fixture under `<workspace>/testdata/hdf5`.
///
/// Returns `None` when the file does not exist so callers can skip the
/// test instead of failing on a missing fixture.
fn fixture_path(name: &str) -> Option<std::path::PathBuf> {
    // CARGO_MANIFEST_DIR is this crate's root; fixtures live one level up.
    let workspace_root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap();
    let candidate = workspace_root.join("testdata/hdf5").join(name);
    if !candidate.exists() {
        return None;
    }
    Some(candidate)
}
/// Expands to the fixture's `PathBuf`, or early-returns from the enclosing
/// test (with a note on stderr) when the fixture is not present on disk.
macro_rules! skip_if_missing {
    ($name:expr) => {
        if let Some(found) = fixture_path($name) {
            found
        } else {
            eprintln!("SKIPPED: fixture {} not found", $name);
            return;
        }
    };
}
/// In-memory storage backend that records every byte range requested via
/// `read_range`, letting tests assert exactly which parts of the file the
/// reader touched.
struct CountingStorage {
    // Raw bytes of the HDF5 file under test.
    data: Vec<u8>,
    // Log of (offset, len) pairs for each successful read. Mutex because
    // `read_range` takes `&self`.
    ranges: Mutex<Vec<(u64, usize)>>,
}
impl CountingStorage {
    /// Wrap a byte buffer with an initially empty read log.
    fn new(data: Vec<u8>) -> Self {
        Self {
            data,
            ranges: Mutex::new(Vec::new()),
        }
    }

    /// Discard all recorded read ranges (e.g. those from file-open metadata).
    fn clear_ranges(&self) {
        let mut log = self.ranges.lock().unwrap();
        log.clear();
    }

    /// Snapshot of the recorded `(offset, len)` pairs, in request order.
    fn ranges(&self) -> Vec<(u64, usize)> {
        let log = self.ranges.lock().unwrap();
        log.clone()
    }
}
impl hdf5_reader::Storage for CountingStorage {
fn len(&self) -> u64 {
self.data.len() as u64
}
fn read_range(
&self,
offset: u64,
len: usize,
) -> hdf5_reader::error::Result<hdf5_reader::StorageBuffer> {
let start = usize::try_from(offset)
.map_err(|_| hdf5_reader::error::Error::OffsetOutOfBounds(offset))?;
let end = start
.checked_add(len)
.ok_or(hdf5_reader::error::Error::OffsetOutOfBounds(offset))?;
if end > self.data.len() {
return Err(hdf5_reader::error::Error::UnexpectedEof {
offset,
needed: len as u64,
available: self.len().saturating_sub(offset),
});
}
self.ranges.lock().unwrap().push((offset, len));
Ok(hdf5_reader::StorageBuffer::from_vec(
self.data[start..end].to_vec(),
))
}
}
/// Full read of a contiguous 4x5 f64 dataset opened through the root group.
#[test]
fn test_simple_contiguous() {
    let path = skip_if_missing!("simple_contiguous.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let root = file.root_group().unwrap();
    let ds = root.dataset("data").unwrap();
    assert_eq!(ds.shape(), &[4, 5]);
    let data: ndarray::ArrayD<f64> = ds.read_array().unwrap();
    assert_eq!(data.shape(), &[4, 5]);
    // The fixture holds a row-major ramp (value at [r, c] is r * 5 + c),
    // as confirmed by the slice tests over the same file. The original
    // assertion only covered row 0 — check every element.
    for r in 0..4 {
        for c in 0..5 {
            let expected = (r * 5 + c) as f64;
            assert!(
                (data[[r, c]] - expected).abs() < 1e-10,
                "mismatch at [{},{}]: got {}, expected {}",
                r,
                c,
                data[[r, c]],
                expected
            );
        }
    }
}
/// Slices the interior 3x3 window (rows 1..4, cols 1..4) of the contiguous
/// 4x5 dataset and spot-checks values from the row-major ramp.
#[test]
fn test_simple_contiguous_inner_window_slice() {
    let path = skip_if_missing!("simple_contiguous.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/data").unwrap();
    let span = |start, end| hdf5_reader::SliceInfoElem::Slice { start, end, step: 1 };
    let selection = hdf5_reader::SliceInfo {
        selections: vec![span(1, 4), span(1, 4)],
    };
    let sliced: ndarray::ArrayD<f64> = ds.read_slice(&selection).unwrap();
    assert_eq!(sliced.shape(), &[3, 3]);
    // Window element [r, c] maps to source (r + 1) * 5 + (c + 1).
    assert_eq!(sliced[[0, 0]], 6.0);
    assert_eq!(sliced[[0, 2]], 8.0);
    assert_eq!(sliced[[2, 2]], 18.0);
}
// Verifies read granularity, not just values: slicing rows 1..4 / cols 1..4
// of a contiguous 4x5 f64 dataset should issue one read per selected row
// (3 columns * 8 bytes = 24 bytes each) rather than fetching the whole
// dataset. CountingStorage records every byte range requested.
#[test]
fn test_contiguous_slice_reads_selected_ranges_directly() {
    let path = skip_if_missing!("simple_contiguous.h5");
    let storage = Arc::new(CountingStorage::new(std::fs::read(path).unwrap()));
    let file = hdf5_reader::Hdf5File::from_storage(storage.clone()).unwrap();
    let ds = file.dataset("/data").unwrap();
    // Discard ranges recorded while opening the file and resolving the dataset.
    storage.clear_ranges();
    let selection = hdf5_reader::SliceInfo {
        selections: vec![
            hdf5_reader::SliceInfoElem::Slice {
                start: 1,
                end: 4,
                step: 1,
            },
            hdf5_reader::SliceInfoElem::Slice {
                start: 1,
                end: 4,
                step: 1,
            },
        ],
    };
    let sliced: ndarray::ArrayD<f64> = ds.read_slice(&selection).unwrap();
    assert_eq!(sliced.shape(), &[3, 3]);
    assert_eq!(sliced[[0, 0]], 6.0);
    assert_eq!(sliced[[2, 2]], 18.0);
    // The offsets are fixture-specific file positions; consecutive reads are
    // 40 bytes apart (one 5-column f64 row). NOTE(review): these constants
    // must be regenerated if the fixture generator changes the file layout.
    assert_eq!(storage.ranges(), vec![(2096, 24), (2136, 24), (2176, 24)]);
}
/// Strided slice over the contiguous 4x5 ramp: rows {0, 2}, columns {1, 3}.
#[test]
fn test_simple_contiguous_strided_slice() {
    let path = skip_if_missing!("simple_contiguous.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/data").unwrap();
    let every_other = |start, end| hdf5_reader::SliceInfoElem::Slice { start, end, step: 2 };
    let selection = hdf5_reader::SliceInfo {
        selections: vec![every_other(0, 4), every_other(1, 5)],
    };
    let sliced: ndarray::ArrayD<f64> = ds.read_slice(&selection).unwrap();
    assert_eq!(sliced.shape(), &[2, 2]);
    // Picked source elements: (0,1)=1, (0,3)=3, (2,1)=11, (2,3)=13.
    assert_eq!(sliced[[0, 0]], 1.0);
    assert_eq!(sliced[[0, 1]], 3.0);
    assert_eq!(sliced[[1, 0]], 11.0);
    assert_eq!(sliced[[1, 1]], 13.0);
}
// Big-endian datasets must be byte-swapped to native order on read. Each
// dataset in the fixture is a row-major ramp over its own shape, so the
// corner elements pin both the swap and the element ordering.
#[test]
fn test_big_endian_numeric_datasets() {
    let path = skip_if_missing!("big_endian.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    // 4x5 f32: last element is 4*5 - 1 = 19.
    let f32_data: ndarray::ArrayD<f32> = file.dataset("/float32_be").unwrap().read_array().unwrap();
    assert_eq!(f32_data.shape(), &[4, 5]);
    assert!((f32_data[[0, 0]] - 0.0).abs() < 1e-6);
    assert!((f32_data[[3, 4]] - 19.0).abs() < 1e-6);
    // 3x4 f64: last element is 3*4 - 1 = 11.
    let f64_data: ndarray::ArrayD<f64> = file.dataset("/float64_be").unwrap().read_array().unwrap();
    assert_eq!(f64_data.shape(), &[3, 4]);
    assert!((f64_data[[0, 0]] - 0.0).abs() < 1e-12);
    assert!((f64_data[[2, 3]] - 11.0).abs() < 1e-12);
    // 3x5 i32: last element is 3*5 - 1 = 14.
    let i32_data: ndarray::ArrayD<i32> = file.dataset("/int32_be").unwrap().read_array().unwrap();
    assert_eq!(i32_data.shape(), &[3, 5]);
    assert_eq!(i32_data[[0, 0]], 0);
    assert_eq!(i32_data[[2, 4]], 14);
}
/// Reads a 10x20 f32 dataset stored as 5x10 deflate-compressed chunks.
#[test]
fn test_simple_chunked_deflate() {
    let path = skip_if_missing!("simple_chunked_deflate.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let temperature = h5.dataset("/temperature").unwrap();
    assert_eq!(temperature.shape(), &[10, 20]);
    assert_eq!(temperature.chunks().unwrap(), vec![5, 10]);
    let values: ndarray::ArrayD<f32> = temperature.read_array().unwrap();
    assert_eq!(values.shape(), &[10, 20]);
    // Row-major ramp: element [r, c] holds r * 20 + c.
    assert!((values[[0, 0]] - 0.0).abs() < 1e-6);
    assert!((values[[1, 0]] - 20.0).abs() < 1e-6);
}
// With the `rayon` feature enabled, decoding chunks in a thread pool must
// produce bit-identical output to the serial path.
#[cfg(feature = "rayon")]
#[test]
fn test_simple_chunked_deflate_parallel_matches_serial() {
    let path = skip_if_missing!("simple_chunked_deflate.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/temperature").unwrap();
    // Dedicated pool so the test doesn't depend on rayon's global pool size.
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(4)
        .build()
        .unwrap();
    let serial: ndarray::ArrayD<f32> = ds.read_array().unwrap();
    let parallel: ndarray::ArrayD<f32> = ds.read_array_in_pool(&pool).unwrap();
    assert_eq!(serial, parallel);
}
/// Group traversal: each group (including a nested subgroup) holds a
/// 3-element i32 "data" dataset with consecutive values.
#[test]
fn test_nested_groups() {
    let path = skip_if_missing!("nested_groups.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let cases = [
        ("/group1", [1, 2, 3]),
        ("/group1/subgroup", [4, 5, 6]),
        ("/group2", [7, 8, 9]),
    ];
    for &(group_path, expected) in cases.iter() {
        let group = file.group(group_path).unwrap();
        let values: ndarray::ArrayD<i32> = group.dataset("data").unwrap().read_array().unwrap();
        assert_eq!(values.as_slice().unwrap(), &expected);
    }
}
// Attribute reading: a string attribute, a scalar f64 attribute, and a
// 1-D i32 attribute attached to the same dataset.
#[test]
fn test_string_attributes() {
    let path = skip_if_missing!("string_attrs.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/data").unwrap();
    let name = ds.attribute("name").unwrap();
    assert_eq!(name.read_string().unwrap(), "test_dataset");
    let scale = ds.attribute("scale_factor").unwrap();
    assert!((scale.read_scalar::<f64>().unwrap() - 1.5).abs() < 1e-10);
    let range = ds.attribute("valid_range").unwrap();
    assert_eq!(range.read_1d::<i32>().unwrap(), vec![0, 100]);
}
/// A scalar (rank-0) dataset: empty shape, one element, indexed with `[[]]`.
#[test]
fn test_scalar_dataset() {
    let path = skip_if_missing!("scalar_dataset.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let scalar = file.dataset("/value").unwrap();
    assert_eq!(scalar.shape(), &[]);
    let value: ndarray::ArrayD<f64> = scalar.read_array().unwrap();
    assert_eq!(value.ndim(), 0);
    assert!((value[[]] - 42.0).abs() < 1e-10);
}
/// Files written with the legacy version-1 object header format must still
/// open and read. Only shape is asserted here.
#[test]
fn test_old_format_v1() {
    let path = skip_if_missing!("old_format_v1.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let dataset = h5.dataset("/data").unwrap();
    assert_eq!(dataset.shape(), &[4, 5]);
    let values: ndarray::ArrayD<f64> = dataset.read_array().unwrap();
    assert_eq!(values.shape(), &[4, 5]);
}
// Fill-value handling: /sparse is a 10-element i32 dataset where only a
// few indices were written (0 -> 0, 3 -> 3, 7 -> 7); unwritten elements
// must read back as the dataset's declared fill value, 999.
#[test]
fn test_fill_value() {
    let path = skip_if_missing!("fill_value.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/sparse").unwrap();
    let data: ndarray::ArrayD<i32> = ds.read_array().unwrap();
    assert_eq!(data[[0]], 0);
    assert_eq!(data[[1]], 999);
    assert_eq!(data[[2]], 999);
    assert_eq!(data[[3]], 3);
    assert_eq!(data[[4]], 999);
    assert_eq!(data[[7]], 7);
    assert_eq!(data[[9]], 999);
}
/// Dataset whose raw bytes live in an external file: both a full read and
/// a partial slice must go through the external storage path correctly.
#[test]
fn test_external_raw_data_file() {
    let path = skip_if_missing!("external_raw.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/data").unwrap();
    let expected: Vec<i32> = (0..12).collect();
    let data: ndarray::ArrayD<i32> = ds.read_array().unwrap();
    assert_eq!(data.as_slice().unwrap(), expected.as_slice());
    // Slice elements 3..7 of the same externally-stored dataset.
    let selection = hdf5_reader::SliceInfo {
        selections: vec![hdf5_reader::SliceInfoElem::Slice {
            start: 3,
            end: 7,
            step: 1,
        }],
    };
    let sliced: ndarray::ArrayD<i32> = ds.read_slice(&selection).unwrap();
    assert_eq!(sliced.as_slice().unwrap(), &[3, 4, 5, 6]);
}
// External links point at objects in other files; resolving them requires
// an explicit resolver. Here a filesystem resolver rooted at the fixture's
// directory is installed via OpenOptions.
#[test]
fn test_external_link_resolver() {
    let path = skip_if_missing!("external_links.h5");
    let base_dir = path.parent().unwrap();
    let file = hdf5_reader::Hdf5File::open_with_options(
        &path,
        hdf5_reader::OpenOptions {
            external_link_resolver: Some(Arc::new(
                hdf5_reader::FilesystemExternalLinkResolver::new(base_dir),
            )),
            ..Default::default()
        },
    )
    .unwrap();
    // /linked_data is an external link; reading it exercises the resolver.
    let ds = file.dataset("/linked_data").unwrap();
    let data: ndarray::ArrayD<i32> = ds.read_array().unwrap();
    assert_eq!(data.as_slice().unwrap(), &[4, 5, 6]);
}
/// Dataset filtered with Fletcher-32 checksums: a successful read implies
/// every chunk's checksum verified.
#[test]
fn test_fletcher32() {
    let path = skip_if_missing!("fletcher32.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let checked = h5.dataset("/checked").unwrap();
    let values: ndarray::ArrayD<f32> = checked.read_array().unwrap();
    assert_eq!(values.shape(), &[4, 4]);
    // Spot-check a few elements of the 4x4 fixture.
    assert!((values[[0, 0]] - 1.0).abs() < 1e-6);
    assert!((values[[0, 1]] - 0.0).abs() < 1e-6);
    assert!((values[[1, 1]] - 1.0).abs() < 1e-6);
}
/// A group with enough links to use dense (fractal-heap/B-tree-v2) storage:
/// enumeration must see all datasets and individual lookup must still work.
#[test]
fn test_dense_groups() {
    let path = skip_if_missing!("dense_groups.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let root = h5.root_group().unwrap();
    let datasets = root.datasets().unwrap();
    let count = datasets.len();
    assert!(count >= 20, "expected at least 20 datasets, got {}", count);
    // Lookup by name through the dense index.
    let first: ndarray::ArrayD<i32> = root.dataset("ds_000").unwrap().read_array().unwrap();
    assert_eq!(first.as_slice().unwrap(), &[0]);
}
/// Opening a file from an owned in-memory byte buffer.
#[test]
fn test_from_vec() {
    let path = skip_if_missing!("scalar_dataset.h5");
    let bytes = std::fs::read(&path).unwrap();
    let h5 = hdf5_reader::Hdf5File::from_vec(bytes).unwrap();
    let value: ndarray::ArrayD<f64> = h5.dataset("/value").unwrap().read_array().unwrap();
    assert!((value[[]] - 42.0).abs() < 1e-10);
}
// Opening from a borrowed byte slice with non-default OpenOptions: a tiny
// chunk cache and no custom filter registry. Only verifies the file still
// reads correctly under these settings; cache behavior itself is not
// observable here.
#[test]
fn test_from_bytes_with_options() {
    let path = skip_if_missing!("scalar_dataset.h5");
    let bytes = std::fs::read(&path).unwrap();
    let file = hdf5_reader::Hdf5File::from_bytes_with_options(
        &bytes,
        hdf5_reader::OpenOptions {
            chunk_cache_bytes: 1024,
            chunk_cache_slots: 17,
            filter_registry: None,
            ..Default::default()
        },
    )
    .unwrap();
    let ds = file.dataset("/value").unwrap();
    let data: ndarray::ArrayD<f64> = ds.read_array().unwrap();
    assert!((data[[]] - 42.0).abs() < 1e-10);
}
/// Opening a file through a caller-supplied `Storage` implementation
/// (`BytesStorage` wrapped in an `Arc`).
#[test]
fn test_from_storage() {
    let path = skip_if_missing!("scalar_dataset.h5");
    let bytes = std::fs::read(&path).unwrap();
    let storage = std::sync::Arc::new(hdf5_reader::BytesStorage::new(bytes));
    let h5 = hdf5_reader::Hdf5File::from_storage(storage).unwrap();
    let value: ndarray::ArrayD<f64> = h5.dataset("/value").unwrap().read_array().unwrap();
    assert!((value[[]] - 42.0).abs() < 1e-10);
}
// Smoke test: resolving the same group twice must succeed and agree.
// NOTE(review): this does not actually observe the header cache — a
// stronger test would open via CountingStorage and assert that the second
// lookup issues fewer reads; that depends on cache internals, so it is
// left as a consistency check only.
#[test]
fn test_header_cache_reuse() {
    let path = skip_if_missing!("nested_groups.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let g1a = file.group("/group1").unwrap();
    let g1b = file.group("/group1").unwrap();
    assert_eq!(g1a.name(), g1b.name());
}
/// Chunked dataset whose chunk index uses the Fixed Array structure:
/// all 60 elements of the 6x10 row-major ramp must read back exactly.
#[test]
fn test_fixed_array_chunked() {
    let path = skip_if_missing!("fixed_array_chunked.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let dataset = h5.dataset("/data").unwrap();
    assert_eq!(dataset.shape(), &[6, 10]);
    let grid: ndarray::ArrayD<f64> = dataset.read_array().unwrap();
    assert_eq!(grid.shape(), &[6, 10]);
    for row in 0..6 {
        for col in 0..10 {
            let actual = grid[[row, col]];
            let expected = (row * 10 + col) as f64;
            assert!(
                (actual - expected).abs() < 1e-10,
                "mismatch at [{},{}]: got {}, expected {}",
                row,
                col,
                actual,
                expected
            );
        }
    }
}
/// Chunked dataset indexed with the Extensible Array structure:
/// all 40 elements of the 5x8 row-major ramp must read back exactly.
#[test]
fn test_extensible_array_chunked() {
    let path = skip_if_missing!("extensible_array_chunked.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let dataset = h5.dataset("/data").unwrap();
    assert_eq!(dataset.shape(), &[5, 8]);
    let grid: ndarray::ArrayD<f64> = dataset.read_array().unwrap();
    assert_eq!(grid.shape(), &[5, 8]);
    for row in 0..5 {
        for col in 0..8 {
            let actual = grid[[row, col]];
            let expected = (row * 8 + col) as f64;
            assert!(
                (actual - expected).abs() < 1e-10,
                "mismatch at [{},{}]: got {}, expected {}",
                row,
                col,
                actual,
                expected
            );
        }
    }
}
/// Variable-length string dataset: `read_strings` yields one String per
/// element, and the scalar `read_string` must reject a multi-element set.
#[test]
fn test_vlen_string_dataset() {
    let path = skip_if_missing!("vlen_strings.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let labels = h5.dataset("/labels").unwrap();
    let strings = labels.read_strings().unwrap();
    assert_eq!(strings, vec!["alpha", "beta", "gamma"]);
    let err = labels.read_string().unwrap_err();
    assert!(matches!(err, hdf5_reader::error::Error::InvalidData(_)));
}
// Mixed selection on a chunked dataset: a scalar Index on axis 0 (which
// drops that axis from the result) combined with a full-range Slice on
// axis 1. The result is row 2 of the 6x10 ramp as a 1-D array.
#[test]
fn test_chunked_slice_single_index() {
    let path = skip_if_missing!("fixed_array_chunked.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/data").unwrap();
    let selection = hdf5_reader::SliceInfo {
        selections: vec![
            hdf5_reader::SliceInfoElem::Index(2),
            hdf5_reader::SliceInfoElem::Slice {
                start: 0,
                // u64::MAX as `end` selects through the end of the axis —
                // presumably an unbounded-end sentinel that gets clamped
                // to the dimension size (the [10] shape below relies on it).
                end: u64::MAX,
                step: 1,
            },
        ],
    };
    let sliced: ndarray::ArrayD<f64> = ds.read_slice(&selection).unwrap();
    assert_eq!(sliced.shape(), &[10]);
    for c in 0..10 {
        // Source value at [2, c] is 2 * 10 + c.
        let expected = (2 * 10 + c) as f64;
        assert!(
            (sliced[[c]] - expected).abs() < 1e-10,
            "mismatch at [{}]: got {}, expected {}",
            c,
            sliced[[c]],
            expected
        );
    }
}
/// Strided slice on a chunked dataset: every other row (0, 2, 4, 6, 8)
/// with all columns, so the selection crosses chunk boundaries.
#[test]
fn test_chunked_slice_range_with_step() {
    let path = skip_if_missing!("simple_chunked_deflate.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let temperature = h5.dataset("/temperature").unwrap();
    let axis = |start, end, step| hdf5_reader::SliceInfoElem::Slice { start, end, step };
    let selection = hdf5_reader::SliceInfo {
        selections: vec![axis(0, 10, 2), axis(0, 10, 1)],
    };
    let sliced: ndarray::ArrayD<f32> = temperature.read_slice(&selection).unwrap();
    assert_eq!(sliced.shape(), &[5, 10]);
    // Source value at [r, c] is r * 20 + c; output row 0 is source row 0,
    // output row 1 is source row 2.
    for c in 0..10 {
        assert!((sliced[[0, c]] - c as f32).abs() < 1e-6);
    }
    for c in 0..10 {
        assert!((sliced[[1, c]] - (40 + c) as f32).abs() < 1e-6);
    }
}
/// A selection with start > end on one axis is well-formed but empty:
/// the result keeps the other axis' size and holds zero elements.
#[test]
fn test_chunked_slice_empty_range_is_empty() {
    let path = skip_if_missing!("simple_chunked_deflate.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let temperature = h5.dataset("/temperature").unwrap();
    let axis = |start, end| hdf5_reader::SliceInfoElem::Slice { start, end, step: 1 };
    let selection = hdf5_reader::SliceInfo {
        // 8..4 is empty on axis 0; axis 1 selects everything.
        selections: vec![axis(8, 4), axis(0, 10)],
    };
    let sliced: ndarray::ArrayD<f32> = temperature.read_slice(&selection).unwrap();
    assert_eq!(sliced.shape(), &[0, 10]);
    assert_eq!(sliced.len(), 0);
}
// A slice whose start exceeds the axis extent (11 on a size-10 axis) must
// fail with SliceOutOfBounds identifying the offending dimension, index,
// and dimension size — not silently clamp or return empty.
#[test]
fn test_chunked_slice_start_out_of_bounds_errors() {
    let path = skip_if_missing!("simple_chunked_deflate.h5");
    let file = hdf5_reader::Hdf5File::open(&path).unwrap();
    let ds = file.dataset("/temperature").unwrap();
    let selection = hdf5_reader::SliceInfo {
        selections: vec![
            hdf5_reader::SliceInfoElem::Slice {
                start: 11,
                end: 12,
                step: 1,
            },
            hdf5_reader::SliceInfoElem::Slice {
                start: 0,
                end: 10,
                step: 1,
            },
        ],
    };
    let err = ds.read_slice::<f32>(&selection).unwrap_err();
    assert!(matches!(
        err,
        hdf5_reader::error::Error::SliceOutOfBounds {
            dim: 0,
            index: 11,
            size: 10
        }
    ));
}
/// Shuffle + deflate filter pipeline on a 100x100 dataset of 10x10 chunks;
/// a clean read with all-finite values shows the filters composed correctly.
#[test]
fn test_chunked_shuffle_deflate() {
    let path = skip_if_missing!("chunked_shuffle_deflate.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let dataset = h5.dataset("/values").unwrap();
    assert_eq!(dataset.shape(), &[100, 100]);
    assert_eq!(dataset.chunks().unwrap(), vec![10, 10]);
    let values: ndarray::ArrayD<f64> = dataset.read_array().unwrap();
    assert_eq!(values.shape(), &[100, 100]);
    assert!(values.iter().all(|v| v.is_finite()));
}
/// LZ4-filtered chunked dataset (feature-gated): corner and neighbor values
/// of the 10x20 row-major ramp (value at [r, c] is r * 20 + c).
#[cfg(feature = "lz4")]
#[test]
fn test_chunked_lz4() {
    let path = skip_if_missing!("chunked_lz4.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let dataset = h5.dataset("/data").unwrap();
    assert_eq!(dataset.shape(), &[10, 20]);
    let values: ndarray::ArrayD<f32> = dataset.read_array().unwrap();
    assert_eq!(values.shape(), &[10, 20]);
    assert!((values[[0, 0]] - 0.0).abs() < 1e-6);
    assert!((values[[0, 1]] - 1.0).abs() < 1e-6);
    assert!((values[[1, 0]] - 20.0).abs() < 1e-6);
    assert!((values[[9, 19]] - 199.0).abs() < 1e-6);
}
/// LZ4 decode of a highly-compressible all-zero dataset: every decoded
/// element must be exactly 0.0.
#[cfg(feature = "lz4")]
#[test]
fn test_chunked_lz4_compressed() {
    let path = skip_if_missing!("chunked_lz4_zeros.h5");
    let h5 = hdf5_reader::Hdf5File::open(&path).unwrap();
    let zeros: ndarray::ArrayD<f32> = h5.dataset("/data").unwrap().read_array().unwrap();
    assert_eq!(zeros.shape(), &[10, 20]);
    assert!(zeros.iter().all(|&v| v == 0.0));
}