use hdf5_reader::checksum::jenkins_lookup3;
use hdf5_reader::error::Error;
use hdf5_reader::superblock::HDF5_MAGIC;
use hdf5_reader::Hdf5File;
use std::path::Path;
/// Extracts the error from `result`, panicking when it is `Ok`.
///
/// Unlike `Result::unwrap_err`, this places no `Debug` bound on `T`.
///
/// # Panics
///
/// Panics with `"expected error, got Ok"` if `result` is `Ok`.
fn expect_err<T>(result: Result<T, Error>) -> Error {
    if let Err(error) = result {
        return error;
    }
    panic!("expected error, got Ok")
}
fn error_chain_contains_unsupported_datatype(err: &Error, class: u8) -> bool {
match err {
Error::UnsupportedDatatypeClass(actual) => *actual == class,
Error::Context { source, .. } => error_chain_contains_unsupported_datatype(source, class),
_ => false,
}
}
fn error_chain_contains_invalid_btree_signature(err: &Error) -> bool {
match err {
Error::InvalidBTreeSignature => true,
Error::Context { source, .. } => error_chain_contains_invalid_btree_signature(source),
_ => false,
}
}
fn error_chain_contains_invalid_data(err: &Error, needle: &str) -> bool {
match err {
Error::InvalidData(message) => message.contains(needle),
Error::Context { source, .. } => error_chain_contains_invalid_data(source, needle),
_ => false,
}
}
/// Returns the offset of the first occurrence of `needle` within `haystack`,
/// or `None` when it does not occur.
///
/// An empty `needle` matches at offset 0, mirroring `str::find("")`.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        // `slice::windows` panics when asked for zero-length windows.
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
/// Locates the first 8-byte sequence `"TREE"`, `1`, `0`, followed by
/// `entries_used` in little-endian order, and returns its offset in `bytes`.
///
/// NOTE(review): per the HDF5 version 1 B-tree node layout the bytes after
/// the signature should be node type 1 (raw data chunks) and node level 0
/// (leaf) — confirm against the format specification.
fn find_raw_chunk_btree(bytes: &[u8], entries_used: u16) -> Option<usize> {
    let [entries_lo, entries_hi] = entries_used.to_le_bytes();
    let needle = [b'T', b'R', b'E', b'E', 1, 0, entries_lo, entries_hi];
    find_bytes(bytes, &needle)
}
/// Builds a 48-byte buffer shaped like a version 2 superblock with a valid
/// trailing checksum.
///
/// Layout produced (field names follow the HDF5 file format specification;
/// the tests in this file rely on the byte offsets):
///
/// | bytes  | content                                             |
/// |--------|-----------------------------------------------------|
/// | 0..8   | `HDF5_MAGIC`                                        |
/// | 8      | superblock version (2)                              |
/// | 9      | offset size (8)                                     |
/// | 10     | length size (8)                                     |
/// | 11     | flags (0)                                           |
/// | 12..20 | base address (0)                                    |
/// | 20..28 | superblock extension address (`u64::MAX`)           |
/// | 28..36 | end-of-file address (48)                            |
/// | 36..44 | root group object header address (48)               |
/// | 44..48 | `jenkins_lookup3` over bytes 0..44, little-endian   |
fn build_minimal_v2_superblock() -> Vec<u8> {
    const TOTAL_LEN: u64 = 48;

    let mut buf = Vec::with_capacity(48);
    buf.extend_from_slice(&HDF5_MAGIC);
    // Version, offset size, length size, flags.
    buf.extend_from_slice(&[2, 8, 8, 0]);
    // Four 8-byte little-endian addresses; see the table above.
    for address in [0u64, u64::MAX, TOTAL_LEN, 48u64] {
        buf.extend_from_slice(&address.to_le_bytes());
    }

    // The checksum covers everything written so far (the first 44 bytes).
    let checksum = jenkins_lookup3(&buf[0..44]);
    buf.extend_from_slice(&checksum.to_le_bytes());

    assert_eq!(buf.len(), 48);
    buf
}
/// Reads the fixture `name` from `testdata/hdf5` under the parent directory
/// of `CARGO_MANIFEST_DIR`.
///
/// Returns `None` when the file does not exist so callers can skip the test.
///
/// # Panics
///
/// Panics if the manifest directory has no parent, or if the file exists but
/// cannot be read.
fn fixture_bytes(name: &str) -> Option<Vec<u8>> {
    let manifest_parent = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap();
    let path = manifest_parent.join("testdata/hdf5").join(name);
    if !path.exists() {
        return None;
    }
    Some(std::fs::read(&path).unwrap())
}
/// Only the first four magic bytes are present: parsing must fail with
/// `InvalidMagic`.
#[test]
fn truncated_before_magic_complete() {
    let partial_magic = HDF5_MAGIC[..4].to_vec();
    let err = expect_err(Hdf5File::from_vec(partial_magic));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(is_invalid_magic, "expected InvalidMagic, got: {err}");
}
/// The input is exactly the magic and nothing else: parsing must fail.
///
/// `UnexpectedEof`, `Io` and `InvalidMagic` are all accepted, and the failure
/// message lists every accepted variant so a failing run is not misleading.
#[test]
fn truncated_at_magic_boundary() {
    let data = HDF5_MAGIC.to_vec();
    let err = expect_err(Hdf5File::from_vec(data));
    assert!(
        matches!(
            err,
            Error::UnexpectedEof { .. } | Error::Io(_) | Error::InvalidMagic
        ),
        "expected UnexpectedEof, Io, or InvalidMagic after bare magic, got: {err}"
    );
}
/// The input ends right after the version byte (2): parsing must report
/// `UnexpectedEof` or `Io`.
#[test]
fn truncated_after_version_byte() {
    let mut data = Vec::with_capacity(9);
    data.extend_from_slice(&HDF5_MAGIC);
    data.push(2);

    let err = expect_err(Hdf5File::from_vec(data));
    let is_eof_like = matches!(err, Error::UnexpectedEof { .. } | Error::Io(_));
    assert!(
        is_eof_like,
        "expected UnexpectedEof or Io after version byte, got: {err}"
    );
}
/// Keeps only the first 20 bytes of the synthetic v2 superblock: parsing
/// must report `UnexpectedEof` or `Io`.
#[test]
fn truncated_mid_superblock_v2() {
    let mut data = build_minimal_v2_superblock();
    data.truncate(20);

    let err = expect_err(Hdf5File::from_vec(data));
    let is_eof_like = matches!(err, Error::UnexpectedEof { .. } | Error::Io(_));
    assert!(
        is_eof_like,
        "expected UnexpectedEof when truncated mid-superblock, got: {err}"
    );
}
/// Drops the trailing 4-byte checksum (keeps bytes 0..44) of the synthetic
/// v2 superblock: parsing must report `UnexpectedEof` or `Io`.
#[test]
fn truncated_before_checksum() {
    let mut data = build_minimal_v2_superblock();
    data.truncate(44);

    let err = expect_err(Hdf5File::from_vec(data));
    let is_eof_like = matches!(err, Error::UnexpectedEof { .. } | Error::Io(_));
    assert!(
        is_eof_like,
        "expected UnexpectedEof when checksum is missing, got: {err}"
    );
}
/// Truncates a real fixture at several offsets.
///
/// Early cut points (up to byte 32) must yield an error. Later cut points are
/// only parsed to show that doing so does not panic; their result is ignored.
/// Cut points at or beyond the fixture length are skipped. The test is
/// skipped entirely when the fixture is missing.
#[test]
fn truncated_real_file_at_various_points() {
    let bytes = match fixture_bytes("scalar_dataset.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture scalar_dataset.h5 not found");
            return;
        }
    };

    let must_fail_points = [0usize, 1, 4, 7, 8, 9, 16, 32];
    for point in must_fail_points
        .iter()
        .copied()
        .filter(|&cut| cut < bytes.len())
    {
        let outcome = Hdf5File::from_vec(bytes[..point].to_vec());
        assert!(
            outcome.is_err(),
            "expected error when truncated at byte {point}, but got Ok"
        );
    }

    let no_panic_points = [48, bytes.len() / 2, bytes.len() - 1];
    for point in no_panic_points
        .iter()
        .copied()
        .filter(|&cut| cut < bytes.len())
    {
        // Either outcome is acceptable here; only a panic would fail the test.
        let _ = Hdf5File::from_vec(bytes[..point].to_vec());
    }
}
/// 256 zero bytes contain no magic: parsing must fail with `InvalidMagic`.
#[test]
fn all_zeros_no_magic() {
    let err = expect_err(Hdf5File::from_vec(vec![0u8; 256]));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic for all-zero data, got: {err}"
    );
}
/// Flips the lowest bit of the first magic byte: parsing must fail with
/// `InvalidMagic`.
#[test]
fn flipped_first_magic_byte() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[0] ^= 0x01;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic when first magic byte is flipped, got: {err}"
    );
}
/// Inverts every bit of the magic byte at index 3: parsing must fail with
/// `InvalidMagic`.
#[test]
fn flipped_middle_magic_byte() {
    let mut superblock = build_minimal_v2_superblock();
    // Bitwise NOT on a `u8` is the same as XOR with 0xFF.
    superblock[3] = !superblock[3];

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic when middle magic byte is flipped, got: {err}"
    );
}
/// Reverses the order of the eight magic bytes: parsing must fail with
/// `InvalidMagic`.
#[test]
fn reversed_magic_bytes() {
    let mut superblock = build_minimal_v2_superblock();
    let (magic, _rest) = superblock.split_at_mut(8);
    magic.reverse();

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic when magic bytes are reversed, got: {err}"
    );
}
/// Replaces the magic with the 8-byte PNG signature: parsing must fail with
/// `InvalidMagic`.
#[test]
fn wrong_magic_signature() {
    let mut superblock = build_minimal_v2_superblock();
    // 0x89 'P' 'N' 'G' CR LF 0x1A LF
    superblock[..8].copy_from_slice(b"\x89PNG\r\n\x1a\n");

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic for PNG magic, got: {err}"
    );
}
/// A zero-length input must be rejected with `InvalidMagic`.
#[test]
fn empty_file() {
    let err = expect_err(Hdf5File::from_vec(Vec::new()));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic for empty file, got: {err}"
    );
}
/// A one-byte input (`0x89`) must be rejected with `InvalidMagic`.
#[test]
fn single_byte_file() {
    let err = expect_err(Hdf5File::from_vec(vec![0x89]));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic for single byte, got: {err}"
    );
}
/// Flips one bit in the first stored checksum byte (index 44): parsing must
/// fail with `ChecksumMismatch`.
#[test]
fn corrupted_checksum_in_v2_superblock() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[44] ^= 0x01;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_mismatch = matches!(err, Error::ChecksumMismatch { .. });
    assert!(
        is_mismatch,
        "expected ChecksumMismatch when checksum byte is corrupted, got: {err}"
    );
}
/// Overwrites the stored checksum (bytes 44..48) with zeros: parsing must
/// fail with `ChecksumMismatch`.
#[test]
fn zeroed_checksum_in_v2_superblock() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[44..48].copy_from_slice(&[0x00; 4]);

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_mismatch = matches!(err, Error::ChecksumMismatch { .. });
    assert!(
        is_mismatch,
        "expected ChecksumMismatch when checksum is zeroed, got: {err}"
    );
}
/// Flips one bit in byte 11 (the last single-byte header field, which the
/// builder sets to 0) while leaving the stored checksum untouched: parsing
/// must fail with `ChecksumMismatch`.
#[test]
fn flipped_data_byte_triggers_checksum_mismatch() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[11] ^= 0x01;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_mismatch = matches!(err, Error::ChecksumMismatch { .. });
    assert!(
        is_mismatch,
        "expected ChecksumMismatch when data byte is corrupted, got: {err}"
    );
}
/// Corrupts byte 12 (the first byte of the base address field) without
/// updating the stored checksum: parsing must fail with `ChecksumMismatch`.
#[test]
fn corrupted_base_address_triggers_checksum_mismatch() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[12] ^= 0x42;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_mismatch = matches!(err, Error::ChecksumMismatch { .. });
    assert!(
        is_mismatch,
        "expected ChecksumMismatch when base_address is corrupted, got: {err}"
    );
}
/// Flips a bit in the superblock checksum of a real fixture and expects
/// `ChecksumMismatch`.
///
/// The checksum offset is computed as `12 + 4 * offset_size`, with the
/// version read from byte 8 and the offset size from byte 9. The test is
/// skipped when the fixture is missing or has a superblock version below 2,
/// and silently does nothing when the computed checksum range would run past
/// the end of the file.
#[test]
fn corrupted_real_file_checksum() {
    let mut bytes = match fixture_bytes("scalar_dataset.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture scalar_dataset.h5 not found");
            return;
        }
    };

    // Sanity check: the unmodified fixture must be readable.
    assert!(
        Hdf5File::from_bytes(&bytes).is_ok(),
        "fixture should parse without error"
    );

    let version = bytes[8];
    if version < 2 {
        eprintln!("SKIPPED: fixture has v0/v1 superblock (no checksum to corrupt)");
        return;
    }

    let offset_size = usize::from(bytes[9]);
    let checksum_offset = 12 + 4 * offset_size;
    if checksum_offset + 4 > bytes.len() {
        return;
    }

    bytes[checksum_offset] ^= 0x80;
    let err = expect_err(Hdf5File::from_vec(bytes));
    let is_mismatch = matches!(err, Error::ChecksumMismatch { .. });
    assert!(
        is_mismatch,
        "expected ChecksumMismatch for corrupted real file checksum, got: {err}"
    );
}
/// Sets the version byte (index 8) to 4: parsing must fail with
/// `UnsupportedSuperblockVersion(4)`.
#[test]
fn superblock_version_4_unsupported() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[8] = 4;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_unsupported = matches!(err, Error::UnsupportedSuperblockVersion(4));
    assert!(
        is_unsupported,
        "expected UnsupportedSuperblockVersion(4), got: {err}"
    );
}
/// Sets the version byte (index 8) to 255: parsing must fail with
/// `UnsupportedSuperblockVersion(255)`.
#[test]
fn superblock_version_255_unsupported() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[8] = 255;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_unsupported = matches!(err, Error::UnsupportedSuperblockVersion(255));
    assert!(
        is_unsupported,
        "expected UnsupportedSuperblockVersion(255), got: {err}"
    );
}
/// Sets the version byte (index 8) to 5: parsing must fail with
/// `UnsupportedSuperblockVersion(5)`.
#[test]
fn superblock_version_5_unsupported() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[8] = 5;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_unsupported = matches!(err, Error::UnsupportedSuperblockVersion(5));
    assert!(
        is_unsupported,
        "expected UnsupportedSuperblockVersion(5), got: {err}"
    );
}
/// Sets the version byte (index 8) to 128: parsing must fail with
/// `UnsupportedSuperblockVersion(128)`.
#[test]
fn superblock_version_128_unsupported() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[8] = 128;

    let err = expect_err(Hdf5File::from_vec(superblock));
    let is_unsupported = matches!(err, Error::UnsupportedSuperblockVersion(128));
    assert!(
        is_unsupported,
        "expected UnsupportedSuperblockVersion(128), got: {err}"
    );
}
/// 1024 bytes from a fixed arithmetic sequence (deterministic, not actually
/// random) must be rejected with `InvalidMagic`.
#[test]
fn random_garbage_bytes() {
    let mut data: Vec<u8> = Vec::with_capacity(1024);
    for i in 0u16..1024 {
        // The modulus keeps every value below 251, so the cast is lossless.
        data.push(((i * 37 + 13) % 251) as u8);
    }

    let err = expect_err(Hdf5File::from_vec(data));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic for random garbage, got: {err}"
    );
}
/// A valid magic followed by 8 junk bytes and 128 `0xFF` bytes must not
/// parse; any error kind is accepted.
#[test]
fn valid_magic_then_garbage() {
    let mut data = HDF5_MAGIC.to_vec();
    data.extend_from_slice(&[0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE, 0xBA, 0xBE]);
    // Pad with 128 bytes of 0xFF.
    data.resize(data.len() + 128, 0xFF);

    let parsed_ok = Hdf5File::from_vec(data).is_ok();
    assert!(
        !parsed_ok,
        "expected error for magic + garbage, but got Ok"
    );
}
/// Sets the offset size (byte 9) to 0 and recomputes the checksum so only
/// the offset size is wrong: parsing must still fail.
#[test]
fn corrupted_offset_size_zero() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[9] = 0;

    // Re-seal the block so the checksum check cannot be what rejects it.
    let checksum_bytes = jenkins_lookup3(&superblock[..44]).to_le_bytes();
    superblock[44..48].copy_from_slice(&checksum_bytes);

    let parsed_ok = Hdf5File::from_vec(superblock).is_ok();
    assert!(
        !parsed_ok,
        "expected error for offset_size=0, but got Ok"
    );
}
/// Sets the offset size (byte 9) to 16 and recomputes the checksum so only
/// the offset size is wrong: parsing must still fail.
#[test]
fn corrupted_offset_size_large() {
    let mut superblock = build_minimal_v2_superblock();
    superblock[9] = 16;

    // Re-seal the block so the checksum check cannot be what rejects it.
    let checksum_bytes = jenkins_lookup3(&superblock[..44]).to_le_bytes();
    superblock[44..48].copy_from_slice(&checksum_bytes);

    let parsed_ok = Hdf5File::from_vec(superblock).is_ok();
    assert!(
        !parsed_ok,
        "expected error for offset_size=16, but got Ok"
    );
}
/// Inverts up to 32 bytes starting three quarters of the way into a fixture,
/// then opens the file and looks up `data`.
///
/// No outcome is asserted: the test passes as long as nothing panics. It is
/// skipped when the fixture is missing.
#[test]
fn valid_parse_then_corrupted_dataset_read() {
    let mut bytes = match fixture_bytes("simple_contiguous.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture simple_contiguous.h5 not found");
            return;
        }
    };

    // `corrupt_offset` never exceeds `bytes.len()`, so the slice is in range;
    // `take(32)` caps the damage at 32 bytes or the remainder of the file.
    let corrupt_offset = bytes.len() * 3 / 4;
    for byte in bytes[corrupt_offset..].iter_mut().take(32) {
        *byte ^= 0xFF;
    }

    // Both a parse failure and a lookup failure are acceptable.
    if let Ok(file) = Hdf5File::from_vec(bytes) {
        let _ = file.root_group().and_then(|g| g.dataset("data"));
    }
}
/// Rewrites the datatype class of `/data` to 15 and checks that both
/// `root.dataset("data")` and `root.members()` report
/// `UnsupportedDatatypeClass(15)` rather than hiding the parse failure (for
/// the lookup, specifically not as `DatasetNotFound`).
///
/// The class byte is taken to be 72 bytes past the dataset address; that
/// offset is specific to this fixture and is guarded by the assertion that
/// the byte's low nibble is 1 before it is modified. Skipped when the
/// fixture is missing.
#[test]
fn corrupted_child_dataset_parse_error_surfaces_in_lookup_and_members() {
    let mut bytes = match fixture_bytes("simple_contiguous.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture simple_contiguous.h5 not found");
            return;
        }
    };

    // Parse a pristine copy first to learn where the dataset lives.
    let dataset_address = Hdf5File::from_vec(bytes.clone())
        .unwrap()
        .dataset("/data")
        .unwrap()
        .address();
    let datatype_class_offset = usize::try_from(dataset_address + 72).unwrap();

    let class_byte = bytes[datatype_class_offset];
    assert_eq!(class_byte & 0x0f, 1);
    // Setting all four low bits gives class 15 and keeps the high nibble.
    bytes[datatype_class_offset] = class_byte | 0x0f;

    let file = Hdf5File::from_vec(bytes).unwrap();
    let root = file.root_group().unwrap();

    let dataset_err = expect_err(root.dataset("data"));
    let lookup_reports_class = error_chain_contains_unsupported_datatype(&dataset_err, 15);
    assert!(
        lookup_reports_class,
        "dataset lookup should surface unsupported datatype, got: {dataset_err}"
    );
    assert!(!matches!(dataset_err, Error::DatasetNotFound(_)));

    let members_err = expect_err(root.members());
    let members_reports_class = error_chain_contains_unsupported_datatype(&members_err, 15);
    assert!(
        members_reports_class,
        "members() should surface unsupported datatype, got: {members_err}"
    );
}
/// Copies 16 bytes of the first chunk key's offsets over the second key's
/// offsets in a 4-entry chunk B-tree, then expects `read_array` on
/// `/temperature` to fail with an `InvalidData` error mentioning
/// "duplicate chunk output offsets".
///
/// NOTE(review): the byte arithmetic presumably assumes a 24-byte node
/// header, an 8-byte prefix in each key before the offsets, a 32-byte key and
/// an 8-byte child pointer — confirm against the HDF5 version 1 B-tree layout
/// and this fixture's dimensionality. Skipped when the fixture is missing.
#[test]
fn duplicate_chunk_offsets_error_in_full_chunk_fast_path() {
    let mut bytes = match fixture_bytes("simple_chunked_deflate.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture simple_chunked_deflate.h5 not found");
            return;
        }
    };

    let btree =
        find_raw_chunk_btree(&bytes, 4).expect("fixture should contain a 4-entry chunk B-tree");
    let key0_offsets = btree + 24 + 8;
    let key1_offsets = btree + 24 + 32 + 8 + 8;
    // Make key 1 claim the same output position as key 0.
    bytes.copy_within(key0_offsets..key0_offsets + 16, key1_offsets);

    let file = Hdf5File::from_vec(bytes).unwrap();
    let dataset = file.dataset("/temperature").unwrap();
    let err = expect_err(dataset.read_array::<f32>());
    let reports_duplicate = error_chain_contains_invalid_data(&err, "duplicate chunk output offsets");
    assert!(
        reports_duplicate,
        "expected duplicate chunk offset error, got: {err}"
    );
}
/// Lowers the little-endian `u32` found 24 bytes into a 1-entry chunk B-tree
/// node from 160 to 159, then expects `read_array` on `/data` to fail with an
/// `InvalidData` error mentioning "decoded to 159 bytes, expected 160 bytes".
///
/// NOTE(review): the field at `btree + 24` is presumably the first key's
/// chunk size — confirm against the HDF5 version 1 B-tree layout. Skipped
/// when the fixture is missing.
#[test]
fn short_unfiltered_chunk_errors_before_copying_to_output() {
    let mut bytes = match fixture_bytes("single_chunk.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture single_chunk.h5 not found");
            return;
        }
    };

    let btree =
        find_raw_chunk_btree(&bytes, 1).expect("fixture should contain a 1-entry chunk B-tree");
    let chunk_size = btree + 24;

    // Guard against fixture drift before patching the field.
    let mut stored_size = [0u8; 4];
    stored_size.copy_from_slice(&bytes[chunk_size..chunk_size + 4]);
    assert_eq!(u32::from_le_bytes(stored_size), 160);

    bytes[chunk_size..chunk_size + 4].copy_from_slice(&159u32.to_le_bytes());

    let file = Hdf5File::from_vec(bytes).unwrap();
    let dataset = file.dataset("/data").unwrap();
    let err = expect_err(dataset.read_array::<f64>());
    let reports_short_chunk =
        error_chain_contains_invalid_data(&err, "decoded to 159 bytes, expected 160 bytes");
    assert!(
        reports_short_chunk,
        "expected exact decoded chunk length error, got: {err}"
    );
}
/// Overwrites the `TREE` signature of the 4-entry chunk B-tree with `BROK`.
///
/// Opening the file and looking up `/temperature` are still expected to
/// succeed; `read_array` must then fail with `InvalidBTreeSignature`
/// somewhere in its error chain. Skipped when the fixture is missing.
#[test]
fn corrupted_chunk_index_signature_errors_on_read() {
    let mut bytes = match fixture_bytes("simple_chunked_deflate.h5") {
        Some(contents) => contents,
        None => {
            eprintln!("SKIPPED: fixture simple_chunked_deflate.h5 not found");
            return;
        }
    };

    let btree =
        find_raw_chunk_btree(&bytes, 4).expect("fixture should contain a 4-entry chunk B-tree");
    let signature = &mut bytes[btree..btree + 4];
    signature.copy_from_slice(b"BROK");

    let file = Hdf5File::from_vec(bytes).unwrap();
    let dataset = file.dataset("/temperature").unwrap();
    let err = expect_err(dataset.read_array::<f32>());
    let reports_bad_signature = error_chain_contains_invalid_btree_signature(&err);
    assert!(
        reports_bad_signature,
        "expected invalid B-tree signature error, got: {err}"
    );
}
/// The borrowing `from_bytes` entry point must also reject 64 zero bytes
/// with `InvalidMagic`.
#[test]
fn from_bytes_api_also_handles_corruption() {
    let zeros = [0u8; 64];
    let err = expect_err(Hdf5File::from_bytes(&zeros));
    let is_invalid_magic = matches!(err, Error::InvalidMagic);
    assert!(
        is_invalid_magic,
        "expected InvalidMagic via from_bytes, got: {err}"
    );
}