use crate::checksum::jenkins_lookup3;
use crate::error::{Error, Result};
use crate::io::Cursor;
use crate::storage::Storage;
use crate::symbol_table::SymbolTableEntry;
/// Eight-byte signature that marks the start of a superblock:
/// `0x89 'H' 'D' 'F' '\r' '\n' 0x1a '\n'`.
pub const HDF5_MAGIC: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
/// Parsed superblock fields.
///
/// One struct is shared by all supported versions (0 through 3). Fields that
/// a given version does not store are filled with placeholders by the parser:
/// `0` for the K values, `u64::MAX` for addresses, `None` for the optional
/// root/extension references.
#[derive(Debug, Clone)]
pub struct Superblock {
/// Version byte read immediately after the 8-byte signature.
pub version: u8,
/// Size-of-offsets byte; passed as the width argument to
/// `Cursor::read_offset` for every address field.
pub offset_size: u8,
/// Size-of-lengths byte; forwarded to `SymbolTableEntry::parse` for
/// versions 0 and 1.
pub length_size: u8,
/// Group leaf node K, read for versions 0 and 1; `0` for versions 2 and 3.
pub group_leaf_node_k: u16,
/// Group internal node K, read for versions 0 and 1; `0` for versions 2 and 3.
pub group_internal_node_k: u16,
/// Indexed storage K. Only read for version 1; `0` for every other version.
pub indexed_storage_k: u16,
/// Consistency flags: a little-endian `u32` in versions 0 and 1, a single
/// byte widened to `u32` in versions 2 and 3.
pub consistency_flags: u32,
/// Base address as read from the superblock.
pub base_address: u64,
/// Free-space address for versions 0 and 1; set to `u64::MAX` for
/// versions 2 and 3, which do not read this field.
pub free_space_address: u64,
/// End-of-file address as read from the superblock.
pub eof_address: u64,
/// Driver-information address for versions 0 and 1; set to `u64::MAX` for
/// versions 2 and 3, which do not read this field.
pub driver_info_address: u64,
/// Root group symbol table entry: `Some` for versions 0 and 1, `None` for
/// versions 2 and 3.
pub root_symbol_table_entry: Option<SymbolTableEntry>,
/// Root group object header address: `Some` for versions 2 and 3, `None`
/// for versions 0 and 1 (which carry it inside `root_symbol_table_entry`).
pub root_object_header_address: Option<u64>,
/// Superblock extension address: `Some` only for versions 2 and 3 when the
/// stored value is not the undefined offset, otherwise `None`.
pub extension_address: Option<u64>,
}
impl Superblock {
pub fn parse(cursor: &mut Cursor<'_>) -> Result<Self> {
let magic_offset = find_magic(cursor)?;
cursor.set_position(magic_offset + 8);
let version = cursor.read_u8()?;
match version {
0 | 1 => Self::parse_v0_v1(cursor, version),
2 | 3 => Self::parse_v2_v3(cursor, version, magic_offset),
v => Err(Error::UnsupportedSuperblockVersion(v)),
}
}
pub fn parse_from_storage(storage: &dyn Storage) -> Result<Self> {
let magic_offset = find_magic_in_storage(storage)?;
let remaining = storage.len().saturating_sub(magic_offset);
let header_len = remaining.min(256) as usize;
let header = storage.read_range(magic_offset, header_len)?;
let mut cursor = Cursor::new(header.as_ref());
cursor.set_position(8);
let version = cursor.read_u8()?;
match version {
0 | 1 => Self::parse_v0_v1(&mut cursor, version),
2 | 3 => Self::parse_v2_v3(&mut cursor, version, 0),
v => Err(Error::UnsupportedSuperblockVersion(v)),
}
}
fn parse_v0_v1(cursor: &mut Cursor<'_>, version: u8) -> Result<Self> {
let _free_space_version = cursor.read_u8()?;
let _root_group_version = cursor.read_u8()?;
let _reserved1 = cursor.read_u8()?;
let _shared_header_version = cursor.read_u8()?;
let offset_size = cursor.read_u8()?;
let length_size = cursor.read_u8()?;
let _reserved2 = cursor.read_u8()?;
let group_leaf_node_k = cursor.read_u16_le()?;
let group_internal_node_k = cursor.read_u16_le()?;
let consistency_flags = cursor.read_u32_le()?;
let indexed_storage_k = if version == 1 {
let k = cursor.read_u16_le()?;
let _reserved = cursor.read_u16_le()?;
k
} else {
0
};
let base_address = cursor.read_offset(offset_size)?;
let free_space_address = cursor.read_offset(offset_size)?;
let eof_address = cursor.read_offset(offset_size)?;
let driver_info_address = cursor.read_offset(offset_size)?;
let root_entry = SymbolTableEntry::parse(cursor, offset_size, length_size)?;
Ok(Superblock {
version,
offset_size,
length_size,
group_leaf_node_k,
group_internal_node_k,
indexed_storage_k,
consistency_flags,
base_address,
free_space_address,
eof_address,
driver_info_address,
root_symbol_table_entry: Some(root_entry),
root_object_header_address: None,
extension_address: None,
})
}
fn parse_v2_v3(cursor: &mut Cursor<'_>, version: u8, magic_offset: u64) -> Result<Self> {
let offset_size = cursor.read_u8()?;
let length_size = cursor.read_u8()?;
let consistency_flags = cursor.read_u8()? as u32;
let base_address = cursor.read_offset(offset_size)?;
let extension_address = cursor.read_offset(offset_size)?;
let eof_address = cursor.read_offset(offset_size)?;
let root_object_header_address = cursor.read_offset(offset_size)?;
let stored_checksum = cursor.read_u32_le()?;
let checksum_start = magic_offset as usize;
let checksum_end = cursor.position() as usize - 4;
let computed = jenkins_lookup3(&cursor.data()[checksum_start..checksum_end]);
if computed != stored_checksum {
return Err(Error::ChecksumMismatch {
expected: stored_checksum,
actual: computed,
});
}
let ext = if !Cursor::is_undefined_offset(extension_address, offset_size) {
Some(extension_address)
} else {
None
};
Ok(Superblock {
version,
offset_size,
length_size,
group_leaf_node_k: 0,
group_internal_node_k: 0,
indexed_storage_k: 0,
consistency_flags,
base_address,
free_space_address: u64::MAX,
eof_address,
driver_info_address: u64::MAX,
root_symbol_table_entry: None,
root_object_header_address: Some(root_object_header_address),
extension_address: ext,
})
}
pub fn root_object_header_address(&self) -> Result<u64> {
if let Some(addr) = self.root_object_header_address {
Ok(addr)
} else if let Some(ref entry) = self.root_symbol_table_entry {
Ok(entry.object_header_address)
} else {
Err(Error::InvalidData(
"superblock has no root group reference".into(),
))
}
}
pub fn root_btree_address(&self) -> Option<u64> {
self.root_symbol_table_entry
.as_ref()
.and_then(|e| e.btree_address())
}
pub fn root_local_heap_address(&self) -> Option<u64> {
self.root_symbol_table_entry
.as_ref()
.and_then(|e| e.local_heap_address())
}
}
/// Locate the superblock signature in an in-memory buffer.
///
/// Candidate offsets are 0, then 512, 1024, 2048, … (doubling) for as long as
/// eight bytes fit before `cursor.len()`. Returns the first offset whose eight
/// bytes equal `HDF5_MAGIC`.
///
/// Every candidate, including offset 0, is probed through `Cursor::at_offset`,
/// so the result does not depend on where the caller's cursor is currently
/// positioned.
/// NOTE(review): assumes `at_offset` takes an offset in the same coordinates
/// as `set_position` — confirm against `Cursor`.
///
/// # Errors
///
/// `Error::InvalidMagic` if no candidate matches; cursor errors are propagated.
fn find_magic(cursor: &Cursor<'_>) -> Result<u64> {
    let total_len = cursor.len();
    let mut offset: u64 = 0;

    // `checked_add` keeps the bound test itself from overflowing.
    while matches!(offset.checked_add(8), Some(end) if end <= total_len) {
        let probe = cursor.at_offset(offset)?;
        if probe.peek_bytes(8)? == HDF5_MAGIC {
            return Ok(offset);
        }

        offset = match offset {
            0 => 512,
            // Stop instead of wrapping around if doubling would overflow.
            n => match n.checked_mul(2) {
                Some(next) => next,
                None => break,
            },
        };
    }

    Err(Error::InvalidMagic)
}
/// Locate the superblock signature in a `Storage` backend.
///
/// Candidate offsets are 0, then 512, 1024, 2048, … (doubling) for as long as
/// eight bytes fit before `storage.len()`. Each candidate costs one 8-byte
/// `read_range` call. Returns the first offset whose bytes equal `HDF5_MAGIC`.
///
/// # Errors
///
/// `Error::InvalidMagic` if no candidate matches; `read_range` errors are
/// propagated.
fn find_magic_in_storage(storage: &dyn Storage) -> Result<u64> {
    let total_len = storage.len();
    let mut offset: u64 = 0;

    // `checked_add` keeps the bound test itself from overflowing.
    while matches!(offset.checked_add(8), Some(end) if end <= total_len) {
        let bytes = storage.read_range(offset, 8)?;
        if bytes.as_ref() == HDF5_MAGIC {
            return Ok(offset);
        }

        offset = match offset {
            0 => 512,
            // A wrapping `* 2` would reset the offset to 0 and loop forever on
            // a backend reporting a length of 2^63 or more; stop instead.
            n => match n.checked_mul(2) {
                Some(next) => next,
                None => break,
            },
        };
    }

    Err(Error::InvalidMagic)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A buffer that starts with the signature is reported at offset 0.
    #[test]
    fn test_magic_detection() {
        let total = HDF5_MAGIC.len() + 100;
        let mut buf = Vec::with_capacity(total);
        buf.extend_from_slice(&HDF5_MAGIC);
        // Pad with 100 zero bytes after the signature.
        buf.resize(total, 0u8);

        let cursor = Cursor::new(&buf);
        let found = find_magic(&cursor);
        assert_eq!(found.unwrap(), 0);
    }

    /// An all-zero buffer contains no signature, so the search must fail.
    #[test]
    fn test_no_magic() {
        let zeros = [0u8; 100];
        let cursor = Cursor::new(&zeros);
        let outcome = find_magic(&cursor);
        assert!(outcome.is_err());
    }
}