Skip to main content

hdf5_reader/
superblock.rs

1use crate::checksum::jenkins_lookup3;
2use crate::error::{Error, Result};
3use crate::io::Cursor;
4use crate::symbol_table::SymbolTableEntry;
5
/// The 8-byte signature that opens every HDF5 superblock.
///
/// Written as a byte string, the bytes are `0x89`, ASCII `HDF`, `\r`, `\n`,
/// `0x1a`, `\n`.
pub const HDF5_MAGIC: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
8
9/// Parsed HDF5 superblock.
10#[derive(Debug, Clone)]
11pub struct Superblock {
12    /// Superblock version (0, 1, 2, or 3).
13    pub version: u8,
14    /// Size of offsets (addresses) in bytes: 2, 4, or 8.
15    pub offset_size: u8,
16    /// Size of lengths in bytes: 2, 4, or 8.
17    pub length_size: u8,
18    /// Group leaf node K (v0/v1 only).
19    pub group_leaf_node_k: u16,
20    /// Group internal node K (v0/v1 only).
21    pub group_internal_node_k: u16,
22    /// Indexed storage internal node K (v1 only).
23    pub indexed_storage_k: u16,
24    /// File consistency flags.
25    pub consistency_flags: u32,
26    /// Base address for offsets (usually 0).
27    pub base_address: u64,
28    /// Address of the file free-space info (undefined = not present).
29    pub free_space_address: u64,
30    /// End-of-file address.
31    pub eof_address: u64,
32    /// Driver information block address (v0/v1 only).
33    pub driver_info_address: u64,
34    /// Root group symbol table entry (v0/v1).
35    pub root_symbol_table_entry: Option<SymbolTableEntry>,
36    /// Root group object header address (v2/v3).
37    pub root_object_header_address: Option<u64>,
38    /// Superblock extension address (v2/v3).
39    pub extension_address: Option<u64>,
40}
41
42impl Superblock {
43    /// Parse the superblock from a cursor positioned at byte 0 (or where the magic starts).
44    ///
45    /// The cursor should be positioned at the start of the file. The method will
46    /// search for the magic bytes at position 0, 512, 1024, 2048, etc.
47    pub fn parse(cursor: &mut Cursor<'_>) -> Result<Self> {
48        // Search for magic at positions 0, 512, 1024, 2048, ...
49        let magic_offset = find_magic(cursor)?;
50        cursor.set_position(magic_offset + 8);
51
52        let version = cursor.read_u8()?;
53        match version {
54            0 | 1 => Self::parse_v0_v1(cursor, version),
55            2 | 3 => Self::parse_v2_v3(cursor, version, magic_offset),
56            v => Err(Error::UnsupportedSuperblockVersion(v)),
57        }
58    }
59
60    fn parse_v0_v1(cursor: &mut Cursor<'_>, version: u8) -> Result<Self> {
61        let _free_space_version = cursor.read_u8()?;
62        let _root_group_version = cursor.read_u8()?;
63        let _reserved1 = cursor.read_u8()?;
64        let _shared_header_version = cursor.read_u8()?;
65
66        let offset_size = cursor.read_u8()?;
67        let length_size = cursor.read_u8()?;
68        let _reserved2 = cursor.read_u8()?;
69
70        let group_leaf_node_k = cursor.read_u16_le()?;
71        let group_internal_node_k = cursor.read_u16_le()?;
72        let consistency_flags = cursor.read_u32_le()?;
73
74        let indexed_storage_k = if version == 1 {
75            let k = cursor.read_u16_le()?;
76            let _reserved = cursor.read_u16_le()?;
77            k
78        } else {
79            0
80        };
81
82        let base_address = cursor.read_offset(offset_size)?;
83        let free_space_address = cursor.read_offset(offset_size)?;
84        let eof_address = cursor.read_offset(offset_size)?;
85        let driver_info_address = cursor.read_offset(offset_size)?;
86
87        let root_entry = SymbolTableEntry::parse(cursor, offset_size, length_size)?;
88
89        Ok(Superblock {
90            version,
91            offset_size,
92            length_size,
93            group_leaf_node_k,
94            group_internal_node_k,
95            indexed_storage_k,
96            consistency_flags,
97            base_address,
98            free_space_address,
99            eof_address,
100            driver_info_address,
101            root_symbol_table_entry: Some(root_entry),
102            root_object_header_address: None,
103            extension_address: None,
104        })
105    }
106
107    fn parse_v2_v3(cursor: &mut Cursor<'_>, version: u8, magic_offset: u64) -> Result<Self> {
108        let offset_size = cursor.read_u8()?;
109        let length_size = cursor.read_u8()?;
110        let consistency_flags = cursor.read_u8()? as u32;
111
112        let base_address = cursor.read_offset(offset_size)?;
113        let extension_address = cursor.read_offset(offset_size)?;
114        let eof_address = cursor.read_offset(offset_size)?;
115        let root_object_header_address = cursor.read_offset(offset_size)?;
116
117        let stored_checksum = cursor.read_u32_le()?;
118
119        // Verify checksum: covers everything from magic to just before the checksum
120        let checksum_start = magic_offset as usize;
121        let checksum_end = cursor.position() as usize - 4;
122        let computed = jenkins_lookup3(&cursor.data()[checksum_start..checksum_end]);
123        if computed != stored_checksum {
124            return Err(Error::ChecksumMismatch {
125                expected: stored_checksum,
126                actual: computed,
127            });
128        }
129
130        let ext = if !Cursor::is_undefined_offset(extension_address, offset_size) {
131            Some(extension_address)
132        } else {
133            None
134        };
135
136        Ok(Superblock {
137            version,
138            offset_size,
139            length_size,
140            group_leaf_node_k: 0,
141            group_internal_node_k: 0,
142            indexed_storage_k: 0,
143            consistency_flags,
144            base_address,
145            free_space_address: u64::MAX,
146            eof_address,
147            driver_info_address: u64::MAX,
148            root_symbol_table_entry: None,
149            root_object_header_address: Some(root_object_header_address),
150            extension_address: ext,
151        })
152    }
153
154    /// Get the root group's object header address.
155    pub fn root_object_header_address(&self) -> Result<u64> {
156        if let Some(addr) = self.root_object_header_address {
157            Ok(addr)
158        } else if let Some(ref entry) = self.root_symbol_table_entry {
159            Ok(entry.object_header_address)
160        } else {
161            Err(Error::InvalidData(
162                "superblock has no root group reference".into(),
163            ))
164        }
165    }
166
167    /// For v0/v1 superblocks, get the B-tree address from the root symbol table entry's
168    /// scratch-pad (used for root group navigation).
169    pub fn root_btree_address(&self) -> Option<u64> {
170        self.root_symbol_table_entry
171            .as_ref()
172            .and_then(|e| e.btree_address())
173    }
174
175    /// For v0/v1 superblocks, get the local heap address from the root symbol table entry's
176    /// scratch-pad.
177    pub fn root_local_heap_address(&self) -> Option<u64> {
178        self.root_symbol_table_entry
179            .as_ref()
180            .and_then(|e| e.local_heap_address())
181    }
182}
183
184/// Search for the HDF5 magic bytes. Per spec, the superblock can appear at
185/// offsets 0, 512, 1024, 2048, etc. (powers of two times 512, plus 0).
186fn find_magic(cursor: &Cursor<'_>) -> Result<u64> {
187    // Check offset 0
188    if cursor.len() >= 8 {
189        let bytes = cursor.peek_bytes(8)?;
190        if bytes == HDF5_MAGIC {
191            return Ok(0);
192        }
193    }
194
195    // Check 512, 1024, 2048, ...
196    let mut offset: u64 = 512;
197    while offset + 8 <= cursor.len() {
198        let c = cursor.at_offset(offset)?;
199        let bytes = c.peek_bytes(8)?;
200        if bytes == HDF5_MAGIC {
201            return Ok(offset);
202        }
203        offset *= 2;
204    }
205
206    Err(Error::InvalidMagic)
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a zero-filled buffer with the signature written at `offset`,
    /// followed by 100 bytes of zero padding.
    fn buffer_with_magic_at(offset: usize) -> Vec<u8> {
        let mut data = vec![0u8; offset];
        data.extend_from_slice(&HDF5_MAGIC);
        data.extend_from_slice(&[0u8; 100]);
        data
    }

    #[test]
    fn test_magic_detection() {
        // Signature at the very start of the buffer.
        let data = buffer_with_magic_at(0);
        let cursor = Cursor::new(&data);
        assert_eq!(find_magic(&cursor).unwrap(), 0);
    }

    #[test]
    fn test_magic_at_doubling_offsets() {
        // The search must also reach the 512, 1024, ... candidates.
        for offset in [512usize, 1024] {
            let data = buffer_with_magic_at(offset);
            let cursor = Cursor::new(&data);
            assert_eq!(find_magic(&cursor).unwrap(), offset as u64);
        }
    }

    #[test]
    fn test_no_magic() {
        let data = [0u8; 100];
        let cursor = Cursor::new(&data);
        assert!(matches!(find_magic(&cursor), Err(Error::InvalidMagic)));
    }

    #[test]
    fn test_input_shorter_than_magic() {
        // Fewer than 8 bytes can never hold the signature.
        let data = [0x89u8, 0x48, 0x44, 0x46];
        let cursor = Cursor::new(&data);
        assert!(matches!(find_magic(&cursor), Err(Error::InvalidMagic)));
    }

    #[test]
    fn test_magic_at_non_candidate_offset() {
        // A signature at an offset that is not 0 or 512 * 2^n is not found.
        let mut data = buffer_with_magic_at(100);
        data.resize(600, 0);
        let cursor = Cursor::new(&data);
        assert!(matches!(find_magic(&cursor), Err(Error::InvalidMagic)));
    }
}