Skip to main content

hdf5_reader/
superblock.rs

1use crate::checksum::jenkins_lookup3;
2use crate::error::{Error, Result};
3use crate::io::Cursor;
4use crate::storage::Storage;
5use crate::symbol_table::SymbolTableEntry;
6
/// HDF5 magic bytes: `\x89HDF\r\n\x1a\n`
///
/// This 8-byte signature is what the superblock search looks for; the
/// superblock version byte is read from the position right after it.
pub const HDF5_MAGIC: [u8; 8] = [0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a];
9
/// Parsed HDF5 superblock.
///
/// A single struct represents every supported superblock version. Fields that
/// a given version does not read are filled with placeholder values by the
/// parser; the per-field notes say which placeholder is used.
#[derive(Debug, Clone)]
pub struct Superblock {
    /// Superblock version (0, 1, 2, or 3).
    pub version: u8,
    /// Size of offsets (addresses) in bytes: 2, 4, or 8.
    pub offset_size: u8,
    /// Size of lengths in bytes: 2, 4, or 8.
    pub length_size: u8,
    /// Group leaf node K (v0/v1 only). Set to 0 for v2/v3.
    pub group_leaf_node_k: u16,
    /// Group internal node K (v0/v1 only). Set to 0 for v2/v3.
    pub group_internal_node_k: u16,
    /// Indexed storage internal node K (v1 only). Set to 0 for v0 and v2/v3.
    pub indexed_storage_k: u16,
    /// File consistency flags.
    ///
    /// Read as a 32-bit little-endian value for v0/v1 and as a single byte
    /// (widened to `u32`) for v2/v3.
    pub consistency_flags: u32,
    /// Base address for offsets (usually 0).
    pub base_address: u64,
    /// Address of the file free-space info (undefined = not present).
    ///
    /// Only read for v0/v1; the v2/v3 parser stores `u64::MAX` here.
    pub free_space_address: u64,
    /// End-of-file address.
    pub eof_address: u64,
    /// Driver information block address (v0/v1 only).
    ///
    /// The v2/v3 parser stores `u64::MAX` here.
    pub driver_info_address: u64,
    /// Root group symbol table entry (v0/v1). `None` for v2/v3.
    pub root_symbol_table_entry: Option<SymbolTableEntry>,
    /// Root group object header address (v2/v3). `None` for v0/v1.
    pub root_object_header_address: Option<u64>,
    /// Superblock extension address (v2/v3).
    ///
    /// `None` for v0/v1, and also for v2/v3 when the stored value is the
    /// undefined address for the file's offset size.
    pub extension_address: Option<u64>,
}
42
43impl Superblock {
44    /// Parse the superblock from a cursor positioned at byte 0 (or where the magic starts).
45    ///
46    /// The cursor should be positioned at the start of the file. The method will
47    /// search for the magic bytes at position 0, 512, 1024, 2048, etc.
48    pub fn parse(cursor: &mut Cursor<'_>) -> Result<Self> {
49        // Search for magic at positions 0, 512, 1024, 2048, ...
50        let magic_offset = find_magic(cursor)?;
51        cursor.set_position(magic_offset + 8);
52
53        let version = cursor.read_u8()?;
54        match version {
55            0 | 1 => Self::parse_v0_v1(cursor, version),
56            2 | 3 => Self::parse_v2_v3(cursor, version, magic_offset),
57            v => Err(Error::UnsupportedSuperblockVersion(v)),
58        }
59    }
60
61    /// Parse the superblock from random-access storage.
62    pub fn parse_from_storage(storage: &dyn Storage) -> Result<Self> {
63        let magic_offset = find_magic_in_storage(storage)?;
64        let remaining = storage.len().saturating_sub(magic_offset);
65        let header_len = remaining.min(256) as usize;
66        let header = storage.read_range(magic_offset, header_len)?;
67        let mut cursor = Cursor::new(header.as_ref());
68        cursor.set_position(8);
69
70        let version = cursor.read_u8()?;
71        match version {
72            0 | 1 => Self::parse_v0_v1(&mut cursor, version),
73            2 | 3 => Self::parse_v2_v3(&mut cursor, version, 0),
74            v => Err(Error::UnsupportedSuperblockVersion(v)),
75        }
76    }
77
78    fn parse_v0_v1(cursor: &mut Cursor<'_>, version: u8) -> Result<Self> {
79        let _free_space_version = cursor.read_u8()?;
80        let _root_group_version = cursor.read_u8()?;
81        let _reserved1 = cursor.read_u8()?;
82        let _shared_header_version = cursor.read_u8()?;
83
84        let offset_size = cursor.read_u8()?;
85        let length_size = cursor.read_u8()?;
86        let _reserved2 = cursor.read_u8()?;
87
88        let group_leaf_node_k = cursor.read_u16_le()?;
89        let group_internal_node_k = cursor.read_u16_le()?;
90        let consistency_flags = cursor.read_u32_le()?;
91
92        let indexed_storage_k = if version == 1 {
93            let k = cursor.read_u16_le()?;
94            let _reserved = cursor.read_u16_le()?;
95            k
96        } else {
97            0
98        };
99
100        let base_address = cursor.read_offset(offset_size)?;
101        let free_space_address = cursor.read_offset(offset_size)?;
102        let eof_address = cursor.read_offset(offset_size)?;
103        let driver_info_address = cursor.read_offset(offset_size)?;
104
105        let root_entry = SymbolTableEntry::parse(cursor, offset_size, length_size)?;
106
107        Ok(Superblock {
108            version,
109            offset_size,
110            length_size,
111            group_leaf_node_k,
112            group_internal_node_k,
113            indexed_storage_k,
114            consistency_flags,
115            base_address,
116            free_space_address,
117            eof_address,
118            driver_info_address,
119            root_symbol_table_entry: Some(root_entry),
120            root_object_header_address: None,
121            extension_address: None,
122        })
123    }
124
125    fn parse_v2_v3(cursor: &mut Cursor<'_>, version: u8, magic_offset: u64) -> Result<Self> {
126        let offset_size = cursor.read_u8()?;
127        let length_size = cursor.read_u8()?;
128        let consistency_flags = cursor.read_u8()? as u32;
129
130        let base_address = cursor.read_offset(offset_size)?;
131        let extension_address = cursor.read_offset(offset_size)?;
132        let eof_address = cursor.read_offset(offset_size)?;
133        let root_object_header_address = cursor.read_offset(offset_size)?;
134
135        let stored_checksum = cursor.read_u32_le()?;
136
137        // Verify checksum: covers everything from magic to just before the checksum
138        let checksum_start = magic_offset as usize;
139        let checksum_end = cursor.position() as usize - 4;
140        let computed = jenkins_lookup3(&cursor.data()[checksum_start..checksum_end]);
141        if computed != stored_checksum {
142            return Err(Error::ChecksumMismatch {
143                expected: stored_checksum,
144                actual: computed,
145            });
146        }
147
148        let ext = if !Cursor::is_undefined_offset(extension_address, offset_size) {
149            Some(extension_address)
150        } else {
151            None
152        };
153
154        Ok(Superblock {
155            version,
156            offset_size,
157            length_size,
158            group_leaf_node_k: 0,
159            group_internal_node_k: 0,
160            indexed_storage_k: 0,
161            consistency_flags,
162            base_address,
163            free_space_address: u64::MAX,
164            eof_address,
165            driver_info_address: u64::MAX,
166            root_symbol_table_entry: None,
167            root_object_header_address: Some(root_object_header_address),
168            extension_address: ext,
169        })
170    }
171
172    /// Get the root group's object header address.
173    pub fn root_object_header_address(&self) -> Result<u64> {
174        if let Some(addr) = self.root_object_header_address {
175            Ok(addr)
176        } else if let Some(ref entry) = self.root_symbol_table_entry {
177            Ok(entry.object_header_address)
178        } else {
179            Err(Error::InvalidData(
180                "superblock has no root group reference".into(),
181            ))
182        }
183    }
184
185    /// For v0/v1 superblocks, get the B-tree address from the root symbol table entry's
186    /// scratch-pad (used for root group navigation).
187    pub fn root_btree_address(&self) -> Option<u64> {
188        self.root_symbol_table_entry
189            .as_ref()
190            .and_then(|e| e.btree_address())
191    }
192
193    /// For v0/v1 superblocks, get the local heap address from the root symbol table entry's
194    /// scratch-pad.
195    pub fn root_local_heap_address(&self) -> Option<u64> {
196        self.root_symbol_table_entry
197            .as_ref()
198            .and_then(|e| e.local_heap_address())
199    }
200}
201
202/// Search for the HDF5 magic bytes. Per spec, the superblock can appear at
203/// offsets 0, 512, 1024, 2048, etc. (powers of two times 512, plus 0).
204fn find_magic(cursor: &Cursor<'_>) -> Result<u64> {
205    // Check offset 0
206    if cursor.len() >= 8 {
207        let bytes = cursor.peek_bytes(8)?;
208        if bytes == HDF5_MAGIC {
209            return Ok(0);
210        }
211    }
212
213    // Check 512, 1024, 2048, ...
214    let mut offset: u64 = 512;
215    while offset + 8 <= cursor.len() {
216        let c = cursor.at_offset(offset)?;
217        let bytes = c.peek_bytes(8)?;
218        if bytes == HDF5_MAGIC {
219            return Ok(offset);
220        }
221        offset *= 2;
222    }
223
224    Err(Error::InvalidMagic)
225}
226
227fn find_magic_in_storage(storage: &dyn Storage) -> Result<u64> {
228    if storage.len() >= 8 {
229        let bytes = storage.read_range(0, 8)?;
230        if bytes.as_ref() == HDF5_MAGIC {
231            return Ok(0);
232        }
233    }
234
235    let mut offset: u64 = 512;
236    while offset + 8 <= storage.len() {
237        let bytes = storage.read_range(offset, 8)?;
238        if bytes.as_ref() == HDF5_MAGIC {
239            return Ok(offset);
240        }
241        offset *= 2;
242    }
243
244    Err(Error::InvalidMagic)
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// A buffer that starts with the signature is reported at offset 0.
    #[test]
    fn test_magic_detection() {
        // Signature followed by 100 zero bytes of filler.
        let mut image: Vec<u8> = Vec::new();
        image.extend_from_slice(&HDF5_MAGIC);
        image.extend_from_slice(&[0u8; 100]);

        let cursor = Cursor::new(&image);
        let found = find_magic(&cursor).unwrap();
        assert_eq!(found, 0);
    }

    /// A buffer of zeros contains no signature, so the search must fail.
    #[test]
    fn test_no_magic() {
        let zeros = [0u8; 100];
        let cursor = Cursor::new(&zeros);
        let outcome = find_magic(&cursor);
        assert!(outcome.is_err());
    }
}