hdf5_reader/
superblock.rs1use crate::checksum::jenkins_lookup3;
2use crate::error::{Error, Result};
3use crate::io::Cursor;
4use crate::storage::Storage;
5use crate::symbol_table::SymbolTableEntry;
6
/// The 8-byte signature that marks the start of an HDF5 superblock:
/// `0x89`, the ASCII letters `HDF`, then `\r`, `\n`, `0x1a`, `\n`.
pub const HDF5_MAGIC: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
9
10#[derive(Debug, Clone)]
12pub struct Superblock {
13 pub version: u8,
15 pub offset_size: u8,
17 pub length_size: u8,
19 pub group_leaf_node_k: u16,
21 pub group_internal_node_k: u16,
23 pub indexed_storage_k: u16,
25 pub consistency_flags: u32,
27 pub base_address: u64,
29 pub free_space_address: u64,
31 pub eof_address: u64,
33 pub driver_info_address: u64,
35 pub root_symbol_table_entry: Option<SymbolTableEntry>,
37 pub root_object_header_address: Option<u64>,
39 pub extension_address: Option<u64>,
41}
42
43impl Superblock {
44 pub fn parse(cursor: &mut Cursor<'_>) -> Result<Self> {
49 let magic_offset = find_magic(cursor)?;
51 cursor.set_position(magic_offset + 8);
52
53 let version = cursor.read_u8()?;
54 match version {
55 0 | 1 => Self::parse_v0_v1(cursor, version),
56 2 | 3 => Self::parse_v2_v3(cursor, version, magic_offset),
57 v => Err(Error::UnsupportedSuperblockVersion(v)),
58 }
59 }
60
61 pub fn parse_from_storage(storage: &dyn Storage) -> Result<Self> {
63 let magic_offset = find_magic_in_storage(storage)?;
64 let remaining = storage.len().saturating_sub(magic_offset);
65 let header_len = remaining.min(256) as usize;
66 let header = storage.read_range(magic_offset, header_len)?;
67 let mut cursor = Cursor::new(header.as_ref());
68 cursor.set_position(8);
69
70 let version = cursor.read_u8()?;
71 match version {
72 0 | 1 => Self::parse_v0_v1(&mut cursor, version),
73 2 | 3 => Self::parse_v2_v3(&mut cursor, version, 0),
74 v => Err(Error::UnsupportedSuperblockVersion(v)),
75 }
76 }
77
78 fn parse_v0_v1(cursor: &mut Cursor<'_>, version: u8) -> Result<Self> {
79 let _free_space_version = cursor.read_u8()?;
80 let _root_group_version = cursor.read_u8()?;
81 let _reserved1 = cursor.read_u8()?;
82 let _shared_header_version = cursor.read_u8()?;
83
84 let offset_size = cursor.read_u8()?;
85 let length_size = cursor.read_u8()?;
86 let _reserved2 = cursor.read_u8()?;
87
88 let group_leaf_node_k = cursor.read_u16_le()?;
89 let group_internal_node_k = cursor.read_u16_le()?;
90 let consistency_flags = cursor.read_u32_le()?;
91
92 let indexed_storage_k = if version == 1 {
93 let k = cursor.read_u16_le()?;
94 let _reserved = cursor.read_u16_le()?;
95 k
96 } else {
97 0
98 };
99
100 let base_address = cursor.read_offset(offset_size)?;
101 let free_space_address = cursor.read_offset(offset_size)?;
102 let eof_address = cursor.read_offset(offset_size)?;
103 let driver_info_address = cursor.read_offset(offset_size)?;
104
105 let root_entry = SymbolTableEntry::parse(cursor, offset_size, length_size)?;
106
107 Ok(Superblock {
108 version,
109 offset_size,
110 length_size,
111 group_leaf_node_k,
112 group_internal_node_k,
113 indexed_storage_k,
114 consistency_flags,
115 base_address,
116 free_space_address,
117 eof_address,
118 driver_info_address,
119 root_symbol_table_entry: Some(root_entry),
120 root_object_header_address: None,
121 extension_address: None,
122 })
123 }
124
125 fn parse_v2_v3(cursor: &mut Cursor<'_>, version: u8, magic_offset: u64) -> Result<Self> {
126 let offset_size = cursor.read_u8()?;
127 let length_size = cursor.read_u8()?;
128 let consistency_flags = cursor.read_u8()? as u32;
129
130 let base_address = cursor.read_offset(offset_size)?;
131 let extension_address = cursor.read_offset(offset_size)?;
132 let eof_address = cursor.read_offset(offset_size)?;
133 let root_object_header_address = cursor.read_offset(offset_size)?;
134
135 let stored_checksum = cursor.read_u32_le()?;
136
137 let checksum_start = magic_offset as usize;
139 let checksum_end = cursor.position() as usize - 4;
140 let computed = jenkins_lookup3(&cursor.data()[checksum_start..checksum_end]);
141 if computed != stored_checksum {
142 return Err(Error::ChecksumMismatch {
143 expected: stored_checksum,
144 actual: computed,
145 });
146 }
147
148 let ext = if !Cursor::is_undefined_offset(extension_address, offset_size) {
149 Some(extension_address)
150 } else {
151 None
152 };
153
154 Ok(Superblock {
155 version,
156 offset_size,
157 length_size,
158 group_leaf_node_k: 0,
159 group_internal_node_k: 0,
160 indexed_storage_k: 0,
161 consistency_flags,
162 base_address,
163 free_space_address: u64::MAX,
164 eof_address,
165 driver_info_address: u64::MAX,
166 root_symbol_table_entry: None,
167 root_object_header_address: Some(root_object_header_address),
168 extension_address: ext,
169 })
170 }
171
172 pub fn root_object_header_address(&self) -> Result<u64> {
174 if let Some(addr) = self.root_object_header_address {
175 Ok(addr)
176 } else if let Some(ref entry) = self.root_symbol_table_entry {
177 Ok(entry.object_header_address)
178 } else {
179 Err(Error::InvalidData(
180 "superblock has no root group reference".into(),
181 ))
182 }
183 }
184
185 pub fn root_btree_address(&self) -> Option<u64> {
188 self.root_symbol_table_entry
189 .as_ref()
190 .and_then(|e| e.btree_address())
191 }
192
193 pub fn root_local_heap_address(&self) -> Option<u64> {
196 self.root_symbol_table_entry
197 .as_ref()
198 .and_then(|e| e.local_heap_address())
199 }
200}
201
202fn find_magic(cursor: &Cursor<'_>) -> Result<u64> {
205 if cursor.len() >= 8 {
207 let bytes = cursor.peek_bytes(8)?;
208 if bytes == HDF5_MAGIC {
209 return Ok(0);
210 }
211 }
212
213 let mut offset: u64 = 512;
215 while offset.checked_add(8).is_some_and(|end| end <= cursor.len()) {
216 let c = cursor.at_offset(offset)?;
217 let bytes = c.peek_bytes(8)?;
218 if bytes == HDF5_MAGIC {
219 return Ok(offset);
220 }
221 let Some(next_offset) = offset.checked_mul(2) else {
222 break;
223 };
224 offset = next_offset;
225 }
226
227 Err(Error::InvalidMagic)
228}
229
230fn find_magic_in_storage(storage: &dyn Storage) -> Result<u64> {
231 if storage.len() >= 8 {
232 let bytes = storage.read_range(0, 8)?;
233 if bytes.as_ref() == HDF5_MAGIC {
234 return Ok(0);
235 }
236 }
237
238 let mut offset: u64 = 512;
239 while offset
240 .checked_add(8)
241 .is_some_and(|end| end <= storage.len())
242 {
243 let bytes = storage.read_range(offset, 8)?;
244 if bytes.as_ref() == HDF5_MAGIC {
245 return Ok(offset);
246 }
247 let Some(next_offset) = offset.checked_mul(2) else {
248 break;
249 };
250 offset = next_offset;
251 }
252
253 Err(Error::InvalidMagic)
254}
255
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::RangeRequestStorage;

    /// A buffer that begins with the signature is reported at offset 0.
    #[test]
    fn magic_detection() {
        let mut buf = Vec::with_capacity(HDF5_MAGIC.len() + 100);
        buf.extend_from_slice(&HDF5_MAGIC);
        buf.resize(HDF5_MAGIC.len() + 100, 0u8);

        let located = find_magic(&Cursor::new(&buf));
        assert_eq!(located.unwrap(), 0);
    }

    /// An all-zero buffer contains no signature, so the search fails.
    #[test]
    fn no_magic() {
        let zeros = [0u8; 100];

        let outcome = find_magic(&Cursor::new(&zeros));
        assert!(outcome.is_err());
    }

    /// With a storage length of `u64::MAX` the doubling search must stop
    /// cleanly instead of overflowing, and report a missing signature.
    #[test]
    fn storage_magic_search_handles_huge_length_without_overflow() {
        let storage = RangeRequestStorage::new(u64::MAX, |_offset, len| Ok(vec![0; len]));

        let outcome = find_magic_in_storage(&storage);

        assert!(matches!(outcome, Err(Error::InvalidMagic)));
    }
}