1use bytes::Bytes;
2
3use crate::endian::{HDF5Reader, UNDEF_ADDR};
4use crate::error::{HDF5Error, Result};
5
/// The 8-byte signature that marks the start of an HDF5 superblock:
/// `\x89 H D F \r \n \x1a \n`.
pub const HDF5_SIGNATURE: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
8
/// The fields of an HDF5 superblock that the parser keeps.
///
/// Every other value encountered while parsing is read only to advance the
/// reader and is then discarded.
///
/// `PartialEq`/`Eq` are derived so that parsed superblocks can be compared
/// directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Superblock {
    /// Superblock version byte (the parser accepts 0, 1, 2 and 3).
    pub version: u8,
    /// "Size of offsets" byte as stored in the superblock.
    pub size_of_offsets: u8,
    /// "Size of lengths" byte as stored in the superblock.
    pub size_of_lengths: u8,
    /// Base address field of the superblock.
    pub base_address: u64,
    /// Address stored for the root group. For versions 0/1 this is the
    /// second offset-sized field of the trailing root-group entry; for
    /// versions 2/3 it is the dedicated root-group address field.
    pub root_group_address: u64,
    /// End-of-file address field of the superblock.
    pub end_of_file_address: u64,
    /// Superblock extension address. Only stored in versions 2/3; set to
    /// `UNDEF_ADDR` when parsing versions 0/1.
    pub extension_address: u64,
}
31
32impl Superblock {
33 pub fn parse(data: &Bytes) -> Result<(Self, u64)> {
38 let offsets = [0u64, 512, 1024, 2048, 4096];
40 for &offset in &offsets {
41 if offset as usize + 8 > data.len() {
42 break;
43 }
44 let slice = &data[offset as usize..offset as usize + 8];
45 if slice == HDF5_SIGNATURE {
46 let sb = Self::parse_at(data, offset)?;
47 return Ok((sb, offset));
48 }
49 }
50
51 let hint = identify_format(data);
53 Err(HDF5Error::InvalidSignature { offset: 0, hint })
54 }
55
56 fn parse_at(data: &Bytes, offset: u64) -> Result<Self> {
57 let mut r = HDF5Reader::new(data.clone());
58 r.set_position(offset);
59
60 r.skip(8);
62
63 let version = r.read_u8()?;
64
65 match version {
66 0 | 1 => Self::parse_v0_v1(&mut r, version),
67 2 | 3 => Self::parse_v2_v3(&mut r, version),
68 _ => Err(HDF5Error::UnsupportedSuperblockVersion(version)),
69 }
70 }
71
72 fn parse_v0_v1(r: &mut HDF5Reader, version: u8) -> Result<Self> {
92 let _free_space_version = r.read_u8()?;
93 let _root_group_version = r.read_u8()?;
94 let _reserved1 = r.read_u8()?;
95 let _shared_header_version = r.read_u8()?;
96 let size_of_offsets = r.read_u8()?;
97 let size_of_lengths = r.read_u8()?;
98 let _reserved2 = r.read_u8()?;
99
100 *r = HDF5Reader::with_sizes(r.get_ref().clone(), size_of_offsets, size_of_lengths);
102 r.set_position(
103 8 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1, );
108
109 let _group_leaf_k = r.read_u16()?;
110 let _group_internal_k = r.read_u16()?;
111 let _consistency_flags = r.read_u32()?;
112
113 if version == 1 {
114 let _indexed_storage_k = r.read_u16()?;
115 let _reserved3 = r.read_u16()?;
116 }
117
118 let base_address = r.read_offset()?;
119 let _free_space_address = r.read_offset()?;
120 let end_of_file_address = r.read_offset()?;
121 let _driver_info_address = r.read_offset()?;
122
123 let _link_name_offset = r.read_offset()?;
126 let root_group_address = r.read_offset()?;
128 Ok(Self {
132 version,
133 size_of_offsets,
134 size_of_lengths,
135 base_address,
136 root_group_address,
137 end_of_file_address,
138 extension_address: UNDEF_ADDR,
139 })
140 }
141
142 fn parse_v2_v3(r: &mut HDF5Reader, version: u8) -> Result<Self> {
154 let size_of_offsets = r.read_u8()?;
155 let size_of_lengths = r.read_u8()?;
156 let _consistency_flags = r.read_u8()?;
157
158 let pos = r.position();
160 *r = HDF5Reader::with_sizes(r.get_ref().clone(), size_of_offsets, size_of_lengths);
161 r.set_position(pos);
162
163 let base_address = r.read_offset()?;
164 let extension_address = r.read_offset()?;
165 let end_of_file_address = r.read_offset()?;
166 let root_group_address = r.read_offset()?;
167 let _checksum = r.read_u32()?;
168
169 Ok(Self {
170 version,
171 size_of_offsets,
172 size_of_lengths,
173 base_address,
174 root_group_address,
175 end_of_file_address,
176 extension_address,
177 })
178 }
179}
180
181fn identify_format(data: &Bytes) -> String {
184 if data.len() < 4 {
185 return format!(
186 "file is too small ({} bytes) to contain an HDF5 superblock",
187 data.len()
188 );
189 }
190
191 let head = &data[..std::cmp::min(data.len(), 8)];
192
193 if head.starts_with(b"CDF") && data.len() >= 4 {
195 let version_byte = data[3];
196 let variant = match version_byte {
197 1 => "NetCDF3 classic (CDF-1)",
198 2 => "NetCDF3 64-bit offset (CDF-2)",
199 5 => "NetCDF3 64-bit data (CDF-5)",
200 _ => "NetCDF3 (unknown variant)",
201 };
202 return format!(
203 "file appears to be {} format, not HDF5. \
204 NetCDF4 (which uses HDF5) starts with \\x89HDF, \
205 but this file starts with CDF\\x{:02x}",
206 variant, version_byte
207 );
208 }
209
210 if head.len() >= 4 && head[0] == 0x0e && head[1] == 0x03 && head[2] == 0x13 && head[3] == 0x01 {
212 return "file appears to be HDF4 format, not HDF5. \
213 async-hdf5 only supports HDF5 (and NetCDF4, which is HDF5-based)"
214 .to_string();
215 }
216
217 if head.len() >= 4
219 && ((head[0] == b'I' && head[1] == b'I' && head[2] == 42 && head[3] == 0)
220 || (head[0] == b'M' && head[1] == b'M' && head[2] == 0 && head[3] == 42))
221 {
222 return "file appears to be TIFF format, not HDF5".to_string();
223 }
224
225 let hex: Vec<String> = head.iter().map(|b| format!("{:02x}", b)).collect();
227 format!(
228 "expected HDF5 signature (\\x89HDF\\r\\n\\x1a\\n) but found [{}]",
229 hex.join(" ")
230 )
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes 1..4 of the signature spell "HDF".
    #[test]
    fn test_hdf5_signature() {
        assert_eq!(&HDF5_SIGNATURE[1..4], b"HDF");
    }

    /// A hand-built version 2 superblock at offset 0 with 8-byte offsets and
    /// lengths round-trips through `Superblock::parse`.
    #[test]
    fn test_superblock_v2_minimal() {
        let mut raw = HDF5_SIGNATURE.to_vec();
        // version, size of offsets, size of lengths, consistency flags
        raw.extend_from_slice(&[2, 8, 8, 0]);
        // base, extension, end-of-file and root group addresses, in order
        for address in &[0u64, u64::MAX, 4096, 48] {
            raw.extend_from_slice(&address.to_le_bytes());
        }
        // checksum (not verified by the parser)
        raw.extend_from_slice(&0u32.to_le_bytes());

        let bytes = Bytes::from(raw);
        let (sb, offset) = Superblock::parse(&bytes).unwrap();

        assert_eq!(offset, 0);
        assert_eq!(sb.version, 2);
        assert_eq!(sb.size_of_offsets, 8);
        assert_eq!(sb.size_of_lengths, 8);
        assert_eq!(sb.base_address, 0);
        assert_eq!(sb.root_group_address, 48);
        assert_eq!(sb.end_of_file_address, 4096);
        assert_eq!(sb.extension_address, UNDEF_ADDR);
    }
}