Skip to main content

async_hdf5/
superblock.rs

1use bytes::Bytes;
2
3use crate::endian::{HDF5Reader, UNDEF_ADDR};
4use crate::error::{HDF5Error, Result};
5
/// The eight magic bytes that open every HDF5 superblock.
///
/// Spelled as a byte string: `0x89`, the ASCII letters `HDF`, CR, LF,
/// `0x1a`, LF (octal notation: `\211HDF\r\n\032\n`).
pub const HDF5_SIGNATURE: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
8
/// The HDF5 superblock, parsed from the beginning of the file.
///
/// Contains file-level parameters that control interpretation of all other
/// structures: the sizes of offset and length fields, and the address of
/// the root group.
///
/// All fields are plain integers, so two superblocks can be compared with
/// `==` (useful in tests and when checking that a re-read file is unchanged).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Superblock {
    /// Superblock version (0, 1, 2, or 3).
    pub version: u8,
    /// Number of bytes used for addresses (offsets) throughout the file.
    pub size_of_offsets: u8,
    /// Number of bytes used for sizes (lengths) throughout the file.
    pub size_of_lengths: u8,
    /// File base address (usually 0).
    pub base_address: u64,
    /// Address of the root group's object header.
    pub root_group_address: u64,
    /// End-of-file address.
    pub end_of_file_address: u64,
    /// Address of the superblock extension object header.
    ///
    /// Only stored in version 2/3 superblocks, where it may itself be
    /// `UNDEF_ADDR`; for version 0/1 superblocks the parser always sets it to
    /// `UNDEF_ADDR`.
    pub extension_address: u64,
}
31
32impl Superblock {
33    /// Parse a superblock from the initial bytes of an HDF5 file.
34    ///
35    /// The HDF5 signature may appear at offset 0, 512, 1024, 2048, or any
36    /// power-of-two multiple of 512. We search the first few candidates.
37    pub fn parse(data: &Bytes) -> Result<(Self, u64)> {
38        // Search for the signature at standard offsets
39        let offsets = [0u64, 512, 1024, 2048, 4096];
40        for &offset in &offsets {
41            if offset as usize + 8 > data.len() {
42                break;
43            }
44            let slice = &data[offset as usize..offset as usize + 8];
45            if slice == HDF5_SIGNATURE {
46                let sb = Self::parse_at(data, offset)?;
47                return Ok((sb, offset));
48            }
49        }
50
51        // No HDF5 signature found — try to identify the actual format.
52        let hint = identify_format(data);
53        Err(HDF5Error::InvalidSignature { offset: 0, hint })
54    }
55
56    fn parse_at(data: &Bytes, offset: u64) -> Result<Self> {
57        let mut r = HDF5Reader::new(data.clone());
58        r.set_position(offset);
59
60        // Skip the 8-byte signature
61        r.skip(8);
62
63        let version = r.read_u8()?;
64
65        match version {
66            0 | 1 => Self::parse_v0_v1(&mut r, version),
67            2 | 3 => Self::parse_v2_v3(&mut r, version),
68            _ => Err(HDF5Error::UnsupportedSuperblockVersion(version)),
69        }
70    }
71
72    /// Parse superblock version 0 or 1.
73    ///
74    /// Layout after version byte:
75    ///   - Version # of File's Free Space Storage (1 byte)
76    ///   - Version # of Root Group Symbol Table Entry (1 byte)
77    ///   - Reserved (1 byte)
78    ///   - Version # of Shared Header Message Format (1 byte)
79    ///   - Size of Offsets (1 byte)
80    ///   - Size of Lengths (1 byte)
81    ///   - Reserved (1 byte)
82    ///   - Group Leaf Node K (2 bytes)
83    ///   - Group Internal Node K (2 bytes)
84    ///   - File Consistency Flags (4 bytes)
85    ///   - [v1 only] Indexed Storage Internal Node K (2 bytes) + Reserved (2 bytes)
86    ///   - Base Address (O bytes)
87    ///   - Address of File Free-space Info (O bytes)
88    ///   - End of File Address (O bytes)
89    ///   - Driver Information Block Address (O bytes)
90    ///   - Root Group Symbol Table Entry (variable)
91    fn parse_v0_v1(r: &mut HDF5Reader, version: u8) -> Result<Self> {
92        let _free_space_version = r.read_u8()?;
93        let _root_group_version = r.read_u8()?;
94        let _reserved1 = r.read_u8()?;
95        let _shared_header_version = r.read_u8()?;
96        let size_of_offsets = r.read_u8()?;
97        let size_of_lengths = r.read_u8()?;
98        let _reserved2 = r.read_u8()?;
99
100        // Now we know field sizes — update the reader
101        *r = HDF5Reader::with_sizes(r.get_ref().clone(), size_of_offsets, size_of_lengths);
102        r.set_position(
103            8 // signature
104            + 1 // version
105            + 1 + 1 + 1 + 1 // sub-versions
106            + 1 + 1 + 1, // sizes + reserved
107        );
108
109        let _group_leaf_k = r.read_u16()?;
110        let _group_internal_k = r.read_u16()?;
111        let _consistency_flags = r.read_u32()?;
112
113        if version == 1 {
114            let _indexed_storage_k = r.read_u16()?;
115            let _reserved3 = r.read_u16()?;
116        }
117
118        let base_address = r.read_offset()?;
119        let _free_space_address = r.read_offset()?;
120        let end_of_file_address = r.read_offset()?;
121        let _driver_info_address = r.read_offset()?;
122
123        // Root Group Symbol Table Entry
124        // Link Name Offset (O bytes) — offset into local heap
125        let _link_name_offset = r.read_offset()?;
126        // Object Header Address (O bytes) — this is the root group address
127        let root_group_address = r.read_offset()?;
128        // Cache Type (4 bytes) + Reserved (4 bytes) + Scratch-pad (16 bytes) — skip
129        // Total symbol table entry scratch: 4 + 4 + 16 = 24 bytes
130
131        Ok(Self {
132            version,
133            size_of_offsets,
134            size_of_lengths,
135            base_address,
136            root_group_address,
137            end_of_file_address,
138            extension_address: UNDEF_ADDR,
139        })
140    }
141
142    /// Parse superblock version 2 or 3.
143    ///
144    /// Layout after version byte:
145    ///   - Size of Offsets (1 byte)
146    ///   - Size of Lengths (1 byte)
147    ///   - File Consistency Flags (1 byte)
148    ///   - Base Address (O bytes)
149    ///   - Superblock Extension Address (O bytes)
150    ///   - End of File Address (O bytes)
151    ///   - Root Group Object Header Address (O bytes)
152    ///   - Superblock Checksum (4 bytes)
153    fn parse_v2_v3(r: &mut HDF5Reader, version: u8) -> Result<Self> {
154        let size_of_offsets = r.read_u8()?;
155        let size_of_lengths = r.read_u8()?;
156        let _consistency_flags = r.read_u8()?;
157
158        // Recreate reader with correct sizes
159        let pos = r.position();
160        *r = HDF5Reader::with_sizes(r.get_ref().clone(), size_of_offsets, size_of_lengths);
161        r.set_position(pos);
162
163        let base_address = r.read_offset()?;
164        let extension_address = r.read_offset()?;
165        let end_of_file_address = r.read_offset()?;
166        let root_group_address = r.read_offset()?;
167        let _checksum = r.read_u32()?;
168
169        Ok(Self {
170            version,
171            size_of_offsets,
172            size_of_lengths,
173            base_address,
174            root_group_address,
175            end_of_file_address,
176            extension_address,
177        })
178    }
179}
180
181/// Inspect the first bytes of data and return a human-readable description
182/// of the file format when it is not HDF5.
183fn identify_format(data: &Bytes) -> String {
184    if data.len() < 4 {
185        return format!(
186            "file is too small ({} bytes) to contain an HDF5 superblock",
187            data.len()
188        );
189    }
190
191    let head = &data[..std::cmp::min(data.len(), 8)];
192
193    // NetCDF classic / 64-bit offset / CDF-5
194    if head.starts_with(b"CDF") && data.len() >= 4 {
195        let version_byte = data[3];
196        let variant = match version_byte {
197            1 => "NetCDF3 classic (CDF-1)",
198            2 => "NetCDF3 64-bit offset (CDF-2)",
199            5 => "NetCDF3 64-bit data (CDF-5)",
200            _ => "NetCDF3 (unknown variant)",
201        };
202        return format!(
203            "file appears to be {} format, not HDF5. \
204             NetCDF4 (which uses HDF5) starts with \\x89HDF, \
205             but this file starts with CDF\\x{:02x}",
206            variant, version_byte
207        );
208    }
209
210    // HDF4
211    if head.len() >= 4 && head[0] == 0x0e && head[1] == 0x03 && head[2] == 0x13 && head[3] == 0x01 {
212        return "file appears to be HDF4 format, not HDF5. \
213                async-hdf5 only supports HDF5 (and NetCDF4, which is HDF5-based)"
214            .to_string();
215    }
216
217    // TIFF (little-endian or big-endian)
218    if head.len() >= 4
219        && ((head[0] == b'I' && head[1] == b'I' && head[2] == 42 && head[3] == 0)
220            || (head[0] == b'M' && head[1] == b'M' && head[2] == 0 && head[3] == 42))
221    {
222        return "file appears to be TIFF format, not HDF5".to_string();
223    }
224
225    // Generic: show first 8 bytes as hex
226    let hex: Vec<String> = head.iter().map(|b| format!("{:02x}", b)).collect();
227    format!(
228        "expected HDF5 signature (\\x89HDF\\r\\n\\x1a\\n) but found [{}]",
229        hex.join(" ")
230    )
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes 1..4 of the signature are the ASCII letters "HDF".
    #[test]
    fn test_hdf5_signature() {
        assert_eq!(&HDF5_SIGNATURE[1..4], &b"HDF"[..]);
    }

    /// A hand-assembled version 2 superblock at offset 0 round-trips through
    /// `Superblock::parse`.
    #[test]
    fn test_superblock_v2_minimal() {
        // Field order: signature, version + sizes + consistency flags,
        // base address, extension address (UNDEF), end-of-file address,
        // root group object header address, checksum (dummy).
        let image: Vec<u8> = [
            &HDF5_SIGNATURE[..],
            &[2u8, 8, 8, 0][..],
            &0u64.to_le_bytes()[..],
            &u64::MAX.to_le_bytes()[..],
            &4096u64.to_le_bytes()[..],
            &48u64.to_le_bytes()[..],
            &0u32.to_le_bytes()[..],
        ]
        .concat();

        let (sb, found_at) = Superblock::parse(&Bytes::from(image)).unwrap();

        assert_eq!(found_at, 0);
        assert_eq!(sb.version, 2);
        assert_eq!(sb.size_of_offsets, 8);
        assert_eq!(sb.size_of_lengths, 8);
        assert_eq!(sb.base_address, 0);
        assert_eq!(sb.root_group_address, 48);
        assert_eq!(sb.end_of_file_address, 4096);
        assert_eq!(sb.extension_address, UNDEF_ADDR);
    }
}