Skip to main content

async_hdf5/
object_header.rs

1use bytes::Bytes;
2
3use crate::endian::HDF5Reader;
4use crate::error::{HDF5Error, Result};
5
/// Magic bytes (`"OHDR"`) that open every version 2 object header.
const OHDR_SIGNATURE: [u8; 4] = *b"OHDR";
8
/// Magic bytes (`"OCHK"`) that open a version 2 object header continuation block.
// Nothing in this file reads continuation blocks yet, hence the lint allowance.
#[allow(dead_code)]
const OCHK_SIGNATURE: [u8; 4] = *b"OCHK";
12
13/// A raw header message (type + data bytes, not yet parsed into a specific message).
14#[derive(Debug, Clone)]
15pub struct HeaderMessage {
16    /// Message type ID.
17    pub msg_type: u16,
18    /// Raw message data (interpretation depends on type).
19    pub data: Bytes,
20    /// Message flags.
21    pub flags: u8,
22}
23
/// Type IDs of the header messages this crate knows by name.
///
/// The values are compared against [`HeaderMessage::msg_type`].
pub mod msg_types {
    /// NIL: a placeholder message whose payload is ignored.
    pub const NIL: u16 = 0x0000;
    /// Dataspace: the dimensionality of a dataset or attribute.
    pub const DATASPACE: u16 = 0x0001;
    /// Link info: where a group keeps its links (fractal heap + v2 B-tree).
    pub const LINK_INFO: u16 = 0x0002;
    /// Datatype: the element type of a dataset or attribute.
    pub const DATATYPE: u16 = 0x0003;
    /// Fill value, old form.
    pub const FILL_VALUE_OLD: u16 = 0x0004;
    /// Fill value.
    pub const FILL_VALUE: u16 = 0x0005;
    /// Link: a single link stored directly in a group's header.
    pub const LINK: u16 = 0x0006;
    /// External data files.
    pub const EXTERNAL_DATA_FILES: u16 = 0x0007;
    /// Data layout: compact, contiguous, or chunked storage.
    pub const DATA_LAYOUT: u16 = 0x0008;
    /// Bogus: a message used only for testing.
    pub const BOGUS: u16 = 0x0009;
    /// Group info.
    pub const GROUP_INFO: u16 = 0x000A;
    /// Filter pipeline.
    pub const FILTER_PIPELINE: u16 = 0x000B;
    /// Attribute.
    pub const ATTRIBUTE: u16 = 0x000C;
    /// Object comment.
    pub const OBJECT_COMMENT: u16 = 0x000D;
    /// Object modification time, old form.
    pub const MODIFICATION_TIME_OLD: u16 = 0x000E;
    /// Shared message table.
    pub const SHARED_MESSAGE_TABLE: u16 = 0x000F;
    /// Object header continuation: points at a further block of messages.
    pub const HEADER_CONTINUATION: u16 = 0x0010;
    /// Symbol table: the group representation used by version 1 groups.
    pub const SYMBOL_TABLE: u16 = 0x0011;
    /// Object modification time.
    pub const MODIFICATION_TIME: u16 = 0x0012;
    /// B-tree 'K' values.
    pub const BTREE_K_VALUES: u16 = 0x0013;
    /// Driver info.
    pub const DRIVER_INFO: u16 = 0x0014;
    /// Attribute info.
    pub const ATTRIBUTE_INFO: u16 = 0x0015;
    /// Object reference count.
    pub const REFERENCE_COUNT: u16 = 0x0016;
}
73
74/// Parsed object header containing its version and all raw messages.
75#[derive(Debug, Clone)]
76pub struct ObjectHeader {
77    /// Object header version (1 or 2).
78    pub version: u8,
79    /// Whether creation order is tracked for messages (v2 only, flags bit 2).
80    pub track_creation_order: bool,
81    /// All header messages, in order.
82    pub messages: Vec<HeaderMessage>,
83}
84
85impl ObjectHeader {
86    /// Parse an object header from a byte buffer at the given position.
87    ///
88    /// For v2 headers, the data must start with the `OHDR` signature.
89    /// For v1 headers, there is no signature — the version byte comes first.
90    ///
91    /// `continuation_fetcher` is called when a continuation message is found
92    /// and we need to fetch more data from a different file offset. For the
93    /// initial parse (from a single prefetched buffer), pass `None`.
94    pub fn parse(data: &Bytes, size_of_offsets: u8, size_of_lengths: u8) -> Result<Self> {
95        let _r = HDF5Reader::with_sizes(data.clone(), size_of_offsets, size_of_lengths);
96
97        // Peek at first bytes to determine version
98        if data.len() >= 4 && data[0..4] == OHDR_SIGNATURE {
99            Self::parse_v2(data, size_of_offsets, size_of_lengths)
100        } else {
101            Self::parse_v1(data, size_of_offsets, size_of_lengths)
102        }
103    }
104
105    /// Parse a version 1 object header.
106    ///
107    /// Layout:
108    ///   - Version (1 byte) — value 1
109    ///   - Reserved (1 byte)
110    ///   - Number of Header Messages (2 bytes)
111    ///   - Object Reference Count (4 bytes)
112    ///   - Object Header Size (4 bytes)
113    ///   - Reserved/Padding (4 bytes, align to 8)
114    ///   - Messages...
115    fn parse_v1(data: &Bytes, size_of_offsets: u8, size_of_lengths: u8) -> Result<Self> {
116        let mut r = HDF5Reader::with_sizes(data.clone(), size_of_offsets, size_of_lengths);
117
118        let version = r.read_u8()?;
119        if version != 1 {
120            return Err(HDF5Error::UnsupportedObjectHeaderVersion(version));
121        }
122
123        let _reserved = r.read_u8()?;
124        let num_messages = r.read_u16()?;
125        let _ref_count = r.read_u32()?;
126        let header_size = r.read_u32()? as u64;
127        // Align to 8 bytes — skip padding (4 bytes of reserved in some interpretations)
128        let _reserved2 = r.read_u32()?;
129
130        let msg_start = r.position();
131        let msg_end = msg_start + header_size;
132
133        let mut messages = Vec::with_capacity(num_messages as usize);
134
135        while r.position() < msg_end && messages.len() < num_messages as usize {
136            // v1 message header: type(2) + size(2) + flags(1) + reserved(3)
137            let msg_type = r.read_u16()?;
138            let msg_size = r.read_u16()? as usize;
139            let flags = r.read_u8()?;
140            r.skip(3); // reserved
141
142            if msg_size == 0 && msg_type == msg_types::NIL {
143                continue;
144            }
145
146            let msg_data = r.slice_from_position(msg_size)?;
147            r.skip(msg_size as u64);
148
149            // Align to 8-byte boundary
150            r.skip_to_alignment(8);
151
152            messages.push(HeaderMessage {
153                msg_type,
154                data: msg_data,
155                flags,
156            });
157        }
158
159        Ok(Self {
160            version: 1,
161            track_creation_order: false,
162            messages,
163        })
164    }
165
166    /// Parse a version 2 object header.
167    ///
168    /// Layout:
169    ///   - Signature "OHDR" (4 bytes)
170    ///   - Version (1 byte) — value 2
171    ///   - Flags (1 byte)
172    ///   - [optional timestamps, phase change values based on flags]
173    ///   - Size of Chunk #0 (variable: 1, 2, 4, or 8 bytes)
174    ///   - Messages... (packed, no alignment padding)
175    ///   - Checksum (4 bytes) — gap byte 0 marks end before checksum
176    fn parse_v2(data: &Bytes, size_of_offsets: u8, size_of_lengths: u8) -> Result<Self> {
177        let mut r = HDF5Reader::with_sizes(data.clone(), size_of_offsets, size_of_lengths);
178
179        r.read_signature(&OHDR_SIGNATURE)?;
180        let version = r.read_u8()?;
181        if version != 2 {
182            return Err(HDF5Error::UnsupportedObjectHeaderVersion(version));
183        }
184
185        let flags = r.read_u8()?;
186
187        // Optional timestamps (if flags bit 5 set)
188        if flags & 0x20 != 0 {
189            let _access_time = r.read_u32()?;
190            let _modification_time = r.read_u32()?;
191            let _change_time = r.read_u32()?;
192            let _birth_time = r.read_u32()?;
193        }
194
195        // Optional attribute phase change values (if flags bit 4 set)
196        if flags & 0x10 != 0 {
197            let _max_compact = r.read_u16()?;
198            let _min_dense = r.read_u16()?;
199        }
200
201        // Size of chunk #0
202        let chunk_size_width = 1u8 << (flags & 0x03);
203        let chunk0_size = match chunk_size_width {
204            1 => r.read_u8()? as u64,
205            2 => r.read_u16()? as u64,
206            4 => r.read_u32()? as u64,
207            8 => r.read_u64()?,
208            _ => unreachable!(),
209        };
210
211        let track_creation_order = flags & 0x04 != 0;
212        let msg_start = r.position();
213        let msg_end = msg_start + chunk0_size - 4; // -4 for the trailing checksum
214
215        let mut messages = Vec::new();
216
217        while r.position() < msg_end {
218            // v2 message: type(1) + size(2) + flags(1) + [creation_order(2)]
219            let msg_type = r.read_u8()? as u16;
220            let msg_size = r.read_u16()? as usize;
221            let msg_flags = r.read_u8()?;
222
223            if track_creation_order {
224                let _creation_order = r.read_u16()?;
225            }
226
227            // NIL type signals start of gap/padding to end of chunk.
228            if msg_type == msg_types::NIL {
229                break;
230            }
231
232            if msg_size == 0 {
233                messages.push(HeaderMessage {
234                    msg_type,
235                    data: Bytes::new(),
236                    flags: msg_flags,
237                });
238                continue;
239            }
240
241            let msg_data = r.slice_from_position(msg_size)?;
242            r.skip(msg_size as u64);
243
244            messages.push(HeaderMessage {
245                msg_type,
246                data: msg_data,
247                flags: msg_flags,
248            });
249        }
250
251        Ok(Self {
252            version: 2,
253            track_creation_order,
254            messages,
255        })
256    }
257
258    /// Find the first message of a given type.
259    pub fn find_message(&self, msg_type: u16) -> Option<&HeaderMessage> {
260        self.messages.iter().find(|m| m.msg_type == msg_type)
261    }
262
263    /// Find all messages of a given type.
264    pub fn find_messages(&self, msg_type: u16) -> Vec<&HeaderMessage> {
265        self.messages
266            .iter()
267            .filter(|m| m.msg_type == msg_type)
268            .collect()
269    }
270
271    /// Check if a continuation message is present (meaning we'd need to
272    /// fetch more data from a different file address).
273    pub fn has_continuation(&self) -> bool {
274        self.messages
275            .iter()
276            .any(|m| m.msg_type == msg_types::HEADER_CONTINUATION)
277    }
278
279    /// Extract continuation addresses from continuation messages.
280    pub fn continuation_addresses(
281        &self,
282        size_of_offsets: u8,
283        size_of_lengths: u8,
284    ) -> Result<Vec<(u64, u64)>> {
285        let mut continuations = Vec::new();
286        for msg in &self.messages {
287            if msg.msg_type == msg_types::HEADER_CONTINUATION {
288                let mut r =
289                    HDF5Reader::with_sizes(msg.data.clone(), size_of_offsets, size_of_lengths);
290                let address = r.read_offset()?;
291                let length = r.read_length()?;
292                continuations.push((address, length));
293            }
294        }
295        Ok(continuations)
296    }
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// A version 2 header holding a single empty DATASPACE message parses
    /// into exactly one message.
    #[test]
    fn test_parse_v2_minimal() {
        // Signature.
        let mut raw: Vec<u8> = OHDR_SIGNATURE.to_vec();

        // Version 2; flags 0 (no timestamps, no phase-change values, no
        // creation order, 1-byte chunk size field); chunk size field = 8,
        // which spans the eight bytes appended below.
        raw.extend_from_slice(&[2, 0x00, 8]);

        // One message: type DATASPACE (1), little-endian size 0, flags 0.
        raw.extend_from_slice(&[0x01, 0x00, 0x00, 0x00]);

        // Four zero bytes standing in for the checksum.
        raw.extend_from_slice(&[0x00; 4]);

        let header = ObjectHeader::parse(&Bytes::from(raw), 8, 8).unwrap();

        assert_eq!(header.version, 2);
        assert_eq!(header.messages.len(), 1);
        assert_eq!(header.messages[0].msg_type, 1);
    }
}