Skip to main content

hdf5_reader/messages/
mod.rs

//! HDF5 object header message parsing.
//!
//! Each object header contains a sequence of messages identified by a 16-bit
//! type ID. This module dispatches to type-specific parsers and collects the
//! results into `HdfMessage` variants.
6
7pub mod attribute;
8pub mod attribute_info;
9pub mod btree_k;
10pub mod continuation;
11pub mod dataspace;
12pub mod datatype;
13pub mod external_files;
14pub mod fill_value;
15pub mod filter_pipeline;
16pub mod group_info;
17pub mod layout;
18pub mod link;
19pub mod link_info;
20pub mod modification_time;
21pub mod shared;
22pub mod symbol_table_msg;
23
24// Re-exports for convenience.
25pub use dataspace::DataspaceMessage;
26pub use datatype::Datatype;
27
28use crate::error::Result;
29use crate::io::Cursor;
30
// ---------------------------------------------------------------------------
// Message type IDs (from the HDF5 specification)
// ---------------------------------------------------------------------------

/// NIL message — padding in the header.
pub const MSG_NIL: u16 = 0x0000;
/// Dataspace message.
pub const MSG_DATASPACE: u16 = 0x0001;
/// Link info message (v2 groups).
pub const MSG_LINK_INFO: u16 = 0x0002;
/// Datatype message.
pub const MSG_DATATYPE: u16 = 0x0003;
/// Old fill value message (deprecated).
pub const MSG_FILL_VALUE_OLD: u16 = 0x0004;
/// Fill value message.
pub const MSG_FILL_VALUE: u16 = 0x0005;
/// Link message (v2 groups).
pub const MSG_LINK: u16 = 0x0006;
/// External data files message.
pub const MSG_EXTERNAL_FILES: u16 = 0x0007;
/// Data layout message.
pub const MSG_DATA_LAYOUT: u16 = 0x0008;
/// Bogus message (testing only, should never appear).
pub const MSG_BOGUS: u16 = 0x0009;
/// Group info message (v2 groups).
pub const MSG_GROUP_INFO: u16 = 0x000A;
/// Filter pipeline message.
pub const MSG_FILTER_PIPELINE: u16 = 0x000B;
/// Attribute message.
pub const MSG_ATTRIBUTE: u16 = 0x000C;
/// Object comment message.
pub const MSG_COMMENT: u16 = 0x000D;
/// Old modification time message (deprecated).
pub const MSG_MODIFICATION_TIME_OLD: u16 = 0x000E;
/// Shared message table message.
pub const MSG_SHARED_TABLE: u16 = 0x000F;
/// Object header continuation message.
pub const MSG_CONTINUATION: u16 = 0x0010;
/// Symbol table message (v1 groups).
pub const MSG_SYMBOL_TABLE: u16 = 0x0011;
/// Modification time message.
pub const MSG_MODIFICATION_TIME: u16 = 0x0012;
/// B-tree 'K' values message.
pub const MSG_BTREE_K: u16 = 0x0013;
/// Driver info message.
pub const MSG_DRIVER_INFO: u16 = 0x0014;
/// Attribute info message.
pub const MSG_ATTRIBUTE_INFO: u16 = 0x0015;
/// Object reference count message.
pub const MSG_REFERENCE_COUNT: u16 = 0x0016;
/// File space info message (v2).
///
/// The HDF5 library writes this message with type ID 0x0017
/// (`H5O_FSINFO_ID`). The ID 0x0018 belongs to the metadata cache image
/// message, so it must not be reported as file space info.
pub const MSG_FILE_SPACE_INFO: u16 = 0x0017;
83
84// ---------------------------------------------------------------------------
85// Unified message enum
86// ---------------------------------------------------------------------------
87
88/// A parsed HDF5 header message.
89#[derive(Debug, Clone)]
90pub enum HdfMessage {
91    /// Nil (padding) — no payload.
92    Nil,
93    /// Dataspace (shape).
94    Dataspace(dataspace::DataspaceMessage),
95    /// Datatype (element type).
96    Datatype(datatype::DatatypeMessage),
97    /// Fill value (old or new).
98    FillValue(fill_value::FillValueMessage),
99    /// Data layout (compact / contiguous / chunked).
100    DataLayout(layout::DataLayoutMessage),
101    /// Filter pipeline (compression, shuffle, etc.).
102    FilterPipeline(filter_pipeline::FilterPipelineMessage),
103    /// Attribute (name + type + data).
104    Attribute(attribute::AttributeMessage),
105    /// Attribute info (dense attribute storage addresses).
106    AttributeInfo(attribute_info::AttributeInfoMessage),
107    /// Link (v2 group child).
108    Link(link::LinkMessage),
109    /// Link info (dense link storage addresses).
110    LinkInfo(link_info::LinkInfoMessage),
111    /// Group info (storage hints for v2 groups).
112    GroupInfo(group_info::GroupInfoMessage),
113    /// Symbol table (v1 group child navigation).
114    SymbolTable(symbol_table_msg::SymbolTableMessage),
115    /// Header continuation (pointer to more messages).
116    Continuation(continuation::ContinuationMessage),
117    /// Modification time.
118    ModificationTime(modification_time::ModificationTimeMessage),
119    /// B-tree K values.
120    BTreeK(btree_k::BTreeKMessage),
121    /// External data files.
122    ExternalFiles(external_files::ExternalFilesMessage),
123    /// Shared message wrapper.
124    Shared(shared::SharedMessage),
125    /// Object header continuation (marker only — the parser follows the
126    /// continuation internally, but records that one was encountered).
127    ObjectHeaderContinuation,
128    /// Comment (plain text).
129    Comment(String),
130    /// Object reference count.
131    ReferenceCount(u32),
132    /// Unknown or unimplemented message type — raw bytes preserved.
133    Unknown { type_id: u16, data: Vec<u8> },
134}
135
136/// Parse a single header message given its type ID, size, and a cursor
137/// positioned at the start of the message payload.
138///
139/// `offset_size` and `length_size` come from the superblock.
140pub fn parse_message(
141    type_id: u16,
142    msg_size: usize,
143    cursor: &mut Cursor<'_>,
144    offset_size: u8,
145    length_size: u8,
146) -> Result<HdfMessage> {
147    // Short-circuit for NIL (padding) messages.
148    if type_id == MSG_NIL {
149        if msg_size > 0 {
150            cursor.skip(msg_size)?;
151        }
152        return Ok(HdfMessage::Nil);
153    }
154
155    match type_id {
156        MSG_DATASPACE => {
157            let msg = dataspace::parse(cursor, offset_size, length_size, msg_size)?;
158            Ok(HdfMessage::Dataspace(msg))
159        }
160        MSG_DATATYPE => {
161            let msg = datatype::parse(cursor, msg_size)?;
162            Ok(HdfMessage::Datatype(msg))
163        }
164        MSG_FILL_VALUE_OLD => {
165            let msg = fill_value::parse_old(cursor, offset_size, length_size, msg_size)?;
166            Ok(HdfMessage::FillValue(msg))
167        }
168        MSG_FILL_VALUE => {
169            let msg = fill_value::parse_new(cursor, offset_size, length_size, msg_size)?;
170            Ok(HdfMessage::FillValue(msg))
171        }
172        MSG_DATA_LAYOUT => {
173            let msg = layout::parse(cursor, offset_size, length_size, msg_size)?;
174            Ok(HdfMessage::DataLayout(msg))
175        }
176        MSG_FILTER_PIPELINE => {
177            let msg = filter_pipeline::parse(cursor, offset_size, length_size, msg_size)?;
178            Ok(HdfMessage::FilterPipeline(msg))
179        }
180        MSG_ATTRIBUTE => {
181            let msg = attribute::parse(cursor, offset_size, length_size, msg_size)?;
182            Ok(HdfMessage::Attribute(msg))
183        }
184        MSG_ATTRIBUTE_INFO => {
185            let msg = attribute_info::parse(cursor, offset_size, length_size, msg_size)?;
186            Ok(HdfMessage::AttributeInfo(msg))
187        }
188        MSG_LINK => {
189            let msg = link::parse(cursor, offset_size, length_size, msg_size)?;
190            Ok(HdfMessage::Link(msg))
191        }
192        MSG_LINK_INFO => {
193            let msg = link_info::parse(cursor, offset_size, length_size, msg_size)?;
194            Ok(HdfMessage::LinkInfo(msg))
195        }
196        MSG_GROUP_INFO => {
197            let msg = group_info::parse(cursor, offset_size, length_size, msg_size)?;
198            Ok(HdfMessage::GroupInfo(msg))
199        }
200        MSG_SYMBOL_TABLE => {
201            let msg = symbol_table_msg::parse(cursor, offset_size, length_size, msg_size)?;
202            Ok(HdfMessage::SymbolTable(msg))
203        }
204        MSG_CONTINUATION => {
205            let msg = continuation::parse(cursor, offset_size, length_size, msg_size)?;
206            Ok(HdfMessage::Continuation(msg))
207        }
208        MSG_MODIFICATION_TIME_OLD => {
209            let msg = modification_time::parse_old(cursor, offset_size, length_size, msg_size)?;
210            Ok(HdfMessage::ModificationTime(msg))
211        }
212        MSG_MODIFICATION_TIME => {
213            let msg = modification_time::parse_new(cursor, offset_size, length_size, msg_size)?;
214            Ok(HdfMessage::ModificationTime(msg))
215        }
216        MSG_BTREE_K => {
217            let msg = btree_k::parse(cursor, offset_size, length_size, msg_size)?;
218            Ok(HdfMessage::BTreeK(msg))
219        }
220        MSG_EXTERNAL_FILES => {
221            let msg = external_files::parse(cursor, offset_size, length_size, msg_size)?;
222            Ok(HdfMessage::ExternalFiles(msg))
223        }
224        MSG_COMMENT => {
225            let comment = cursor.read_fixed_string(msg_size)?;
226            Ok(HdfMessage::Comment(comment))
227        }
228        MSG_REFERENCE_COUNT => {
229            let count = cursor.read_u32_le()?;
230            if msg_size > 4 {
231                cursor.skip(msg_size - 4)?;
232            }
233            Ok(HdfMessage::ReferenceCount(count))
234        }
235        _ => {
236            // Unknown or unimplemented message — preserve raw bytes.
237            let data = if msg_size > 0 {
238                cursor.read_bytes(msg_size)?.to_vec()
239            } else {
240                vec![]
241            };
242            Ok(HdfMessage::Unknown { type_id, data })
243        }
244    }
245}
246
247/// Returns a human-readable name for a message type ID.
248pub fn message_type_name(type_id: u16) -> &'static str {
249    match type_id {
250        MSG_NIL => "NIL",
251        MSG_DATASPACE => "Dataspace",
252        MSG_LINK_INFO => "LinkInfo",
253        MSG_DATATYPE => "Datatype",
254        MSG_FILL_VALUE_OLD => "FillValue (old)",
255        MSG_FILL_VALUE => "FillValue",
256        MSG_LINK => "Link",
257        MSG_EXTERNAL_FILES => "ExternalFiles",
258        MSG_DATA_LAYOUT => "DataLayout",
259        MSG_BOGUS => "Bogus",
260        MSG_GROUP_INFO => "GroupInfo",
261        MSG_FILTER_PIPELINE => "FilterPipeline",
262        MSG_ATTRIBUTE => "Attribute",
263        MSG_COMMENT => "Comment",
264        MSG_MODIFICATION_TIME_OLD => "ModificationTime (old)",
265        MSG_SHARED_TABLE => "SharedTable",
266        MSG_CONTINUATION => "Continuation",
267        MSG_SYMBOL_TABLE => "SymbolTable",
268        MSG_MODIFICATION_TIME => "ModificationTime",
269        MSG_BTREE_K => "BTreeK",
270        MSG_DRIVER_INFO => "DriverInfo",
271        MSG_ATTRIBUTE_INFO => "AttributeInfo",
272        MSG_REFERENCE_COUNT => "ReferenceCount",
273        MSG_FILE_SPACE_INFO => "FileSpaceInfo",
274        _ => "Unknown",
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// NIL padding yields `HdfMessage::Nil` and the cursor ends up past it.
    #[test]
    fn test_parse_nil_message() {
        let padding = [0u8; 16];
        let mut cursor = Cursor::new(&padding);
        let parsed = parse_message(MSG_NIL, 16, &mut cursor, 8, 8).unwrap();
        assert!(matches!(parsed, HdfMessage::Nil));
        assert_eq!(cursor.position(), 16);
    }

    /// An unrecognised type ID keeps both the ID and the raw payload.
    #[test]
    fn test_parse_unknown_message() {
        let raw: [u8; 4] = [0xAA, 0xBB, 0xCC, 0xDD];
        let mut cursor = Cursor::new(&raw);
        let parsed = parse_message(0xFFFF, 4, &mut cursor, 8, 8).unwrap();
        match parsed {
            HdfMessage::Unknown { type_id, data } => {
                assert_eq!(type_id, 0xFFFF);
                assert_eq!(data, vec![0xAA, 0xBB, 0xCC, 0xDD]);
            }
            unexpected => panic!("expected Unknown, got {:?}", unexpected),
        }
    }

    /// A NUL-padded comment comes back without the padding.
    #[test]
    fn test_parse_comment_message() {
        let payload: &[u8] = b"hello world\0\0\0\0\0";
        let mut cursor = Cursor::new(payload);
        let parsed = parse_message(MSG_COMMENT, 16, &mut cursor, 8, 8).unwrap();
        match parsed {
            HdfMessage::Comment(text) => assert_eq!(text, "hello world"),
            unexpected => panic!("expected Comment, got {:?}", unexpected),
        }
    }

    /// The reference count is read as a little-endian `u32`.
    #[test]
    fn test_parse_reference_count() {
        let payload = 42u32.to_le_bytes();
        let mut cursor = Cursor::new(&payload);
        let parsed = parse_message(MSG_REFERENCE_COUNT, 4, &mut cursor, 8, 8).unwrap();
        match parsed {
            HdfMessage::ReferenceCount(count) => assert_eq!(count, 42),
            unexpected => panic!("expected ReferenceCount, got {:?}", unexpected),
        }
    }

    /// Symbol table messages are routed to the symbol table parser.
    #[test]
    fn test_parse_symbol_table_via_dispatch() {
        let payload = [0x1234u64.to_le_bytes(), 0x5678u64.to_le_bytes()].concat();

        let mut cursor = Cursor::new(&payload);
        let parsed =
            parse_message(MSG_SYMBOL_TABLE, payload.len(), &mut cursor, 8, 8).unwrap();
        match parsed {
            HdfMessage::SymbolTable(table) => {
                assert_eq!(table.btree_address, 0x1234);
                assert_eq!(table.heap_address, 0x5678);
            }
            unexpected => panic!("expected SymbolTable, got {:?}", unexpected),
        }
    }

    /// Continuation messages are routed to the continuation parser.
    #[test]
    fn test_parse_continuation_via_dispatch() {
        let payload = [0xAAAAu64.to_le_bytes(), 512u64.to_le_bytes()].concat();

        let mut cursor = Cursor::new(&payload);
        let parsed =
            parse_message(MSG_CONTINUATION, payload.len(), &mut cursor, 8, 8).unwrap();
        match parsed {
            HdfMessage::Continuation(cont) => {
                assert_eq!(cont.offset, 0xAAAA);
                assert_eq!(cont.length, 512);
            }
            unexpected => panic!("expected Continuation, got {:?}", unexpected),
        }
    }

    /// Known IDs map to their labels; anything else is "Unknown".
    #[test]
    fn test_message_type_name() {
        let cases = [
            (MSG_DATASPACE, "Dataspace"),
            (MSG_DATATYPE, "Datatype"),
            (0x9999, "Unknown"),
        ];
        for (id, expected) in cases.iter() {
            assert_eq!(message_type_name(*id), *expected);
        }
    }
}
367}