Skip to main content

hdf5_reader/messages/
mod.rs

//! HDF5 object header message parsing.
//!
//! Each object header contains a sequence of messages, each identified by a
//! numeric type ID that this module represents as a `u16`. The module
//! dispatches to type-specific parsers and collects the results into
//! `HdfMessage` variants.
6
7pub mod attribute;
8pub mod attribute_info;
9pub mod btree_k;
10pub mod continuation;
11pub mod dataspace;
12pub mod datatype;
13pub mod external_files;
14pub mod fill_value;
15pub mod filter_pipeline;
16pub mod group_info;
17pub mod layout;
18pub mod link;
19pub mod link_info;
20pub mod modification_time;
21pub mod shared;
22pub mod shared_table;
23pub mod symbol_table_msg;
24
25// Re-exports for convenience.
26pub use dataspace::DataspaceMessage;
27pub use datatype::Datatype;
28
29use crate::error::Result;
30use crate::io::Cursor;
31
32// ---------------------------------------------------------------------------
33// Message type IDs (from the HDF5 specification)
34// ---------------------------------------------------------------------------
35
/// NIL message — padding in the header.
pub const MSG_NIL: u16 = 0x0000;
/// Dataspace message.
pub const MSG_DATASPACE: u16 = 0x0001;
/// Link info message (v2 groups).
pub const MSG_LINK_INFO: u16 = 0x0002;
/// Datatype message.
pub const MSG_DATATYPE: u16 = 0x0003;
/// Old fill value message (deprecated).
pub const MSG_FILL_VALUE_OLD: u16 = 0x0004;
/// Fill value message.
pub const MSG_FILL_VALUE: u16 = 0x0005;
/// Link message (v2 groups).
pub const MSG_LINK: u16 = 0x0006;
/// External data files message.
pub const MSG_EXTERNAL_FILES: u16 = 0x0007;
/// Data layout message.
pub const MSG_DATA_LAYOUT: u16 = 0x0008;
/// Bogus message (testing only, should never appear).
pub const MSG_BOGUS: u16 = 0x0009;
/// Group info message (v2 groups).
pub const MSG_GROUP_INFO: u16 = 0x000A;
/// Filter pipeline message.
pub const MSG_FILTER_PIPELINE: u16 = 0x000B;
/// Attribute message.
pub const MSG_ATTRIBUTE: u16 = 0x000C;
/// Object comment message.
pub const MSG_COMMENT: u16 = 0x000D;
/// Old modification time message (deprecated).
pub const MSG_MODIFICATION_TIME_OLD: u16 = 0x000E;
/// Shared message table message.
pub const MSG_SHARED_TABLE: u16 = 0x000F;
/// Object header continuation message.
pub const MSG_CONTINUATION: u16 = 0x0010;
/// Symbol table message (v1 groups).
pub const MSG_SYMBOL_TABLE: u16 = 0x0011;
/// Modification time message.
pub const MSG_MODIFICATION_TIME: u16 = 0x0012;
/// B-tree 'K' values message.
pub const MSG_BTREE_K: u16 = 0x0013;
/// Driver info message.
pub const MSG_DRIVER_INFO: u16 = 0x0014;
/// Attribute info message.
pub const MSG_ATTRIBUTE_INFO: u16 = 0x0015;
/// Object reference count message.
pub const MSG_REFERENCE_COUNT: u16 = 0x0016;
/// File space info message (v2).
///
/// The HDF5 file format specification assigns this message the ID `0x0017`;
/// `0x0018` belongs to the metadata cache image message, which has no
/// constant in this list.
pub const MSG_FILE_SPACE_INFO: u16 = 0x0017;
84
85// ---------------------------------------------------------------------------
86// Unified message enum
87// ---------------------------------------------------------------------------
88
89/// A parsed HDF5 header message.
90#[derive(Debug, Clone)]
91pub enum HdfMessage {
92    /// Nil (padding) — no payload.
93    Nil,
94    /// Dataspace (shape).
95    Dataspace(dataspace::DataspaceMessage),
96    /// Datatype (element type).
97    Datatype(datatype::DatatypeMessage),
98    /// Fill value (old or new).
99    FillValue(fill_value::FillValueMessage),
100    /// Data layout (compact / contiguous / chunked).
101    DataLayout(layout::DataLayoutMessage),
102    /// Filter pipeline (compression, shuffle, etc.).
103    FilterPipeline(filter_pipeline::FilterPipelineMessage),
104    /// Attribute (name + type + data).
105    Attribute(attribute::AttributeMessage),
106    /// Attribute info (dense attribute storage addresses).
107    AttributeInfo(attribute_info::AttributeInfoMessage),
108    /// Link (v2 group child).
109    Link(link::LinkMessage),
110    /// Link info (dense link storage addresses).
111    LinkInfo(link_info::LinkInfoMessage),
112    /// Group info (storage hints for v2 groups).
113    GroupInfo(group_info::GroupInfoMessage),
114    /// Symbol table (v1 group child navigation).
115    SymbolTable(symbol_table_msg::SymbolTableMessage),
116    /// Header continuation (pointer to more messages).
117    Continuation(continuation::ContinuationMessage),
118    /// Modification time.
119    ModificationTime(modification_time::ModificationTimeMessage),
120    /// B-tree K values.
121    BTreeK(btree_k::BTreeKMessage),
122    /// External data files.
123    ExternalFiles(external_files::ExternalFilesMessage),
124    /// Pointer to the file-level SOHM master table.
125    SharedTable(shared_table::SharedTableMessage),
126    /// Shared message wrapper.
127    Shared(shared::SharedMessage),
128    /// Object header continuation (marker only — the parser follows the
129    /// continuation internally, but records that one was encountered).
130    ObjectHeaderContinuation,
131    /// Comment (plain text).
132    Comment(String),
133    /// Object reference count.
134    ReferenceCount(u32),
135    /// Unknown or unimplemented message type — raw bytes preserved.
136    Unknown { type_id: u16, data: Vec<u8> },
137}
138
139/// Parse a single header message given its type ID, size, and a cursor
140/// positioned at the start of the message payload.
141///
142/// `offset_size` and `length_size` come from the superblock.
143pub fn parse_message(
144    type_id: u16,
145    msg_size: usize,
146    cursor: &mut Cursor<'_>,
147    offset_size: u8,
148    length_size: u8,
149) -> Result<HdfMessage> {
150    // Short-circuit for NIL (padding) messages.
151    if type_id == MSG_NIL {
152        if msg_size > 0 {
153            cursor.skip(msg_size)?;
154        }
155        return Ok(HdfMessage::Nil);
156    }
157
158    match type_id {
159        MSG_DATASPACE => {
160            let msg = dataspace::parse(cursor, offset_size, length_size, msg_size)?;
161            Ok(HdfMessage::Dataspace(msg))
162        }
163        MSG_DATATYPE => {
164            let msg = datatype::parse(cursor, msg_size)?;
165            Ok(HdfMessage::Datatype(msg))
166        }
167        MSG_FILL_VALUE_OLD => {
168            let msg = fill_value::parse_old(cursor, offset_size, length_size, msg_size)?;
169            Ok(HdfMessage::FillValue(msg))
170        }
171        MSG_FILL_VALUE => {
172            let msg = fill_value::parse_new(cursor, offset_size, length_size, msg_size)?;
173            Ok(HdfMessage::FillValue(msg))
174        }
175        MSG_DATA_LAYOUT => {
176            let msg = layout::parse(cursor, offset_size, length_size, msg_size)?;
177            Ok(HdfMessage::DataLayout(msg))
178        }
179        MSG_FILTER_PIPELINE => {
180            let msg = filter_pipeline::parse(cursor, offset_size, length_size, msg_size)?;
181            Ok(HdfMessage::FilterPipeline(msg))
182        }
183        MSG_ATTRIBUTE => {
184            let msg = attribute::parse(cursor, offset_size, length_size, msg_size)?;
185            Ok(HdfMessage::Attribute(msg))
186        }
187        MSG_ATTRIBUTE_INFO => {
188            let msg = attribute_info::parse(cursor, offset_size, length_size, msg_size)?;
189            Ok(HdfMessage::AttributeInfo(msg))
190        }
191        MSG_LINK => {
192            let msg = link::parse(cursor, offset_size, length_size, msg_size)?;
193            Ok(HdfMessage::Link(msg))
194        }
195        MSG_LINK_INFO => {
196            let msg = link_info::parse(cursor, offset_size, length_size, msg_size)?;
197            Ok(HdfMessage::LinkInfo(msg))
198        }
199        MSG_GROUP_INFO => {
200            let msg = group_info::parse(cursor, offset_size, length_size, msg_size)?;
201            Ok(HdfMessage::GroupInfo(msg))
202        }
203        MSG_SYMBOL_TABLE => {
204            let msg = symbol_table_msg::parse(cursor, offset_size, length_size, msg_size)?;
205            Ok(HdfMessage::SymbolTable(msg))
206        }
207        MSG_CONTINUATION => {
208            let msg = continuation::parse(cursor, offset_size, length_size, msg_size)?;
209            Ok(HdfMessage::Continuation(msg))
210        }
211        MSG_MODIFICATION_TIME_OLD => {
212            let msg = modification_time::parse_old(cursor, offset_size, length_size, msg_size)?;
213            Ok(HdfMessage::ModificationTime(msg))
214        }
215        MSG_MODIFICATION_TIME => {
216            let msg = modification_time::parse_new(cursor, offset_size, length_size, msg_size)?;
217            Ok(HdfMessage::ModificationTime(msg))
218        }
219        MSG_BTREE_K => {
220            let msg = btree_k::parse(cursor, offset_size, length_size, msg_size)?;
221            Ok(HdfMessage::BTreeK(msg))
222        }
223        MSG_EXTERNAL_FILES => {
224            let msg = external_files::parse(cursor, offset_size, length_size, msg_size)?;
225            Ok(HdfMessage::ExternalFiles(msg))
226        }
227        MSG_SHARED_TABLE => {
228            let msg = shared_table::parse(cursor, offset_size, length_size, msg_size)?;
229            Ok(HdfMessage::SharedTable(msg))
230        }
231        MSG_COMMENT => {
232            let comment = cursor.read_fixed_string(msg_size)?;
233            Ok(HdfMessage::Comment(comment))
234        }
235        MSG_REFERENCE_COUNT => {
236            let count = cursor.read_u32_le()?;
237            if msg_size > 4 {
238                cursor.skip(msg_size - 4)?;
239            }
240            Ok(HdfMessage::ReferenceCount(count))
241        }
242        _ => {
243            // Unknown or unimplemented message — preserve raw bytes.
244            let data = if msg_size > 0 {
245                cursor.read_bytes(msg_size)?.to_vec()
246            } else {
247                vec![]
248            };
249            Ok(HdfMessage::Unknown { type_id, data })
250        }
251    }
252}
253
254/// Returns a human-readable name for a message type ID.
255pub fn message_type_name(type_id: u16) -> &'static str {
256    match type_id {
257        MSG_NIL => "NIL",
258        MSG_DATASPACE => "Dataspace",
259        MSG_LINK_INFO => "LinkInfo",
260        MSG_DATATYPE => "Datatype",
261        MSG_FILL_VALUE_OLD => "FillValue (old)",
262        MSG_FILL_VALUE => "FillValue",
263        MSG_LINK => "Link",
264        MSG_EXTERNAL_FILES => "ExternalFiles",
265        MSG_DATA_LAYOUT => "DataLayout",
266        MSG_BOGUS => "Bogus",
267        MSG_GROUP_INFO => "GroupInfo",
268        MSG_FILTER_PIPELINE => "FilterPipeline",
269        MSG_ATTRIBUTE => "Attribute",
270        MSG_COMMENT => "Comment",
271        MSG_MODIFICATION_TIME_OLD => "ModificationTime (old)",
272        MSG_SHARED_TABLE => "SharedTable",
273        MSG_CONTINUATION => "Continuation",
274        MSG_SYMBOL_TABLE => "SymbolTable",
275        MSG_MODIFICATION_TIME => "ModificationTime",
276        MSG_BTREE_K => "BTreeK",
277        MSG_DRIVER_INFO => "DriverInfo",
278        MSG_ATTRIBUTE_INFO => "AttributeInfo",
279        MSG_REFERENCE_COUNT => "ReferenceCount",
280        MSG_FILE_SPACE_INFO => "FileSpaceInfo",
281        _ => "Unknown",
282    }
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// A NIL message yields `Nil` and leaves the cursor after the padding.
    #[test]
    fn test_parse_nil_message() {
        let padding = [0u8; 16];
        let mut cursor = Cursor::new(&padding);
        let parsed = parse_message(MSG_NIL, 16, &mut cursor, 8, 8).unwrap();
        assert!(matches!(parsed, HdfMessage::Nil));
        assert_eq!(cursor.position(), 16);
    }

    /// An unrecognised type ID keeps both the ID and the raw payload.
    #[test]
    fn test_parse_unknown_message() {
        let raw = [0xAA, 0xBB, 0xCC, 0xDD];
        let mut cursor = Cursor::new(&raw);
        let parsed = parse_message(0xFFFF, 4, &mut cursor, 8, 8).unwrap();
        if let HdfMessage::Unknown { type_id, data } = &parsed {
            assert_eq!(*type_id, 0xFFFF);
            assert_eq!(*data, vec![0xAA, 0xBB, 0xCC, 0xDD]);
        } else {
            panic!("expected Unknown, got {:?}", parsed);
        }
    }

    /// The comment text comes back without its NUL padding.
    #[test]
    fn test_parse_comment_message() {
        let raw = b"hello world\0\0\0\0\0";
        let mut cursor = Cursor::new(raw.as_ref());
        let parsed = parse_message(MSG_COMMENT, 16, &mut cursor, 8, 8).unwrap();
        if let HdfMessage::Comment(text) = &parsed {
            assert_eq!(text.as_str(), "hello world");
        } else {
            panic!("expected Comment, got {:?}", parsed);
        }
    }

    /// A four-byte little-endian payload decodes to the reference count.
    #[test]
    fn test_parse_reference_count() {
        let raw = 42u32.to_le_bytes();
        let mut cursor = Cursor::new(&raw);
        let parsed = parse_message(MSG_REFERENCE_COUNT, 4, &mut cursor, 8, 8).unwrap();
        if let HdfMessage::ReferenceCount(count) = &parsed {
            assert_eq!(*count, 42);
        } else {
            panic!("expected ReferenceCount, got {:?}", parsed);
        }
    }

    /// The dispatcher routes symbol table messages to their parser.
    #[test]
    fn test_parse_symbol_table_via_dispatch() {
        let payload = [0x1234u64.to_le_bytes(), 0x5678u64.to_le_bytes()].concat();

        let mut cursor = Cursor::new(&payload);
        let parsed =
            parse_message(MSG_SYMBOL_TABLE, payload.len(), &mut cursor, 8, 8).unwrap();
        if let HdfMessage::SymbolTable(table) = &parsed {
            assert_eq!(table.btree_address, 0x1234);
            assert_eq!(table.heap_address, 0x5678);
        } else {
            panic!("expected SymbolTable, got {:?}", parsed);
        }
    }

    /// The dispatcher routes continuation messages to their parser.
    #[test]
    fn test_parse_continuation_via_dispatch() {
        let payload = [0xAAAAu64.to_le_bytes(), 512u64.to_le_bytes()].concat();

        let mut cursor = Cursor::new(&payload);
        let parsed =
            parse_message(MSG_CONTINUATION, payload.len(), &mut cursor, 8, 8).unwrap();
        if let HdfMessage::Continuation(block) = &parsed {
            assert_eq!(block.offset, 0xAAAA);
            assert_eq!(block.length, 512);
        } else {
            panic!("expected Continuation, got {:?}", parsed);
        }
    }

    /// Known IDs map to their label; anything else is "Unknown".
    #[test]
    fn test_message_type_name() {
        let expectations = [
            (MSG_DATASPACE, "Dataspace"),
            (MSG_DATATYPE, "Datatype"),
            (0x9999, "Unknown"),
        ];
        for (type_id, label) in expectations.iter() {
            assert_eq!(message_type_name(*type_id), *label);
        }
    }
}