Skip to main content

hdf5_reader/
object_header.rs

1//! HDF5 Object Header parser (v1 and v2).
2//!
3//! Object headers contain a collection of header messages that describe an
4//! HDF5 object (group, dataset, committed datatype, etc.).  Two on-disk
5//! formats exist:
6//!
7//! * **Version 1** (HDF5 < 1.8) — 16-byte fixed prefix, messages each have an
8//!   8-byte envelope (type u16 + size u16 + flags u8 + reserved 3).
9//! * **Version 2** (HDF5 >= 1.8) — begins with the `OHDR` signature, variable-
10//!   length prefix, messages have a 4-or-6-byte envelope, and every chunk is
11//!   checksummed with Jenkins lookup3.
12//!
13//! Continuation messages (type `0x0010`) cause the parser to follow an offset
14//! to an additional chunk of messages (an `OCHK` block in v2, or a raw message
15//! run in v1).
16
17use crate::checksum::jenkins_lookup3;
18use crate::error::{Error, Result};
19use crate::io::Cursor;
20use crate::messages::shared::SharedMessage;
21use crate::messages::{parse_message, HdfMessage};
22
23/// Magic signature for v2 object headers.
24const OHDR_SIGNATURE: [u8; 4] = *b"OHDR";
25
26/// Magic signature for v2 continuation chunks.
27const OCHK_SIGNATURE: [u8; 4] = *b"OCHK";
28
29/// Header continuation message type id.
30const MSG_TYPE_CONTINUATION: u16 = 0x0010;
31
32/// Nil (padding) message type id.
33const MSG_TYPE_NIL: u16 = 0x0000;
34
35/// Parsed object header with all its messages.
36#[derive(Debug, Clone)]
37pub struct ObjectHeader {
38    /// Object header format version (1 or 2).
39    pub version: u8,
40    /// All parsed header messages, collected from every chunk.
41    pub messages: Vec<HdfMessage>,
42    /// Object reference count.
43    pub reference_count: u32,
44    /// Modification time in seconds since the UNIX epoch (v2 only, when the
45    /// "times stored" flag is set).
46    pub modification_time: Option<u32>,
47}
48
49impl ObjectHeader {
50    /// Parse an object header at the given absolute file address.
51    ///
52    /// `data` is the entire file mapped into memory, `address` is the byte
53    /// offset where the object header starts, and `offset_size` / `length_size`
54    /// come from the superblock.
55    pub fn parse_at(data: &[u8], address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
56        let mut cursor = Cursor::new(data);
57        cursor.set_position(address);
58
59        // Peek at the first four bytes to decide v1 vs v2.
60        let sig = cursor.peek_bytes(4)?;
61        if sig == OHDR_SIGNATURE {
62            Self::parse_v2(&cursor, address, offset_size, length_size)
63        } else {
64            Self::parse_v1(&cursor, address, offset_size, length_size)
65        }
66    }
67
68    /// Resolve shared messages by following references to other object headers.
69    ///
70    /// For `SharedInOhdr`, the referenced object header is parsed and the first
71    /// matching message type is extracted. `SharedInSohm` returns an error (rare).
72    pub fn resolve_shared_messages(
73        &mut self,
74        data: &[u8],
75        offset_size: u8,
76        length_size: u8,
77    ) -> Result<()> {
78        let old_messages = std::mem::take(&mut self.messages);
79        let mut resolved = Vec::with_capacity(old_messages.len());
80        for msg in old_messages {
81            match msg {
82                HdfMessage::Shared(SharedMessage::SharedInOhdr { address }) => {
83                    match Self::parse_at(data, address, offset_size, length_size) {
84                        Ok(target_header) => {
85                            // Extract the actual message(s) from the target header.
86                            // Typically there is exactly one "real" message (the
87                            // committed datatype, fill value, etc.).
88                            for target_msg in target_header.messages {
89                                match target_msg {
90                                    HdfMessage::Nil
91                                    | HdfMessage::ObjectHeaderContinuation
92                                    | HdfMessage::Shared(_) => continue,
93                                    other => {
94                                        resolved.push(other);
95                                        break;
96                                    }
97                                }
98                            }
99                        }
100                        Err(_) => {
101                            // If we can't parse the target, keep the shared ref
102                            resolved
103                                .push(HdfMessage::Shared(SharedMessage::SharedInOhdr { address }));
104                        }
105                    }
106                }
107                HdfMessage::Shared(SharedMessage::SharedInSohm { .. }) => {
108                    self.messages = resolved;
109                    return Err(Error::Other(
110                        "SOHM table lookup not yet supported — file uses shared object header messages".to_string(),
111                    ));
112                }
113                other => resolved.push(other),
114            }
115        }
116        self.messages = resolved;
117        Ok(())
118    }
119
120    // ------------------------------------------------------------------
121    // Version 1
122    // ------------------------------------------------------------------
123
124    /// Parse a version-1 object header.
125    ///
126    /// Layout (16 bytes total):
127    /// ```text
128    ///   version          u8    (must be 1)
129    ///   reserved         u8
130    ///   num_messages     u16
131    ///   ref_count        u32
132    ///   header_data_size u32   (byte count of the message run)
133    ///   reserved         u32   (alignment padding)
134    /// ```
135    fn parse_v1(base: &Cursor<'_>, address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
136        let mut cursor = base.at_offset(address)?;
137
138        let version = cursor.read_u8()?;
139        if version != 1 {
140            return Err(Error::UnsupportedObjectHeaderVersion(version));
141        }
142
143        let _reserved = cursor.read_u8()?;
144        let num_messages = cursor.read_u16_le()?;
145        let reference_count = cursor.read_u32_le()?;
146        let header_data_size = cursor.read_u32_le()? as u64;
147        let _reserved2 = cursor.read_u32_le()?; // alignment padding
148
149        // Messages start right after the 16-byte prefix.
150        let messages_start = cursor.position();
151        let messages_end = messages_start + header_data_size;
152
153        let mut messages: Vec<HdfMessage> = Vec::with_capacity(num_messages as usize);
154        let mut continuations: Vec<(u64, u64)> = Vec::new();
155
156        Self::read_v1_messages(
157            base,
158            messages_start,
159            messages_end,
160            offset_size,
161            length_size,
162            &mut messages,
163            &mut continuations,
164        )?;
165
166        // Follow continuation messages.
167        while let Some((cont_offset, cont_length)) = continuations.pop() {
168            let cont_end = cont_offset + cont_length;
169            Self::read_v1_messages(
170                base,
171                cont_offset,
172                cont_end,
173                offset_size,
174                length_size,
175                &mut messages,
176                &mut continuations,
177            )?;
178        }
179
180        Ok(ObjectHeader {
181            version: 1,
182            messages,
183            reference_count,
184            modification_time: None,
185        })
186    }
187
188    /// Read v1 header messages from `start..end`, appending to `messages`.
189    /// Any continuation messages encountered are pushed onto `continuations`
190    /// for the caller to follow.
191    fn read_v1_messages(
192        base: &Cursor<'_>,
193        start: u64,
194        end: u64,
195        offset_size: u8,
196        length_size: u8,
197        messages: &mut Vec<HdfMessage>,
198        continuations: &mut Vec<(u64, u64)>,
199    ) -> Result<()> {
200        let mut cursor = base.at_offset(start)?;
201
202        while cursor.position() + 8 <= end {
203            let msg_type = cursor.read_u16_le()?;
204            let msg_data_size = cursor.read_u16_le()? as usize;
205            let msg_flags = cursor.read_u8()?;
206            let _reserved = cursor.read_bytes(3)?; // 3 reserved bytes
207
208            // Bounds-check the message data within this chunk.
209            if cursor.position() + msg_data_size as u64 > end {
210                return Err(Error::InvalidData(format!(
211                    "v1 message data ({} bytes) extends past header chunk end",
212                    msg_data_size
213                )));
214            }
215
216            if msg_type == MSG_TYPE_NIL {
217                // Nil / padding — skip the data bytes.
218                cursor.skip(msg_data_size)?;
219                messages.push(HdfMessage::Nil);
220                continue;
221            }
222
223            let msg_data = cursor.read_bytes(msg_data_size)?;
224            let is_shared = (msg_flags & 0x02) != 0;
225
226            if is_shared {
227                // Shared message — the stored bytes are a shared-message
228                // reference, not the message payload itself.
229                let shared_msg = crate::messages::shared::parse(
230                    &mut Cursor::new(msg_data),
231                    offset_size,
232                    length_size,
233                    msg_data_size,
234                )?;
235                messages.push(HdfMessage::Shared(shared_msg));
236            } else if msg_type == MSG_TYPE_CONTINUATION {
237                // Parse the continuation message to get offset + length, then
238                // enqueue it for later processing.
239                let cont = crate::messages::continuation::parse(
240                    &mut Cursor::new(msg_data),
241                    offset_size,
242                    length_size,
243                    msg_data_size,
244                )?;
245                continuations.push((cont.offset, cont.length));
246                messages.push(HdfMessage::ObjectHeaderContinuation);
247            } else {
248                let parsed = parse_message(
249                    msg_type,
250                    msg_data.len(),
251                    &mut Cursor::new(msg_data),
252                    offset_size,
253                    length_size,
254                )?;
255                messages.push(parsed);
256            }
257        }
258
259        Ok(())
260    }
261
262    // ------------------------------------------------------------------
263    // Version 2
264    // ------------------------------------------------------------------
265
266    /// Parse a version-2 object header.
267    ///
268    /// Layout:
269    /// ```text
270    ///   signature  4 bytes  ("OHDR")
271    ///   version    u8       (must be 2)
272    ///   flags      u8
273    ///   [optional timestamps — 4 x u32 if bit 5 of flags]
274    ///   [optional attr phase change — 2 x u16 if bit 4 of flags]
275    ///   chunk0_size  1/2/4/8 bytes (encoded size depends on bits 0-1 of flags)
276    ///   <messages for chunk 0>
277    ///   checksum   u32      (Jenkins lookup3 from "OHDR" through last byte before checksum)
278    /// ```
279    fn parse_v2(base: &Cursor<'_>, address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
280        let mut cursor = base.at_offset(address)?;
281
282        // ---- Fixed prefix ----
283        let sig = cursor.read_bytes(4)?;
284        if sig != OHDR_SIGNATURE {
285            return Err(Error::InvalidObjectHeaderSignature);
286        }
287        let version = cursor.read_u8()?;
288        if version != 2 {
289            return Err(Error::UnsupportedObjectHeaderVersion(version));
290        }
291        let flags = cursor.read_u8()?;
292
293        // Bit 5 — timestamps stored.
294        let modification_time = if (flags & 0x20) != 0 {
295            let _access_time = cursor.read_u32_le()?;
296            let mod_time = cursor.read_u32_le()?;
297            let _change_time = cursor.read_u32_le()?;
298            let _birth_time = cursor.read_u32_le()?;
299            Some(mod_time)
300        } else {
301            None
302        };
303
304        // Bit 4 — non-default attribute storage phase change values.
305        if (flags & 0x10) != 0 {
306            let _max_compact = cursor.read_u16_le()?;
307            let _min_dense = cursor.read_u16_le()?;
308        }
309
310        // Chunk#0 size — width depends on bits 0-1 of flags.
311        let size_field_width = 1usize << (flags & 0x03);
312        let chunk0_data_size = cursor.read_uvar(size_field_width)?;
313
314        // Bit 2 — attribute creation order tracked (affects per-message envelope).
315        let creation_order_tracked = (flags & 0x04) != 0;
316
317        // Messages for chunk 0 run from the current position for
318        // `chunk0_data_size` bytes.  The last 4 bytes of that range are the
319        // checksum.
320        let messages_start = cursor.position();
321        let chunk0_end = messages_start + chunk0_data_size;
322
323        // The checksum covers everything from "OHDR" through the last byte
324        // before the checksum field.
325        let checksum_start = address as usize;
326        let checksum_end = chunk0_end as usize; // the checksum itself sits at chunk0_end
327        let stored_checksum = {
328            let mut ck = base.at_offset(chunk0_end)?;
329            ck.read_u32_le()?
330        };
331        let computed = jenkins_lookup3(&base.data()[checksum_start..checksum_end]);
332        if computed != stored_checksum {
333            return Err(Error::ChecksumMismatch {
334                expected: stored_checksum,
335                actual: computed,
336            });
337        }
338
339        let mut messages: Vec<HdfMessage> = Vec::new();
340        let mut continuations: Vec<(u64, u64)> = Vec::new();
341
342        Self::read_v2_messages(
343            base,
344            messages_start,
345            chunk0_end,
346            offset_size,
347            length_size,
348            creation_order_tracked,
349            &mut messages,
350            &mut continuations,
351        )?;
352
353        // Follow continuation chunks.
354        while let Some((cont_offset, cont_length)) = continuations.pop() {
355            Self::read_v2_continuation_chunk(
356                base,
357                cont_offset,
358                cont_length,
359                offset_size,
360                length_size,
361                creation_order_tracked,
362                &mut messages,
363                &mut continuations,
364            )?;
365        }
366
367        Ok(ObjectHeader {
368            version: 2,
369            messages,
370            reference_count: 0, // v2 does not store a reference count in the header
371            modification_time,
372        })
373    }
374
375    /// Read v2 messages from `start..end`.
376    #[allow(clippy::too_many_arguments)]
377    fn read_v2_messages(
378        base: &Cursor<'_>,
379        start: u64,
380        end: u64,
381        offset_size: u8,
382        length_size: u8,
383        creation_order_tracked: bool,
384        messages: &mut Vec<HdfMessage>,
385        continuations: &mut Vec<(u64, u64)>,
386    ) -> Result<()> {
387        let mut cursor = base.at_offset(start)?;
388
389        // Minimum envelope: type(1) + size(2) + flags(1) = 4 bytes, optionally
390        // +2 for creation order.
391        let min_envelope = if creation_order_tracked { 6 } else { 4 };
392
393        while cursor.position() + min_envelope as u64 <= end {
394            let msg_type = cursor.read_u8()? as u16;
395            let msg_data_size = cursor.read_u16_le()? as usize;
396            let msg_flags = cursor.read_u8()?;
397
398            if creation_order_tracked {
399                let _creation_order = cursor.read_u16_le()?;
400            }
401
402            if msg_type == MSG_TYPE_NIL {
403                if msg_data_size == 0
404                    && base.data()[cursor.position() as usize..end as usize]
405                        .iter()
406                        .all(|byte| *byte == 0)
407                {
408                    break;
409                }
410                cursor.skip(msg_data_size)?;
411                messages.push(HdfMessage::Nil);
412                continue;
413            }
414
415            if cursor.position() + msg_data_size as u64 > end {
416                return Err(Error::InvalidData(format!(
417                    "v2 message data ({} bytes) extends past chunk end",
418                    msg_data_size
419                )));
420            }
421
422            let msg_data = cursor.read_bytes(msg_data_size)?;
423            let is_shared = (msg_flags & 0x02) != 0;
424
425            if is_shared {
426                let shared_msg = crate::messages::shared::parse(
427                    &mut Cursor::new(msg_data),
428                    offset_size,
429                    length_size,
430                    msg_data_size,
431                )?;
432                messages.push(HdfMessage::Shared(shared_msg));
433            } else if msg_type == MSG_TYPE_CONTINUATION {
434                let cont = crate::messages::continuation::parse(
435                    &mut Cursor::new(msg_data),
436                    offset_size,
437                    length_size,
438                    msg_data_size,
439                )?;
440                continuations.push((cont.offset, cont.length));
441                messages.push(HdfMessage::ObjectHeaderContinuation);
442            } else {
443                let parsed = parse_message(
444                    msg_type,
445                    msg_data.len(),
446                    &mut Cursor::new(msg_data),
447                    offset_size,
448                    length_size,
449                )?;
450                messages.push(parsed);
451            }
452        }
453
454        Ok(())
455    }
456
457    /// Read and verify a v2 continuation chunk (`OCHK`).
458    #[allow(clippy::too_many_arguments)]
459    ///
460    /// Layout:
461    /// ```text
462    ///   "OCHK"    4 bytes
463    ///   messages  (cont_length - 4 - 4) bytes
464    ///   checksum  u32
465    /// ```
466    fn read_v2_continuation_chunk(
467        base: &Cursor<'_>,
468        cont_offset: u64,
469        cont_length: u64,
470        offset_size: u8,
471        length_size: u8,
472        creation_order_tracked: bool,
473        messages: &mut Vec<HdfMessage>,
474        continuations: &mut Vec<(u64, u64)>,
475    ) -> Result<()> {
476        let mut cursor = base.at_offset(cont_offset)?;
477
478        let sig = cursor.read_bytes(4)?;
479        if sig != OCHK_SIGNATURE {
480            return Err(Error::InvalidObjectHeaderSignature);
481        }
482
483        let chunk_end = cont_offset + cont_length;
484        // The last 4 bytes of the chunk are the checksum.
485        let messages_end = chunk_end - 4;
486        let messages_start = cursor.position(); // right after "OCHK"
487
488        // Verify checksum: covers "OCHK" through the byte before the checksum.
489        let checksum_start = cont_offset as usize;
490        let checksum_end = messages_end as usize;
491        let stored_checksum = {
492            let mut ck = base.at_offset(messages_end)?;
493            ck.read_u32_le()?
494        };
495        let computed = jenkins_lookup3(&base.data()[checksum_start..checksum_end]);
496        if computed != stored_checksum {
497            return Err(Error::ChecksumMismatch {
498                expected: stored_checksum,
499                actual: computed,
500            });
501        }
502
503        Self::read_v2_messages(
504            base,
505            messages_start,
506            messages_end,
507            offset_size,
508            length_size,
509            creation_order_tracked,
510            messages,
511            continuations,
512        )
513    }
514}
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519    use crate::checksum::jenkins_lookup3;
520
521    // ------------------------------------------------------------------
522    // Helpers
523    // ------------------------------------------------------------------
524
525    /// Build a v1 object header containing the given pre-encoded messages.
526    /// Each entry in `raw_messages` is `(type_id, flags, payload)`.
527    fn build_v1_header(raw_messages: &[(u16, u8, &[u8])], ref_count: u32) -> Vec<u8> {
528        // Compute total message data size.
529        let data_size: usize = raw_messages
530            .iter()
531            .map(|(_, _, payload)| 8 + payload.len()) // 8-byte envelope per message
532            .sum();
533
534        let mut buf = Vec::new();
535        // Version
536        buf.push(1);
537        // Reserved
538        buf.push(0);
539        // Number of messages
540        buf.extend_from_slice(&(raw_messages.len() as u16).to_le_bytes());
541        // Reference count
542        buf.extend_from_slice(&ref_count.to_le_bytes());
543        // Header data size
544        buf.extend_from_slice(&(data_size as u32).to_le_bytes());
545        // Reserved padding (4 bytes)
546        buf.extend_from_slice(&[0u8; 4]);
547
548        // Messages
549        for (type_id, flags, payload) in raw_messages {
550            buf.extend_from_slice(&type_id.to_le_bytes());
551            buf.extend_from_slice(&(payload.len() as u16).to_le_bytes());
552            buf.push(*flags);
553            buf.extend_from_slice(&[0u8; 3]); // reserved
554            buf.extend_from_slice(payload);
555        }
556
557        buf
558    }
559
560    /// Build a v2 OHDR chunk#0 with the given raw messages.
561    /// `flags` controls the header flags byte.  Timestamps and phase-change
562    /// values are added automatically when the corresponding flag bits are set.
563    /// Each entry in `raw_messages` is `(type_id, flags, payload)`.
564    /// Returns the complete OHDR block including the trailing checksum.
565    fn build_v2_header(
566        header_flags: u8,
567        raw_messages: &[(u8, u8, &[u8])],
568        timestamps: Option<[u32; 4]>,
569        phase_change: Option<(u16, u16)>,
570    ) -> Vec<u8> {
571        let creation_order = (header_flags & 0x04) != 0;
572
573        // Compute message data size.
574        let envelope_size: usize = if creation_order { 6 } else { 4 };
575        let msg_data_size: usize = raw_messages
576            .iter()
577            .map(|(_, _, payload)| envelope_size + payload.len())
578            .sum();
579
580        let mut buf = Vec::new();
581        // Signature
582        buf.extend_from_slice(&OHDR_SIGNATURE);
583        // Version
584        buf.push(2);
585        // Flags
586        buf.push(header_flags);
587
588        // Timestamps (bit 5)
589        if let Some(ts) = timestamps {
590            for &t in &ts {
591                buf.extend_from_slice(&t.to_le_bytes());
592            }
593        }
594
595        // Phase change (bit 4)
596        if let Some((max_compact, min_dense)) = phase_change {
597            buf.extend_from_slice(&max_compact.to_le_bytes());
598            buf.extend_from_slice(&min_dense.to_le_bytes());
599        }
600
601        // Chunk#0 size field — encode using the width dictated by bits 0-1.
602        let size_width = 1usize << (header_flags & 0x03);
603        match size_width {
604            1 => buf.push(msg_data_size as u8),
605            2 => buf.extend_from_slice(&(msg_data_size as u16).to_le_bytes()),
606            4 => buf.extend_from_slice(&(msg_data_size as u32).to_le_bytes()),
607            8 => buf.extend_from_slice(&(msg_data_size as u64).to_le_bytes()),
608            _ => unreachable!(),
609        }
610
611        // Messages
612        for (type_id, mflags, payload) in raw_messages {
613            buf.push(*type_id);
614            buf.extend_from_slice(&(payload.len() as u16).to_le_bytes());
615            buf.push(*mflags);
616            if creation_order {
617                buf.extend_from_slice(&0u16.to_le_bytes());
618            }
619            buf.extend_from_slice(payload);
620        }
621
622        // Checksum — covers everything so far.
623        let ck = jenkins_lookup3(&buf);
624        buf.extend_from_slice(&ck.to_le_bytes());
625
626        buf
627    }
628
629    /// Build a v2 OCHK continuation chunk containing the given raw messages.
630    fn build_v2_ochk(raw_messages: &[(u8, u8, &[u8])], creation_order: bool) -> Vec<u8> {
631        let mut buf = Vec::new();
632        // Signature
633        buf.extend_from_slice(&OCHK_SIGNATURE);
634
635        // Messages
636        for (type_id, mflags, payload) in raw_messages {
637            buf.push(*type_id);
638            buf.extend_from_slice(&(payload.len() as u16).to_le_bytes());
639            buf.push(*mflags);
640            if creation_order {
641                buf.extend_from_slice(&0u16.to_le_bytes());
642            }
643            buf.extend_from_slice(payload);
644        }
645
646        // Checksum over everything before the checksum itself.
647        let ck = jenkins_lookup3(&buf);
648        buf.extend_from_slice(&ck.to_le_bytes());
649
650        buf
651    }
652
653    // ------------------------------------------------------------------
654    // Tests — Version 1
655    // ------------------------------------------------------------------
656
657    #[test]
658    fn v1_empty_header() {
659        let data = build_v1_header(&[], 1);
660        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
661        assert_eq!(hdr.version, 1);
662        assert_eq!(hdr.reference_count, 1);
663        assert!(hdr.messages.is_empty());
664        assert!(hdr.modification_time.is_none());
665    }
666
667    #[test]
668    fn v1_nil_message() {
669        // A single nil message with 4 bytes of padding payload.
670        let data = build_v1_header(&[(0x0000, 0, &[0u8; 4])], 1);
671        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
672        assert_eq!(hdr.messages.len(), 1);
673        assert!(matches!(hdr.messages[0], HdfMessage::Nil));
674    }
675
676    #[test]
677    fn v1_unknown_message() {
678        // An unknown message type should be stored as HdfMessage::Unknown.
679        let payload = [0xAA, 0xBB, 0xCC];
680        let data = build_v1_header(&[(0x00FF, 0, &payload)], 2);
681        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
682        assert_eq!(hdr.reference_count, 2);
683        assert_eq!(hdr.messages.len(), 1);
684        match &hdr.messages[0] {
685            HdfMessage::Unknown { type_id, data } => {
686                assert_eq!(*type_id, 0x00FF);
687                assert_eq!(data.as_slice(), &payload);
688            }
689            other => panic!("expected Unknown, got {:?}", other),
690        }
691    }
692
693    #[test]
694    fn v1_symbol_table_message() {
695        // Type 0x0011 — symbol table message.
696        // Payload: btree address (8 bytes) + heap address (8 bytes).
697        let mut payload = Vec::new();
698        payload.extend_from_slice(&0x1000u64.to_le_bytes());
699        payload.extend_from_slice(&0x2000u64.to_le_bytes());
700
701        let data = build_v1_header(&[(0x0011, 0, &payload)], 1);
702        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
703        assert_eq!(hdr.messages.len(), 1);
704        match &hdr.messages[0] {
705            HdfMessage::SymbolTable(st) => {
706                assert_eq!(st.btree_address, 0x1000);
707                assert_eq!(st.heap_address, 0x2000);
708            }
709            other => panic!("expected SymbolTable, got {:?}", other),
710        }
711    }
712
713    #[test]
714    fn v1_continuation_message() {
715        // Build a continuation payload that points to a second chunk.
716        // The second chunk contains one unknown message.
717        let unknown_payload = [0xDD; 2];
718
719        // Build the continuation target (a raw v1 message run, no header prefix).
720        let mut cont_chunk = Vec::new();
721        // message type 0x00FE
722        cont_chunk.extend_from_slice(&0x00FEu16.to_le_bytes());
723        // message data size
724        cont_chunk.extend_from_slice(&(unknown_payload.len() as u16).to_le_bytes());
725        // flags
726        cont_chunk.push(0);
727        // reserved
728        cont_chunk.extend_from_slice(&[0u8; 3]);
729        // payload
730        cont_chunk.extend_from_slice(&unknown_payload);
731
732        // We will place the continuation chunk after the main header.
733        // First build the main header with a continuation message.
734        let main_header_base_size = 16; // v1 prefix
735                                        // The continuation message envelope = 8, payload = offset_size + length_size.
736                                        // With offset_size=8, length_size=8, the continuation payload is 16 bytes.
737        let cont_msg_envelope_size = 8 + 16; // 24
738        let cont_chunk_offset = (main_header_base_size + cont_msg_envelope_size) as u64;
739
740        let mut cont_payload = Vec::new();
741        cont_payload.extend_from_slice(&cont_chunk_offset.to_le_bytes()); // offset
742        cont_payload.extend_from_slice(&(cont_chunk.len() as u64).to_le_bytes()); // length
743
744        let main_header = build_v1_header(&[(MSG_TYPE_CONTINUATION, 0, &cont_payload)], 1);
745
746        // Concatenate main header + continuation chunk.
747        let mut file_data = main_header;
748        assert_eq!(file_data.len() as u64, cont_chunk_offset);
749        file_data.extend_from_slice(&cont_chunk);
750
751        let hdr = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap();
752        // Should have the continuation marker + the unknown message from the continuation chunk.
753        assert_eq!(hdr.messages.len(), 2);
754        assert!(matches!(
755            hdr.messages[0],
756            HdfMessage::ObjectHeaderContinuation
757        ));
758        match &hdr.messages[1] {
759            HdfMessage::Unknown { type_id, data } => {
760                assert_eq!(*type_id, 0x00FE);
761                assert_eq!(data.as_slice(), &unknown_payload);
762            }
763            other => panic!("expected Unknown from continuation, got {:?}", other),
764        }
765    }
766
767    #[test]
768    fn v1_nonzero_address_offset() {
769        // Place the header at a non-zero offset in the file.
770        let prefix_pad = vec![0xFFu8; 64];
771        let header = build_v1_header(&[(0x00AA, 0, &[0x01])], 3);
772
773        let mut file_data = prefix_pad;
774        file_data.extend_from_slice(&header);
775
776        let hdr = ObjectHeader::parse_at(&file_data, 64, 8, 8).unwrap();
777        assert_eq!(hdr.version, 1);
778        assert_eq!(hdr.reference_count, 3);
779        assert_eq!(hdr.messages.len(), 1);
780    }
781
782    #[test]
783    fn v1_bad_version() {
784        let mut data = build_v1_header(&[], 1);
785        data[0] = 3; // corrupt version to 3
786        let err = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap_err();
787        assert!(matches!(err, Error::UnsupportedObjectHeaderVersion(3)));
788    }
789
790    // ------------------------------------------------------------------
791    // Tests — Version 2
792    // ------------------------------------------------------------------
793
794    #[test]
795    fn v2_empty_header() {
796        // Flags=0 → 1-byte size field, no timestamps, no phase change, no creation order.
797        let data = build_v2_header(0x00, &[], None, None);
798        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
799        assert_eq!(hdr.version, 2);
800        assert!(hdr.messages.is_empty());
801        assert!(hdr.modification_time.is_none());
802    }
803
804    #[test]
805    fn v2_nil_message() {
806        let data = build_v2_header(0x00, &[(0x00, 0, &[0u8; 3])], None, None);
807        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
808        assert_eq!(hdr.messages.len(), 1);
809        assert!(matches!(hdr.messages[0], HdfMessage::Nil));
810    }
811
812    #[test]
813    fn v2_unknown_message() {
814        let payload = [0x11, 0x22];
815        let data = build_v2_header(0x00, &[(0xFE, 0, &payload)], None, None);
816        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
817        assert_eq!(hdr.messages.len(), 1);
818        match &hdr.messages[0] {
819            HdfMessage::Unknown { type_id, data } => {
820                assert_eq!(*type_id, 0x00FE);
821                assert_eq!(data.as_slice(), &payload);
822            }
823            other => panic!("expected Unknown, got {:?}", other),
824        }
825    }
826
827    #[test]
828    fn v2_with_timestamps() {
829        // Flags: bit 5 (timestamps) + bits 0-1 = 0 (1-byte size field).
830        let flags = 0x20;
831        let ts = [1000u32, 2000, 3000, 4000]; // access, modification, change, birth
832        let data = build_v2_header(flags, &[], Some(ts), None);
833        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
834        assert_eq!(hdr.modification_time, Some(2000));
835    }
836
837    #[test]
838    fn v2_with_phase_change() {
839        // Flags: bit 4 (phase change) + bits 0-1 = 0.
840        let flags = 0x10;
841        let data = build_v2_header(flags, &[], None, Some((8, 6)));
842        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
843        assert!(hdr.messages.is_empty());
844    }
845
846    #[test]
847    fn v2_with_creation_order() {
848        // Flags: bit 2 (creation order tracked) + bits 0-1 = 0.
849        let flags = 0x04;
850        let payload = [0xAA];
851        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], None, None);
852        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
853        assert_eq!(hdr.messages.len(), 1);
854        match &hdr.messages[0] {
855            HdfMessage::Unknown { type_id, .. } => assert_eq!(*type_id, 0x00FE),
856            other => panic!("expected Unknown, got {:?}", other),
857        }
858    }
859
860    #[test]
861    fn v2_2byte_size_field() {
862        // bits 0-1 = 1 → 2-byte size field.
863        let flags = 0x01;
864        let payload = [0x42; 5];
865        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], None, None);
866        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
867        assert_eq!(hdr.messages.len(), 1);
868    }
869
870    #[test]
871    fn v2_4byte_size_field() {
872        // bits 0-1 = 2 → 4-byte size field.
873        let flags = 0x02;
874        let data = build_v2_header(flags, &[(0xFE, 0, &[0x01])], None, None);
875        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
876        assert_eq!(hdr.messages.len(), 1);
877    }
878
879    #[test]
880    fn v2_8byte_size_field() {
881        // bits 0-1 = 3 → 8-byte size field.
882        let flags = 0x03;
883        let data = build_v2_header(flags, &[(0xFE, 0, &[0x01])], None, None);
884        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
885        assert_eq!(hdr.messages.len(), 1);
886    }
887
888    #[test]
889    fn v2_checksum_mismatch() {
890        let mut data = build_v2_header(0x00, &[(0xFE, 0, &[0x01])], None, None);
891        // Corrupt the last byte (part of checksum).
892        let last = data.len() - 1;
893        data[last] ^= 0xFF;
894        let err = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap_err();
895        assert!(matches!(err, Error::ChecksumMismatch { .. }));
896    }
897
898    #[test]
899    fn v2_continuation_chunk() {
900        // Build a continuation chunk (OCHK) that holds one unknown message.
901        let unknown_payload = [0xCC; 3];
902        let ochk = build_v2_ochk(&[(0xFD, 0, &unknown_payload)], false);
903
904        // The continuation message payload is offset(8) + length(8) = 16 bytes.
905        // We will compute the offset of the OCHK after building the main OHDR.
906        // Strategy: build OHDR first with a placeholder, measure its size,
907        // set the actual offset, then rebuild.
908
909        // Placeholder continuation payload (will rewrite).
910        let mut cont_payload = vec![0u8; 16];
911
912        // Build OHDR with the continuation message.  The OHDR occupies:
913        //   4 (sig) + 1 (ver) + 1 (flags) + 1 (size field, flags=0) + messages + 4 (checksum)
914        // Message envelope: type(1) + size(2) + flags(1) = 4; payload = 16.
915        // Total OHDR = 4 + 1 + 1 + 1 + 4 + 16 + 4 = 31 bytes.
916        // The OCHK starts at byte 31.
917
918        // We need the offset to be the byte where OCHK starts.
919        // OHDR: sig(4) + ver(1) + flags(1) + size(1) + [envelope(4)+payload(16)] + checksum(4) = 31
920        let ohdr_size = 4 + 1 + 1 + 1 + (4 + cont_payload.len()) + 4;
921        let ochk_offset = ohdr_size as u64;
922
923        // Rebuild continuation payload with correct offset.
924        cont_payload.clear();
925        cont_payload.extend_from_slice(&ochk_offset.to_le_bytes());
926        cont_payload.extend_from_slice(&(ochk.len() as u64).to_le_bytes());
927
928        let ohdr = build_v2_header(0x00, &[(0x10, 0, &cont_payload)], None, None);
929        assert_eq!(ohdr.len(), ohdr_size);
930
931        let mut file_data = ohdr;
932        file_data.extend_from_slice(&ochk);
933
934        let hdr = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap();
935        // Should have: continuation marker + unknown message from OCHK.
936        assert_eq!(hdr.messages.len(), 2);
937        assert!(matches!(
938            hdr.messages[0],
939            HdfMessage::ObjectHeaderContinuation
940        ));
941        match &hdr.messages[1] {
942            HdfMessage::Unknown { type_id, data } => {
943                assert_eq!(*type_id, 0x00FD);
944                assert_eq!(data.as_slice(), &unknown_payload);
945            }
946            other => panic!("expected Unknown from OCHK, got {:?}", other),
947        }
948    }
949
950    #[test]
951    fn v2_ochk_checksum_mismatch() {
952        let unknown_payload = [0xCC; 3];
953        let mut ochk = build_v2_ochk(&[(0xFD, 0, &unknown_payload)], false);
954        // Corrupt OCHK checksum.
955        let last = ochk.len() - 1;
956        ochk[last] ^= 0xFF;
957
958        let ohdr_size = 4 + 1 + 1 + 1 + (4 + 16) + 4; // 31
959        let ochk_offset = ohdr_size as u64;
960
961        let mut cont_payload = Vec::new();
962        cont_payload.extend_from_slice(&ochk_offset.to_le_bytes());
963        cont_payload.extend_from_slice(&(ochk.len() as u64).to_le_bytes());
964
965        let ohdr = build_v2_header(0x00, &[(0x10, 0, &cont_payload)], None, None);
966        let mut file_data = ohdr;
967        file_data.extend_from_slice(&ochk);
968
969        let err = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap_err();
970        assert!(matches!(err, Error::ChecksumMismatch { .. }));
971    }
972
973    #[test]
974    fn v2_multiple_messages() {
975        // Two unknown messages in the same chunk.
976        let p1 = [0x01, 0x02];
977        let p2 = [0x03, 0x04, 0x05];
978        let data = build_v2_header(0x00, &[(0xA0, 0, &p1), (0xA1, 0, &p2)], None, None);
979        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
980        assert_eq!(hdr.messages.len(), 2);
981        match &hdr.messages[0] {
982            HdfMessage::Unknown { type_id, .. } => assert_eq!(*type_id, 0x00A0),
983            other => panic!("expected Unknown 0xA0, got {:?}", other),
984        }
985        match &hdr.messages[1] {
986            HdfMessage::Unknown { type_id, .. } => assert_eq!(*type_id, 0x00A1),
987            other => panic!("expected Unknown 0xA1, got {:?}", other),
988        }
989    }
990
991    #[test]
992    fn v2_zero_length_nil_before_more_messages() {
993        let p1 = [0xAA];
994        let p2 = [0xBB];
995        let data = build_v2_header(
996            0x04,
997            &[(0xFE, 0, &p1), (0x00, 0, &[]), (0xFD, 0, &p2)],
998            None,
999            None,
1000        );
1001        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1002        assert_eq!(hdr.messages.len(), 3);
1003        assert!(matches!(hdr.messages[0], HdfMessage::Unknown { .. }));
1004        assert!(matches!(hdr.messages[1], HdfMessage::Nil));
1005        assert!(matches!(hdr.messages[2], HdfMessage::Unknown { .. }));
1006    }
1007
1008    #[test]
1009    fn v2_nonzero_address() {
1010        // Place the OHDR at offset 128 in a larger buffer.
1011        let prefix_pad = vec![0u8; 128];
1012        let ohdr = build_v2_header(0x00, &[(0xFE, 0, &[0x42])], None, None);
1013
1014        let mut file_data = prefix_pad;
1015        file_data.extend_from_slice(&ohdr);
1016
1017        let hdr = ObjectHeader::parse_at(&file_data, 128, 8, 8).unwrap();
1018        assert_eq!(hdr.version, 2);
1019        assert_eq!(hdr.messages.len(), 1);
1020    }
1021
1022    #[test]
1023    fn v2_all_flags_combined() {
1024        // Combine timestamps (0x20) + phase change (0x10) + creation order (0x04) + 2-byte size (0x01).
1025        let flags = 0x20 | 0x10 | 0x04 | 0x01;
1026        let ts = [100u32, 200, 300, 400];
1027        let payload = [0xBB];
1028        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], Some(ts), Some((12, 8)));
1029        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1030        assert_eq!(hdr.version, 2);
1031        assert_eq!(hdr.modification_time, Some(200));
1032        assert_eq!(hdr.messages.len(), 1);
1033    }
1034
1035    #[test]
1036    fn v1_multiple_messages() {
1037        // Two messages in a single v1 header.
1038        let p1 = [0xAA; 4];
1039        let p2 = [0xBB; 8];
1040        let data = build_v1_header(&[(0x00FF, 0, &p1), (0x00FE, 0, &p2)], 5);
1041        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1042        assert_eq!(hdr.version, 1);
1043        assert_eq!(hdr.reference_count, 5);
1044        assert_eq!(hdr.messages.len(), 2);
1045    }
1046
1047    #[test]
1048    fn v1_4byte_offsets() {
1049        // Verify correct operation with 4-byte offset/length sizes.
1050        // Symbol table message with 4-byte addresses.
1051        let mut payload = Vec::new();
1052        payload.extend_from_slice(&0x1000u32.to_le_bytes());
1053        payload.extend_from_slice(&0x2000u32.to_le_bytes());
1054
1055        let data = build_v1_header(&[(0x0011, 0, &payload)], 1);
1056        let hdr = ObjectHeader::parse_at(&data, 0, 4, 4).unwrap();
1057        assert_eq!(hdr.messages.len(), 1);
1058        match &hdr.messages[0] {
1059            HdfMessage::SymbolTable(st) => {
1060                assert_eq!(st.btree_address, 0x1000);
1061                assert_eq!(st.heap_address, 0x2000);
1062            }
1063            other => panic!("expected SymbolTable, got {:?}", other),
1064        }
1065    }
1066}