Skip to main content

hdf5_reader/
object_header.rs

1//! HDF5 Object Header parser (v1 and v2).
2//!
3//! Object headers contain a collection of header messages that describe an
4//! HDF5 object (group, dataset, committed datatype, etc.).  Two on-disk
5//! formats exist:
6//!
7//! * **Version 1** (HDF5 < 1.8) — 16-byte fixed prefix, messages each have an
8//!   8-byte envelope (type u16 + size u16 + flags u8 + reserved 3).
9//! * **Version 2** (HDF5 >= 1.8) — begins with the `OHDR` signature, variable-
10//!   length prefix, messages have a 4-or-6-byte envelope, and every chunk is
11//!   checksummed with Jenkins lookup3.
12//!
13//! Continuation messages (type `0x0010`) cause the parser to follow an offset
14//! to an additional chunk of messages (an `OCHK` block in v2, or a raw message
15//! run in v1).
16
17use crate::checksum::jenkins_lookup3;
18use crate::error::{Error, Result};
19use crate::io::Cursor;
20use crate::messages::shared::SharedMessage;
21use crate::messages::{parse_message, HdfMessage};
22use crate::storage::Storage;
23
/// Four-byte magic that opens a version-2 object header.
const OHDR_SIGNATURE: [u8; 4] = [b'O', b'H', b'D', b'R'];

/// Four-byte magic that opens a version-2 continuation chunk.
const OCHK_SIGNATURE: [u8; 4] = [b'O', b'C', b'H', b'K'];

/// Type id of the object header continuation message.
const MSG_TYPE_CONTINUATION: u16 = 0x10;

/// Type id of the NIL (padding) message.
const MSG_TYPE_NIL: u16 = 0;
35
36/// Parsed object header with all its messages.
37#[derive(Debug, Clone)]
38pub struct ObjectHeader {
39    /// Object header format version (1 or 2).
40    pub version: u8,
41    /// All parsed header messages, collected from every chunk.
42    pub messages: Vec<HdfMessage>,
43    /// Object reference count.
44    pub reference_count: u32,
45    /// Modification time in seconds since the UNIX epoch (v2 only, when the
46    /// "times stored" flag is set).
47    pub modification_time: Option<u32>,
48}
49
50impl ObjectHeader {
51    /// Parse an object header at the given absolute file address.
52    ///
53    /// `data` is the entire file mapped into memory, `address` is the byte
54    /// offset where the object header starts, and `offset_size` / `length_size`
55    /// come from the superblock.
56    pub fn parse_at(data: &[u8], address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
57        let mut cursor = Cursor::new(data);
58        cursor.set_position(address);
59
60        // Peek at the first four bytes to decide v1 vs v2.
61        let sig = cursor.peek_bytes(4)?;
62        if sig == OHDR_SIGNATURE {
63            Self::parse_v2(&cursor, address, offset_size, length_size)
64        } else {
65            Self::parse_v1(&cursor, address, offset_size, length_size)
66        }
67    }
68
69    /// Parse an object header from random-access storage.
70    pub fn parse_at_storage(
71        storage: &dyn Storage,
72        address: u64,
73        offset_size: u8,
74        length_size: u8,
75    ) -> Result<Self> {
76        let prefix = storage.read_range(address, 64)?;
77        if prefix.len() < 5 {
78            return Err(Error::UnexpectedEof {
79                offset: address,
80                needed: 5,
81                available: prefix.len() as u64,
82            });
83        }
84
85        if prefix.as_ref()[..4] == OHDR_SIGNATURE {
86            Self::parse_v2_storage(storage, address, offset_size, length_size)
87        } else {
88            Self::parse_v1_storage(storage, address, offset_size, length_size)
89        }
90    }
91
92    /// Resolve shared messages by following references to other object headers.
93    ///
94    /// For `SharedInOhdr`, the referenced object header is parsed and the
95    /// matching message type is extracted. `SharedInSohm` requires the
96    /// storage-backed resolver because the SOHM table lives in file metadata.
97    pub fn resolve_shared_messages(
98        &mut self,
99        data: &[u8],
100        offset_size: u8,
101        length_size: u8,
102    ) -> Result<()> {
103        let old_messages = std::mem::take(&mut self.messages);
104        let mut resolved = Vec::with_capacity(old_messages.len());
105        for msg in old_messages {
106            match msg {
107                HdfMessage::Shared(SharedMessage::SharedInOhdr {
108                    message_type,
109                    address,
110                }) => {
111                    match Self::parse_at(data, address, offset_size, length_size) {
112                        Ok(target_header) => {
113                            if let Some(target_msg) =
114                                select_shared_message(target_header, message_type)
115                            {
116                                resolved.push(target_msg);
117                            }
118                        }
119                        Err(_) => {
120                            // If we can't parse the target, keep the shared ref
121                            resolved.push(HdfMessage::Shared(SharedMessage::SharedInOhdr {
122                                message_type,
123                                address,
124                            }));
125                        }
126                    }
127                }
128                HdfMessage::Shared(SharedMessage::SharedInSohm { .. }) => {
129                    self.messages = resolved;
130                    return Err(Error::Other(
131                        "SOHM table lookup not yet supported — file uses shared object header messages".to_string(),
132                    ));
133                }
134                other => resolved.push(other),
135            }
136        }
137        self.messages = resolved;
138        Ok(())
139    }
140
141    /// Resolve shared messages by following references via random-access storage.
142    pub fn resolve_shared_messages_storage(
143        &mut self,
144        storage: &dyn Storage,
145        offset_size: u8,
146        length_size: u8,
147    ) -> Result<()> {
148        self.resolve_shared_messages_storage_with_sohm(
149            storage,
150            offset_size,
151            length_size,
152            |_heap_id, _message_type| Ok(None),
153        )
154    }
155
156    /// Resolve shared messages using random-access storage and a SOHM resolver.
157    pub(crate) fn resolve_shared_messages_storage_with_sohm<F>(
158        &mut self,
159        storage: &dyn Storage,
160        offset_size: u8,
161        length_size: u8,
162        mut resolve_sohm: F,
163    ) -> Result<()>
164    where
165        F: FnMut(&[u8], u16) -> Result<Option<HdfMessage>>,
166    {
167        let old_messages = std::mem::take(&mut self.messages);
168        let mut resolved = Vec::with_capacity(old_messages.len());
169        for msg in old_messages {
170            match msg {
171                HdfMessage::Shared(SharedMessage::SharedInOhdr {
172                    message_type,
173                    address,
174                }) => match Self::parse_at_storage(storage, address, offset_size, length_size) {
175                    Ok(target_header) => {
176                        if let Some(target_msg) = select_shared_message(target_header, message_type)
177                        {
178                            resolved.push(target_msg);
179                        }
180                    }
181                    Err(_) => {
182                        resolved.push(HdfMessage::Shared(SharedMessage::SharedInOhdr {
183                            message_type,
184                            address,
185                        }));
186                    }
187                },
188                HdfMessage::Shared(SharedMessage::SharedInSohm {
189                    message_type,
190                    heap_id,
191                }) => {
192                    if let Some(message) = resolve_sohm(&heap_id, message_type)? {
193                        resolved.push(message);
194                    } else {
195                        self.messages = resolved;
196                        return Err(Error::Other(format!(
197                            "SOHM entry for message type {message_type:#x} not found"
198                        )));
199                    }
200                }
201                other => resolved.push(other),
202            }
203        }
204        self.messages = resolved;
205        Ok(())
206    }
207
208    // ------------------------------------------------------------------
209    // Version 1
210    // ------------------------------------------------------------------
211
212    /// Parse a version-1 object header.
213    ///
214    /// Layout (16 bytes total):
215    /// ```text
216    ///   version          u8    (must be 1)
217    ///   reserved         u8
218    ///   num_messages     u16
219    ///   ref_count        u32
220    ///   header_data_size u32   (byte count of the message run)
221    ///   reserved         u32   (alignment padding)
222    /// ```
223    fn parse_v1(base: &Cursor<'_>, address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
224        let mut cursor = base.at_offset(address)?;
225
226        let version = cursor.read_u8()?;
227        if version != 1 {
228            return Err(Error::UnsupportedObjectHeaderVersion(version));
229        }
230
231        let _reserved = cursor.read_u8()?;
232        let num_messages = cursor.read_u16_le()?;
233        let reference_count = cursor.read_u32_le()?;
234        let header_data_size = cursor.read_u32_le()? as u64;
235        let _reserved2 = cursor.read_u32_le()?; // alignment padding
236
237        // Messages start right after the 16-byte prefix.
238        let messages_start = cursor.position();
239        let messages_end = messages_start + header_data_size;
240
241        let mut messages: Vec<HdfMessage> = Vec::with_capacity(num_messages as usize);
242        let mut continuations: Vec<(u64, u64)> = Vec::new();
243
244        Self::read_v1_messages(
245            base,
246            messages_start,
247            messages_end,
248            offset_size,
249            length_size,
250            &mut messages,
251            &mut continuations,
252        )?;
253
254        // Follow continuation messages.
255        while let Some((cont_offset, cont_length)) = continuations.pop() {
256            let cont_end = cont_offset + cont_length;
257            Self::read_v1_messages(
258                base,
259                cont_offset,
260                cont_end,
261                offset_size,
262                length_size,
263                &mut messages,
264                &mut continuations,
265            )?;
266        }
267
268        Ok(ObjectHeader {
269            version: 1,
270            messages,
271            reference_count,
272            modification_time: None,
273        })
274    }
275
276    fn parse_v1_storage(
277        storage: &dyn Storage,
278        address: u64,
279        offset_size: u8,
280        length_size: u8,
281    ) -> Result<Self> {
282        let header = storage.read_range(address, 16)?;
283        let mut cursor = Cursor::new(header.as_ref());
284
285        let version = cursor.read_u8()?;
286        if version != 1 {
287            return Err(Error::UnsupportedObjectHeaderVersion(version));
288        }
289
290        let _reserved = cursor.read_u8()?;
291        let num_messages = cursor.read_u16_le()?;
292        let reference_count = cursor.read_u32_le()?;
293        let header_data_size = cursor.read_u32_le()? as u64;
294        let _reserved2 = cursor.read_u32_le()?;
295
296        let first_chunk = storage.read_range(address, (16 + header_data_size) as usize)?;
297        let mut messages = Vec::with_capacity(num_messages as usize);
298        let mut continuations = Vec::new();
299        Self::read_v1_messages_from_slice(
300            &first_chunk.as_ref()[16..],
301            offset_size,
302            length_size,
303            &mut messages,
304            &mut continuations,
305        )?;
306
307        while let Some((cont_offset, cont_length)) = continuations.pop() {
308            let chunk = storage.read_range(cont_offset, cont_length as usize)?;
309            Self::read_v1_messages_from_slice(
310                chunk.as_ref(),
311                offset_size,
312                length_size,
313                &mut messages,
314                &mut continuations,
315            )?;
316        }
317
318        Ok(ObjectHeader {
319            version: 1,
320            messages,
321            reference_count,
322            modification_time: None,
323        })
324    }
325
326    /// Read v1 header messages from `start..end`, appending to `messages`.
327    /// Any continuation messages encountered are pushed onto `continuations`
328    /// for the caller to follow.
329    fn read_v1_messages(
330        base: &Cursor<'_>,
331        start: u64,
332        end: u64,
333        offset_size: u8,
334        length_size: u8,
335        messages: &mut Vec<HdfMessage>,
336        continuations: &mut Vec<(u64, u64)>,
337    ) -> Result<()> {
338        let mut cursor = base.at_offset(start)?;
339
340        while cursor.position() + 8 <= end {
341            let msg_type = cursor.read_u16_le()?;
342            let msg_data_size = cursor.read_u16_le()? as usize;
343            let msg_flags = cursor.read_u8()?;
344            let _reserved = cursor.read_bytes(3)?; // 3 reserved bytes
345
346            // Bounds-check the message data within this chunk.
347            if cursor.position() + msg_data_size as u64 > end {
348                return Err(Error::InvalidData(format!(
349                    "v1 message data ({} bytes) extends past header chunk end",
350                    msg_data_size
351                )));
352            }
353
354            if msg_type == MSG_TYPE_NIL {
355                // Nil / padding — skip the data bytes.
356                cursor.skip(msg_data_size)?;
357                messages.push(HdfMessage::Nil);
358                continue;
359            }
360
361            let msg_data = cursor.read_bytes(msg_data_size)?;
362            let is_shared = (msg_flags & 0x02) != 0;
363
364            if is_shared {
365                // Shared message — the stored bytes are a shared-message
366                // reference, not the message payload itself.
367                let shared_msg = crate::messages::shared::parse(
368                    &mut Cursor::new(msg_data),
369                    msg_type,
370                    offset_size,
371                    length_size,
372                    msg_data_size,
373                )?;
374                messages.push(HdfMessage::Shared(shared_msg));
375            } else if msg_type == MSG_TYPE_CONTINUATION {
376                // Parse the continuation message to get offset + length, then
377                // enqueue it for later processing.
378                let cont = crate::messages::continuation::parse(
379                    &mut Cursor::new(msg_data),
380                    offset_size,
381                    length_size,
382                    msg_data_size,
383                )?;
384                continuations.push((cont.offset, cont.length));
385                messages.push(HdfMessage::ObjectHeaderContinuation);
386            } else {
387                let parsed = parse_message(
388                    msg_type,
389                    msg_data.len(),
390                    &mut Cursor::new(msg_data),
391                    offset_size,
392                    length_size,
393                )?;
394                messages.push(parsed);
395            }
396        }
397
398        Ok(())
399    }
400
401    // ------------------------------------------------------------------
402    // Version 2
403    // ------------------------------------------------------------------
404
405    /// Parse a version-2 object header.
406    ///
407    /// Layout:
408    /// ```text
409    ///   signature  4 bytes  ("OHDR")
410    ///   version    u8       (must be 2)
411    ///   flags      u8
412    ///   [optional timestamps — 4 x u32 if bit 5 of flags]
413    ///   [optional attr phase change — 2 x u16 if bit 4 of flags]
414    ///   chunk0_size  1/2/4/8 bytes (encoded size depends on bits 0-1 of flags)
415    ///   <messages for chunk 0>
416    ///   checksum   u32      (Jenkins lookup3 from "OHDR" through last byte before checksum)
417    /// ```
418    fn parse_v2(base: &Cursor<'_>, address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
419        let mut cursor = base.at_offset(address)?;
420
421        // ---- Fixed prefix ----
422        let sig = cursor.read_bytes(4)?;
423        if sig != OHDR_SIGNATURE {
424            return Err(Error::InvalidObjectHeaderSignature);
425        }
426        let version = cursor.read_u8()?;
427        if version != 2 {
428            return Err(Error::UnsupportedObjectHeaderVersion(version));
429        }
430        let flags = cursor.read_u8()?;
431
432        // Bit 5 — timestamps stored.
433        let modification_time = if (flags & 0x20) != 0 {
434            let _access_time = cursor.read_u32_le()?;
435            let mod_time = cursor.read_u32_le()?;
436            let _change_time = cursor.read_u32_le()?;
437            let _birth_time = cursor.read_u32_le()?;
438            Some(mod_time)
439        } else {
440            None
441        };
442
443        // Bit 4 — non-default attribute storage phase change values.
444        if (flags & 0x10) != 0 {
445            let _max_compact = cursor.read_u16_le()?;
446            let _min_dense = cursor.read_u16_le()?;
447        }
448
449        // Chunk#0 size — width depends on bits 0-1 of flags.
450        let size_field_width = 1usize << (flags & 0x03);
451        let chunk0_data_size = cursor.read_uvar(size_field_width)?;
452
453        // Bit 2 — attribute creation order tracked (affects per-message envelope).
454        let creation_order_tracked = (flags & 0x04) != 0;
455
456        // Messages for chunk 0 run from the current position for
457        // `chunk0_data_size` bytes.  The last 4 bytes of that range are the
458        // checksum.
459        let messages_start = cursor.position();
460        let chunk0_end = messages_start + chunk0_data_size;
461
462        // The checksum covers everything from "OHDR" through the last byte
463        // before the checksum field.
464        let checksum_start = address as usize;
465        let checksum_end = chunk0_end as usize; // the checksum itself sits at chunk0_end
466        let stored_checksum = {
467            let mut ck = base.at_offset(chunk0_end)?;
468            ck.read_u32_le()?
469        };
470        let computed = jenkins_lookup3(&base.data()[checksum_start..checksum_end]);
471        if computed != stored_checksum {
472            return Err(Error::ChecksumMismatch {
473                expected: stored_checksum,
474                actual: computed,
475            });
476        }
477
478        let mut messages: Vec<HdfMessage> = Vec::new();
479        let mut continuations: Vec<(u64, u64)> = Vec::new();
480
481        Self::read_v2_messages(
482            base,
483            messages_start,
484            chunk0_end,
485            offset_size,
486            length_size,
487            creation_order_tracked,
488            &mut messages,
489            &mut continuations,
490        )?;
491
492        // Follow continuation chunks.
493        while let Some((cont_offset, cont_length)) = continuations.pop() {
494            Self::read_v2_continuation_chunk(
495                base,
496                cont_offset,
497                cont_length,
498                offset_size,
499                length_size,
500                creation_order_tracked,
501                &mut messages,
502                &mut continuations,
503            )?;
504        }
505
506        Ok(ObjectHeader {
507            version: 2,
508            messages,
509            reference_count: 0, // v2 does not store a reference count in the header
510            modification_time,
511        })
512    }
513
514    fn parse_v2_storage(
515        storage: &dyn Storage,
516        address: u64,
517        offset_size: u8,
518        length_size: u8,
519    ) -> Result<Self> {
520        let prefix = storage.read_range(address, 64)?;
521        let mut cursor = Cursor::new(prefix.as_ref());
522
523        let sig = cursor.read_bytes(4)?;
524        if sig != OHDR_SIGNATURE {
525            return Err(Error::InvalidObjectHeaderSignature);
526        }
527        let version = cursor.read_u8()?;
528        if version != 2 {
529            return Err(Error::UnsupportedObjectHeaderVersion(version));
530        }
531        let flags = cursor.read_u8()?;
532
533        let modification_time = if (flags & 0x20) != 0 {
534            let _access_time = cursor.read_u32_le()?;
535            let mod_time = cursor.read_u32_le()?;
536            let _change_time = cursor.read_u32_le()?;
537            let _birth_time = cursor.read_u32_le()?;
538            Some(mod_time)
539        } else {
540            None
541        };
542
543        if (flags & 0x10) != 0 {
544            let _max_compact = cursor.read_u16_le()?;
545            let _min_dense = cursor.read_u16_le()?;
546        }
547
548        let size_field_width = 1usize << (flags & 0x03);
549        let chunk0_data_size = cursor.read_uvar(size_field_width)?;
550        let creation_order_tracked = (flags & 0x04) != 0;
551        let messages_start = cursor.position() as usize;
552        let chunk0_end = messages_start + chunk0_data_size as usize;
553
554        let chunk = storage.read_range(address, chunk0_end + 4)?;
555        let stored_checksum = u32::from_le_bytes(
556            chunk.as_ref()[chunk0_end..chunk0_end + 4]
557                .try_into()
558                .unwrap(),
559        );
560        let computed = jenkins_lookup3(&chunk.as_ref()[..chunk0_end]);
561        if computed != stored_checksum {
562            return Err(Error::ChecksumMismatch {
563                expected: stored_checksum,
564                actual: computed,
565            });
566        }
567
568        let mut messages = Vec::new();
569        let mut continuations = Vec::new();
570        Self::read_v2_messages_from_slice(
571            &chunk.as_ref()[messages_start..chunk0_end],
572            offset_size,
573            length_size,
574            creation_order_tracked,
575            &mut messages,
576            &mut continuations,
577        )?;
578
579        while let Some((cont_offset, cont_length)) = continuations.pop() {
580            Self::read_v2_continuation_chunk_storage(
581                storage,
582                cont_offset,
583                cont_length,
584                offset_size,
585                length_size,
586                creation_order_tracked,
587                &mut messages,
588                &mut continuations,
589            )?;
590        }
591
592        Ok(ObjectHeader {
593            version: 2,
594            messages,
595            reference_count: 0,
596            modification_time,
597        })
598    }
599
600    /// Read v2 messages from `start..end`.
601    #[allow(clippy::too_many_arguments)]
602    fn read_v2_messages(
603        base: &Cursor<'_>,
604        start: u64,
605        end: u64,
606        offset_size: u8,
607        length_size: u8,
608        creation_order_tracked: bool,
609        messages: &mut Vec<HdfMessage>,
610        continuations: &mut Vec<(u64, u64)>,
611    ) -> Result<()> {
612        let mut cursor = base.at_offset(start)?;
613
614        // Minimum envelope: type(1) + size(2) + flags(1) = 4 bytes, optionally
615        // +2 for creation order.
616        let min_envelope = if creation_order_tracked { 6 } else { 4 };
617
618        while cursor.position() + min_envelope as u64 <= end {
619            let msg_type = cursor.read_u8()? as u16;
620            let msg_data_size = cursor.read_u16_le()? as usize;
621            let msg_flags = cursor.read_u8()?;
622
623            if creation_order_tracked {
624                let _creation_order = cursor.read_u16_le()?;
625            }
626
627            if msg_type == MSG_TYPE_NIL {
628                if msg_data_size == 0
629                    && base.data()[cursor.position() as usize..end as usize]
630                        .iter()
631                        .all(|byte| *byte == 0)
632                {
633                    break;
634                }
635                cursor.skip(msg_data_size)?;
636                messages.push(HdfMessage::Nil);
637                continue;
638            }
639
640            if cursor.position() + msg_data_size as u64 > end {
641                return Err(Error::InvalidData(format!(
642                    "v2 message data ({} bytes) extends past chunk end",
643                    msg_data_size
644                )));
645            }
646
647            let msg_data = cursor.read_bytes(msg_data_size)?;
648            let is_shared = (msg_flags & 0x02) != 0;
649
650            if is_shared {
651                let shared_msg = crate::messages::shared::parse(
652                    &mut Cursor::new(msg_data),
653                    msg_type,
654                    offset_size,
655                    length_size,
656                    msg_data_size,
657                )?;
658                messages.push(HdfMessage::Shared(shared_msg));
659            } else if msg_type == MSG_TYPE_CONTINUATION {
660                let cont = crate::messages::continuation::parse(
661                    &mut Cursor::new(msg_data),
662                    offset_size,
663                    length_size,
664                    msg_data_size,
665                )?;
666                continuations.push((cont.offset, cont.length));
667                messages.push(HdfMessage::ObjectHeaderContinuation);
668            } else {
669                let parsed = parse_message(
670                    msg_type,
671                    msg_data.len(),
672                    &mut Cursor::new(msg_data),
673                    offset_size,
674                    length_size,
675                )?;
676                messages.push(parsed);
677            }
678        }
679
680        Ok(())
681    }
682
683    fn read_v1_messages_from_slice(
684        data: &[u8],
685        offset_size: u8,
686        length_size: u8,
687        messages: &mut Vec<HdfMessage>,
688        continuations: &mut Vec<(u64, u64)>,
689    ) -> Result<()> {
690        let mut cursor = Cursor::new(data);
691        while cursor.remaining() >= 8 {
692            let msg_type = cursor.read_u16_le()?;
693            let msg_data_size = cursor.read_u16_le()? as usize;
694            let msg_flags = cursor.read_u8()?;
695            let _reserved = cursor.read_bytes(3)?;
696
697            if cursor.remaining() < msg_data_size as u64 {
698                return Err(Error::InvalidData(format!(
699                    "v1 message data ({} bytes) extends past header chunk end",
700                    msg_data_size
701                )));
702            }
703
704            if msg_type == MSG_TYPE_NIL {
705                cursor.skip(msg_data_size)?;
706                messages.push(HdfMessage::Nil);
707                continue;
708            }
709
710            let msg_data = cursor.read_bytes(msg_data_size)?;
711            let is_shared = (msg_flags & 0x02) != 0;
712            if is_shared {
713                let shared_msg = crate::messages::shared::parse(
714                    &mut Cursor::new(msg_data),
715                    msg_type,
716                    offset_size,
717                    length_size,
718                    msg_data_size,
719                )?;
720                messages.push(HdfMessage::Shared(shared_msg));
721            } else if msg_type == MSG_TYPE_CONTINUATION {
722                let cont = crate::messages::continuation::parse(
723                    &mut Cursor::new(msg_data),
724                    offset_size,
725                    length_size,
726                    msg_data_size,
727                )?;
728                continuations.push((cont.offset, cont.length));
729                messages.push(HdfMessage::ObjectHeaderContinuation);
730            } else {
731                let parsed = parse_message(
732                    msg_type,
733                    msg_data.len(),
734                    &mut Cursor::new(msg_data),
735                    offset_size,
736                    length_size,
737                )?;
738                messages.push(parsed);
739            }
740        }
741        Ok(())
742    }
743
744    fn read_v2_messages_from_slice(
745        data: &[u8],
746        offset_size: u8,
747        length_size: u8,
748        creation_order_tracked: bool,
749        messages: &mut Vec<HdfMessage>,
750        continuations: &mut Vec<(u64, u64)>,
751    ) -> Result<()> {
752        let mut cursor = Cursor::new(data);
753        let min_envelope = if creation_order_tracked { 6 } else { 4 };
754
755        while cursor.remaining() >= min_envelope as u64 {
756            let msg_type = cursor.read_u8()? as u16;
757            let msg_data_size = cursor.read_u16_le()? as usize;
758            let msg_flags = cursor.read_u8()?;
759
760            if creation_order_tracked {
761                let _creation_order = cursor.read_u16_le()?;
762            }
763
764            if msg_type == MSG_TYPE_NIL {
765                if msg_data_size == 0
766                    && data[cursor.position() as usize..]
767                        .iter()
768                        .all(|byte| *byte == 0)
769                {
770                    break;
771                }
772                cursor.skip(msg_data_size)?;
773                messages.push(HdfMessage::Nil);
774                continue;
775            }
776
777            if cursor.remaining() < msg_data_size as u64 {
778                return Err(Error::InvalidData(format!(
779                    "v2 message data ({} bytes) extends past chunk end",
780                    msg_data_size
781                )));
782            }
783
784            let msg_data = cursor.read_bytes(msg_data_size)?;
785            let is_shared = (msg_flags & 0x02) != 0;
786            if is_shared {
787                let shared_msg = crate::messages::shared::parse(
788                    &mut Cursor::new(msg_data),
789                    msg_type,
790                    offset_size,
791                    length_size,
792                    msg_data_size,
793                )?;
794                messages.push(HdfMessage::Shared(shared_msg));
795            } else if msg_type == MSG_TYPE_CONTINUATION {
796                let cont = crate::messages::continuation::parse(
797                    &mut Cursor::new(msg_data),
798                    offset_size,
799                    length_size,
800                    msg_data_size,
801                )?;
802                continuations.push((cont.offset, cont.length));
803                messages.push(HdfMessage::ObjectHeaderContinuation);
804            } else {
805                let parsed = parse_message(
806                    msg_type,
807                    msg_data.len(),
808                    &mut Cursor::new(msg_data),
809                    offset_size,
810                    length_size,
811                )?;
812                messages.push(parsed);
813            }
814        }
815
816        Ok(())
817    }
818
819    #[allow(clippy::too_many_arguments)]
820    fn read_v2_continuation_chunk_storage(
821        storage: &dyn Storage,
822        cont_offset: u64,
823        cont_length: u64,
824        offset_size: u8,
825        length_size: u8,
826        creation_order_tracked: bool,
827        messages: &mut Vec<HdfMessage>,
828        continuations: &mut Vec<(u64, u64)>,
829    ) -> Result<()> {
830        let chunk = storage.read_range(cont_offset, cont_length as usize)?;
831        if chunk.len() < 8 || chunk.as_ref()[..4] != OCHK_SIGNATURE {
832            return Err(Error::InvalidObjectHeaderSignature);
833        }
834        let messages_end = chunk.len() - 4;
835        let stored_checksum = u32::from_le_bytes(
836            chunk.as_ref()[messages_end..messages_end + 4]
837                .try_into()
838                .unwrap(),
839        );
840        let computed = jenkins_lookup3(&chunk.as_ref()[..messages_end]);
841        if computed != stored_checksum {
842            return Err(Error::ChecksumMismatch {
843                expected: stored_checksum,
844                actual: computed,
845            });
846        }
847
848        Self::read_v2_messages_from_slice(
849            &chunk.as_ref()[4..messages_end],
850            offset_size,
851            length_size,
852            creation_order_tracked,
853            messages,
854            continuations,
855        )
856    }
857
858    /// Read and verify a v2 continuation chunk (`OCHK`).
859    #[allow(clippy::too_many_arguments)]
860    ///
861    /// Layout:
862    /// ```text
863    ///   "OCHK"    4 bytes
864    ///   messages  (cont_length - 4 - 4) bytes
865    ///   checksum  u32
866    /// ```
867    fn read_v2_continuation_chunk(
868        base: &Cursor<'_>,
869        cont_offset: u64,
870        cont_length: u64,
871        offset_size: u8,
872        length_size: u8,
873        creation_order_tracked: bool,
874        messages: &mut Vec<HdfMessage>,
875        continuations: &mut Vec<(u64, u64)>,
876    ) -> Result<()> {
877        let mut cursor = base.at_offset(cont_offset)?;
878
879        let sig = cursor.read_bytes(4)?;
880        if sig != OCHK_SIGNATURE {
881            return Err(Error::InvalidObjectHeaderSignature);
882        }
883
884        let chunk_end = cont_offset + cont_length;
885        // The last 4 bytes of the chunk are the checksum.
886        let messages_end = chunk_end - 4;
887        let messages_start = cursor.position(); // right after "OCHK"
888
889        // Verify checksum: covers "OCHK" through the byte before the checksum.
890        let checksum_start = cont_offset as usize;
891        let checksum_end = messages_end as usize;
892        let stored_checksum = {
893            let mut ck = base.at_offset(messages_end)?;
894            ck.read_u32_le()?
895        };
896        let computed = jenkins_lookup3(&base.data()[checksum_start..checksum_end]);
897        if computed != stored_checksum {
898            return Err(Error::ChecksumMismatch {
899                expected: stored_checksum,
900                actual: computed,
901            });
902        }
903
904        Self::read_v2_messages(
905            base,
906            messages_start,
907            messages_end,
908            offset_size,
909            length_size,
910            creation_order_tracked,
911            messages,
912            continuations,
913        )
914    }
915}
916
917fn select_shared_message(header: ObjectHeader, message_type: u16) -> Option<HdfMessage> {
918    let mut first_real_message = None;
919    for message in header.messages {
920        match message {
921            HdfMessage::Nil | HdfMessage::ObjectHeaderContinuation | HdfMessage::Shared(_) => {
922                continue;
923            }
924            other if message_matches_type(&other, message_type) => return Some(other),
925            other => {
926                if first_real_message.is_none() {
927                    first_real_message = Some(other);
928                }
929            }
930        }
931    }
932    first_real_message
933}
934
935fn message_matches_type(message: &HdfMessage, message_type: u16) -> bool {
936    use crate::messages::*;
937
938    matches!(
939        (message_type, message),
940        (MSG_DATASPACE, HdfMessage::Dataspace(_))
941            | (MSG_DATATYPE, HdfMessage::Datatype(_))
942            | (MSG_FILL_VALUE, HdfMessage::FillValue(_))
943            | (MSG_FILL_VALUE_OLD, HdfMessage::FillValue(_))
944            | (MSG_DATA_LAYOUT, HdfMessage::DataLayout(_))
945            | (MSG_FILTER_PIPELINE, HdfMessage::FilterPipeline(_))
946            | (MSG_ATTRIBUTE, HdfMessage::Attribute(_))
947            | (MSG_ATTRIBUTE_INFO, HdfMessage::AttributeInfo(_))
948            | (MSG_LINK, HdfMessage::Link(_))
949            | (MSG_LINK_INFO, HdfMessage::LinkInfo(_))
950            | (MSG_GROUP_INFO, HdfMessage::GroupInfo(_))
951            | (MSG_SYMBOL_TABLE, HdfMessage::SymbolTable(_))
952            | (MSG_CONTINUATION, HdfMessage::Continuation(_))
953            | (MSG_MODIFICATION_TIME, HdfMessage::ModificationTime(_))
954            | (MSG_MODIFICATION_TIME_OLD, HdfMessage::ModificationTime(_))
955            | (MSG_BTREE_K, HdfMessage::BTreeK(_))
956            | (MSG_EXTERNAL_FILES, HdfMessage::ExternalFiles(_))
957            | (MSG_SHARED_TABLE, HdfMessage::SharedTable(_))
958            | (MSG_COMMENT, HdfMessage::Comment(_))
959            | (MSG_REFERENCE_COUNT, HdfMessage::ReferenceCount(_))
960    )
961}
962
963#[cfg(test)]
964mod tests {
965    use super::*;
966    use crate::checksum::jenkins_lookup3;
967
968    // ------------------------------------------------------------------
969    // Helpers
970    // ------------------------------------------------------------------
971
972    /// Build a v1 object header containing the given pre-encoded messages.
973    /// Each entry in `raw_messages` is `(type_id, flags, payload)`.
974    fn build_v1_header(raw_messages: &[(u16, u8, &[u8])], ref_count: u32) -> Vec<u8> {
975        // Compute total message data size.
976        let data_size: usize = raw_messages
977            .iter()
978            .map(|(_, _, payload)| 8 + payload.len()) // 8-byte envelope per message
979            .sum();
980
981        let mut buf = Vec::new();
982        // Version
983        buf.push(1);
984        // Reserved
985        buf.push(0);
986        // Number of messages
987        buf.extend_from_slice(&(raw_messages.len() as u16).to_le_bytes());
988        // Reference count
989        buf.extend_from_slice(&ref_count.to_le_bytes());
990        // Header data size
991        buf.extend_from_slice(&(data_size as u32).to_le_bytes());
992        // Reserved padding (4 bytes)
993        buf.extend_from_slice(&[0u8; 4]);
994
995        // Messages
996        for (type_id, flags, payload) in raw_messages {
997            buf.extend_from_slice(&type_id.to_le_bytes());
998            buf.extend_from_slice(&(payload.len() as u16).to_le_bytes());
999            buf.push(*flags);
1000            buf.extend_from_slice(&[0u8; 3]); // reserved
1001            buf.extend_from_slice(payload);
1002        }
1003
1004        buf
1005    }
1006
1007    /// Build a v2 OHDR chunk#0 with the given raw messages.
1008    /// `flags` controls the header flags byte.  Timestamps and phase-change
1009    /// values are added automatically when the corresponding flag bits are set.
1010    /// Each entry in `raw_messages` is `(type_id, flags, payload)`.
1011    /// Returns the complete OHDR block including the trailing checksum.
1012    fn build_v2_header(
1013        header_flags: u8,
1014        raw_messages: &[(u8, u8, &[u8])],
1015        timestamps: Option<[u32; 4]>,
1016        phase_change: Option<(u16, u16)>,
1017    ) -> Vec<u8> {
1018        let creation_order = (header_flags & 0x04) != 0;
1019
1020        // Compute message data size.
1021        let envelope_size: usize = if creation_order { 6 } else { 4 };
1022        let msg_data_size: usize = raw_messages
1023            .iter()
1024            .map(|(_, _, payload)| envelope_size + payload.len())
1025            .sum();
1026
1027        let mut buf = Vec::new();
1028        // Signature
1029        buf.extend_from_slice(&OHDR_SIGNATURE);
1030        // Version
1031        buf.push(2);
1032        // Flags
1033        buf.push(header_flags);
1034
1035        // Timestamps (bit 5)
1036        if let Some(ts) = timestamps {
1037            for &t in &ts {
1038                buf.extend_from_slice(&t.to_le_bytes());
1039            }
1040        }
1041
1042        // Phase change (bit 4)
1043        if let Some((max_compact, min_dense)) = phase_change {
1044            buf.extend_from_slice(&max_compact.to_le_bytes());
1045            buf.extend_from_slice(&min_dense.to_le_bytes());
1046        }
1047
1048        // Chunk#0 size field — encode using the width dictated by bits 0-1.
1049        let size_width = 1usize << (header_flags & 0x03);
1050        match size_width {
1051            1 => buf.push(msg_data_size as u8),
1052            2 => buf.extend_from_slice(&(msg_data_size as u16).to_le_bytes()),
1053            4 => buf.extend_from_slice(&(msg_data_size as u32).to_le_bytes()),
1054            8 => buf.extend_from_slice(&(msg_data_size as u64).to_le_bytes()),
1055            _ => unreachable!(),
1056        }
1057
1058        // Messages
1059        for (type_id, mflags, payload) in raw_messages {
1060            buf.push(*type_id);
1061            buf.extend_from_slice(&(payload.len() as u16).to_le_bytes());
1062            buf.push(*mflags);
1063            if creation_order {
1064                buf.extend_from_slice(&0u16.to_le_bytes());
1065            }
1066            buf.extend_from_slice(payload);
1067        }
1068
1069        // Checksum — covers everything so far.
1070        let ck = jenkins_lookup3(&buf);
1071        buf.extend_from_slice(&ck.to_le_bytes());
1072
1073        buf
1074    }
1075
1076    /// Build a v2 OCHK continuation chunk containing the given raw messages.
1077    fn build_v2_ochk(raw_messages: &[(u8, u8, &[u8])], creation_order: bool) -> Vec<u8> {
1078        let mut buf = Vec::new();
1079        // Signature
1080        buf.extend_from_slice(&OCHK_SIGNATURE);
1081
1082        // Messages
1083        for (type_id, mflags, payload) in raw_messages {
1084            buf.push(*type_id);
1085            buf.extend_from_slice(&(payload.len() as u16).to_le_bytes());
1086            buf.push(*mflags);
1087            if creation_order {
1088                buf.extend_from_slice(&0u16.to_le_bytes());
1089            }
1090            buf.extend_from_slice(payload);
1091        }
1092
1093        // Checksum over everything before the checksum itself.
1094        let ck = jenkins_lookup3(&buf);
1095        buf.extend_from_slice(&ck.to_le_bytes());
1096
1097        buf
1098    }
1099
1100    // ------------------------------------------------------------------
1101    // Tests — Version 1
1102    // ------------------------------------------------------------------
1103
1104    #[test]
1105    fn v1_empty_header() {
1106        let data = build_v1_header(&[], 1);
1107        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1108        assert_eq!(hdr.version, 1);
1109        assert_eq!(hdr.reference_count, 1);
1110        assert!(hdr.messages.is_empty());
1111        assert!(hdr.modification_time.is_none());
1112    }
1113
1114    #[test]
1115    fn v1_nil_message() {
1116        // A single nil message with 4 bytes of padding payload.
1117        let data = build_v1_header(&[(0x0000, 0, &[0u8; 4])], 1);
1118        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1119        assert_eq!(hdr.messages.len(), 1);
1120        assert!(matches!(hdr.messages[0], HdfMessage::Nil));
1121    }
1122
1123    #[test]
1124    fn v1_unknown_message() {
1125        // An unknown message type should be stored as HdfMessage::Unknown.
1126        let payload = [0xAA, 0xBB, 0xCC];
1127        let data = build_v1_header(&[(0x00FF, 0, &payload)], 2);
1128        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1129        assert_eq!(hdr.reference_count, 2);
1130        assert_eq!(hdr.messages.len(), 1);
1131        match &hdr.messages[0] {
1132            HdfMessage::Unknown { type_id, data } => {
1133                assert_eq!(*type_id, 0x00FF);
1134                assert_eq!(data.as_slice(), &payload);
1135            }
1136            other => panic!("expected Unknown, got {:?}", other),
1137        }
1138    }
1139
1140    #[test]
1141    fn v1_symbol_table_message() {
1142        // Type 0x0011 — symbol table message.
1143        // Payload: btree address (8 bytes) + heap address (8 bytes).
1144        let mut payload = Vec::new();
1145        payload.extend_from_slice(&0x1000u64.to_le_bytes());
1146        payload.extend_from_slice(&0x2000u64.to_le_bytes());
1147
1148        let data = build_v1_header(&[(0x0011, 0, &payload)], 1);
1149        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1150        assert_eq!(hdr.messages.len(), 1);
1151        match &hdr.messages[0] {
1152            HdfMessage::SymbolTable(st) => {
1153                assert_eq!(st.btree_address, 0x1000);
1154                assert_eq!(st.heap_address, 0x2000);
1155            }
1156            other => panic!("expected SymbolTable, got {:?}", other),
1157        }
1158    }
1159
1160    #[test]
1161    fn v1_continuation_message() {
1162        // Build a continuation payload that points to a second chunk.
1163        // The second chunk contains one unknown message.
1164        let unknown_payload = [0xDD; 2];
1165
1166        // Build the continuation target (a raw v1 message run, no header prefix).
1167        let mut cont_chunk = Vec::new();
1168        // message type 0x00FE
1169        cont_chunk.extend_from_slice(&0x00FEu16.to_le_bytes());
1170        // message data size
1171        cont_chunk.extend_from_slice(&(unknown_payload.len() as u16).to_le_bytes());
1172        // flags
1173        cont_chunk.push(0);
1174        // reserved
1175        cont_chunk.extend_from_slice(&[0u8; 3]);
1176        // payload
1177        cont_chunk.extend_from_slice(&unknown_payload);
1178
1179        // We will place the continuation chunk after the main header.
1180        // First build the main header with a continuation message.
1181        let main_header_base_size = 16; // v1 prefix
1182                                        // The continuation message envelope = 8, payload = offset_size + length_size.
1183                                        // With offset_size=8, length_size=8, the continuation payload is 16 bytes.
1184        let cont_msg_envelope_size = 8 + 16; // 24
1185        let cont_chunk_offset = (main_header_base_size + cont_msg_envelope_size) as u64;
1186
1187        let mut cont_payload = Vec::new();
1188        cont_payload.extend_from_slice(&cont_chunk_offset.to_le_bytes()); // offset
1189        cont_payload.extend_from_slice(&(cont_chunk.len() as u64).to_le_bytes()); // length
1190
1191        let main_header = build_v1_header(&[(MSG_TYPE_CONTINUATION, 0, &cont_payload)], 1);
1192
1193        // Concatenate main header + continuation chunk.
1194        let mut file_data = main_header;
1195        assert_eq!(file_data.len() as u64, cont_chunk_offset);
1196        file_data.extend_from_slice(&cont_chunk);
1197
1198        let hdr = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap();
1199        // Should have the continuation marker + the unknown message from the continuation chunk.
1200        assert_eq!(hdr.messages.len(), 2);
1201        assert!(matches!(
1202            hdr.messages[0],
1203            HdfMessage::ObjectHeaderContinuation
1204        ));
1205        match &hdr.messages[1] {
1206            HdfMessage::Unknown { type_id, data } => {
1207                assert_eq!(*type_id, 0x00FE);
1208                assert_eq!(data.as_slice(), &unknown_payload);
1209            }
1210            other => panic!("expected Unknown from continuation, got {:?}", other),
1211        }
1212    }
1213
1214    #[test]
1215    fn v1_nonzero_address_offset() {
1216        // Place the header at a non-zero offset in the file.
1217        let prefix_pad = vec![0xFFu8; 64];
1218        let header = build_v1_header(&[(0x00AA, 0, &[0x01])], 3);
1219
1220        let mut file_data = prefix_pad;
1221        file_data.extend_from_slice(&header);
1222
1223        let hdr = ObjectHeader::parse_at(&file_data, 64, 8, 8).unwrap();
1224        assert_eq!(hdr.version, 1);
1225        assert_eq!(hdr.reference_count, 3);
1226        assert_eq!(hdr.messages.len(), 1);
1227    }
1228
1229    #[test]
1230    fn v1_bad_version() {
1231        let mut data = build_v1_header(&[], 1);
1232        data[0] = 3; // corrupt version to 3
1233        let err = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap_err();
1234        assert!(matches!(err, Error::UnsupportedObjectHeaderVersion(3)));
1235    }
1236
1237    // ------------------------------------------------------------------
1238    // Tests — Version 2
1239    // ------------------------------------------------------------------
1240
1241    #[test]
1242    fn v2_empty_header() {
1243        // Flags=0 → 1-byte size field, no timestamps, no phase change, no creation order.
1244        let data = build_v2_header(0x00, &[], None, None);
1245        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1246        assert_eq!(hdr.version, 2);
1247        assert!(hdr.messages.is_empty());
1248        assert!(hdr.modification_time.is_none());
1249    }
1250
1251    #[test]
1252    fn v2_nil_message() {
1253        let data = build_v2_header(0x00, &[(0x00, 0, &[0u8; 3])], None, None);
1254        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1255        assert_eq!(hdr.messages.len(), 1);
1256        assert!(matches!(hdr.messages[0], HdfMessage::Nil));
1257    }
1258
1259    #[test]
1260    fn v2_unknown_message() {
1261        let payload = [0x11, 0x22];
1262        let data = build_v2_header(0x00, &[(0xFE, 0, &payload)], None, None);
1263        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1264        assert_eq!(hdr.messages.len(), 1);
1265        match &hdr.messages[0] {
1266            HdfMessage::Unknown { type_id, data } => {
1267                assert_eq!(*type_id, 0x00FE);
1268                assert_eq!(data.as_slice(), &payload);
1269            }
1270            other => panic!("expected Unknown, got {:?}", other),
1271        }
1272    }
1273
1274    #[test]
1275    fn v2_with_timestamps() {
1276        // Flags: bit 5 (timestamps) + bits 0-1 = 0 (1-byte size field).
1277        let flags = 0x20;
1278        let ts = [1000u32, 2000, 3000, 4000]; // access, modification, change, birth
1279        let data = build_v2_header(flags, &[], Some(ts), None);
1280        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1281        assert_eq!(hdr.modification_time, Some(2000));
1282    }
1283
1284    #[test]
1285    fn v2_with_phase_change() {
1286        // Flags: bit 4 (phase change) + bits 0-1 = 0.
1287        let flags = 0x10;
1288        let data = build_v2_header(flags, &[], None, Some((8, 6)));
1289        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1290        assert!(hdr.messages.is_empty());
1291    }
1292
1293    #[test]
1294    fn v2_with_creation_order() {
1295        // Flags: bit 2 (creation order tracked) + bits 0-1 = 0.
1296        let flags = 0x04;
1297        let payload = [0xAA];
1298        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], None, None);
1299        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1300        assert_eq!(hdr.messages.len(), 1);
1301        match &hdr.messages[0] {
1302            HdfMessage::Unknown { type_id, .. } => assert_eq!(*type_id, 0x00FE),
1303            other => panic!("expected Unknown, got {:?}", other),
1304        }
1305    }
1306
1307    #[test]
1308    fn v2_2byte_size_field() {
1309        // bits 0-1 = 1 → 2-byte size field.
1310        let flags = 0x01;
1311        let payload = [0x42; 5];
1312        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], None, None);
1313        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1314        assert_eq!(hdr.messages.len(), 1);
1315    }
1316
1317    #[test]
1318    fn v2_4byte_size_field() {
1319        // bits 0-1 = 2 → 4-byte size field.
1320        let flags = 0x02;
1321        let data = build_v2_header(flags, &[(0xFE, 0, &[0x01])], None, None);
1322        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1323        assert_eq!(hdr.messages.len(), 1);
1324    }
1325
1326    #[test]
1327    fn v2_8byte_size_field() {
1328        // bits 0-1 = 3 → 8-byte size field.
1329        let flags = 0x03;
1330        let data = build_v2_header(flags, &[(0xFE, 0, &[0x01])], None, None);
1331        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1332        assert_eq!(hdr.messages.len(), 1);
1333    }
1334
1335    #[test]
1336    fn v2_checksum_mismatch() {
1337        let mut data = build_v2_header(0x00, &[(0xFE, 0, &[0x01])], None, None);
1338        // Corrupt the last byte (part of checksum).
1339        let last = data.len() - 1;
1340        data[last] ^= 0xFF;
1341        let err = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap_err();
1342        assert!(matches!(err, Error::ChecksumMismatch { .. }));
1343    }
1344
1345    #[test]
1346    fn v2_continuation_chunk() {
1347        // Build a continuation chunk (OCHK) that holds one unknown message.
1348        let unknown_payload = [0xCC; 3];
1349        let ochk = build_v2_ochk(&[(0xFD, 0, &unknown_payload)], false);
1350
1351        // The continuation message payload is offset(8) + length(8) = 16 bytes.
1352        // We will compute the offset of the OCHK after building the main OHDR.
1353        // Strategy: build OHDR first with a placeholder, measure its size,
1354        // set the actual offset, then rebuild.
1355
1356        // Placeholder continuation payload (will rewrite).
1357        let mut cont_payload = vec![0u8; 16];
1358
1359        // Build OHDR with the continuation message.  The OHDR occupies:
1360        //   4 (sig) + 1 (ver) + 1 (flags) + 1 (size field, flags=0) + messages + 4 (checksum)
1361        // Message envelope: type(1) + size(2) + flags(1) = 4; payload = 16.
1362        // Total OHDR = 4 + 1 + 1 + 1 + 4 + 16 + 4 = 31 bytes.
1363        // The OCHK starts at byte 31.
1364
1365        // We need the offset to be the byte where OCHK starts.
1366        // OHDR: sig(4) + ver(1) + flags(1) + size(1) + [envelope(4)+payload(16)] + checksum(4) = 31
1367        let ohdr_size = 4 + 1 + 1 + 1 + (4 + cont_payload.len()) + 4;
1368        let ochk_offset = ohdr_size as u64;
1369
1370        // Rebuild continuation payload with correct offset.
1371        cont_payload.clear();
1372        cont_payload.extend_from_slice(&ochk_offset.to_le_bytes());
1373        cont_payload.extend_from_slice(&(ochk.len() as u64).to_le_bytes());
1374
1375        let ohdr = build_v2_header(0x00, &[(0x10, 0, &cont_payload)], None, None);
1376        assert_eq!(ohdr.len(), ohdr_size);
1377
1378        let mut file_data = ohdr;
1379        file_data.extend_from_slice(&ochk);
1380
1381        let hdr = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap();
1382        // Should have: continuation marker + unknown message from OCHK.
1383        assert_eq!(hdr.messages.len(), 2);
1384        assert!(matches!(
1385            hdr.messages[0],
1386            HdfMessage::ObjectHeaderContinuation
1387        ));
1388        match &hdr.messages[1] {
1389            HdfMessage::Unknown { type_id, data } => {
1390                assert_eq!(*type_id, 0x00FD);
1391                assert_eq!(data.as_slice(), &unknown_payload);
1392            }
1393            other => panic!("expected Unknown from OCHK, got {:?}", other),
1394        }
1395    }
1396
1397    #[test]
1398    fn v2_ochk_checksum_mismatch() {
1399        let unknown_payload = [0xCC; 3];
1400        let mut ochk = build_v2_ochk(&[(0xFD, 0, &unknown_payload)], false);
1401        // Corrupt OCHK checksum.
1402        let last = ochk.len() - 1;
1403        ochk[last] ^= 0xFF;
1404
1405        let ohdr_size = 4 + 1 + 1 + 1 + (4 + 16) + 4; // 31
1406        let ochk_offset = ohdr_size as u64;
1407
1408        let mut cont_payload = Vec::new();
1409        cont_payload.extend_from_slice(&ochk_offset.to_le_bytes());
1410        cont_payload.extend_from_slice(&(ochk.len() as u64).to_le_bytes());
1411
1412        let ohdr = build_v2_header(0x00, &[(0x10, 0, &cont_payload)], None, None);
1413        let mut file_data = ohdr;
1414        file_data.extend_from_slice(&ochk);
1415
1416        let err = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap_err();
1417        assert!(matches!(err, Error::ChecksumMismatch { .. }));
1418    }
1419
1420    #[test]
1421    fn v2_multiple_messages() {
1422        // Two unknown messages in the same chunk.
1423        let p1 = [0x01, 0x02];
1424        let p2 = [0x03, 0x04, 0x05];
1425        let data = build_v2_header(0x00, &[(0xA0, 0, &p1), (0xA1, 0, &p2)], None, None);
1426        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1427        assert_eq!(hdr.messages.len(), 2);
1428        match &hdr.messages[0] {
1429            HdfMessage::Unknown { type_id, .. } => assert_eq!(*type_id, 0x00A0),
1430            other => panic!("expected Unknown 0xA0, got {:?}", other),
1431        }
1432        match &hdr.messages[1] {
1433            HdfMessage::Unknown { type_id, .. } => assert_eq!(*type_id, 0x00A1),
1434            other => panic!("expected Unknown 0xA1, got {:?}", other),
1435        }
1436    }
1437
1438    #[test]
1439    fn v2_zero_length_nil_before_more_messages() {
1440        let p1 = [0xAA];
1441        let p2 = [0xBB];
1442        let data = build_v2_header(
1443            0x04,
1444            &[(0xFE, 0, &p1), (0x00, 0, &[]), (0xFD, 0, &p2)],
1445            None,
1446            None,
1447        );
1448        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1449        assert_eq!(hdr.messages.len(), 3);
1450        assert!(matches!(hdr.messages[0], HdfMessage::Unknown { .. }));
1451        assert!(matches!(hdr.messages[1], HdfMessage::Nil));
1452        assert!(matches!(hdr.messages[2], HdfMessage::Unknown { .. }));
1453    }
1454
1455    #[test]
1456    fn v2_nonzero_address() {
1457        // Place the OHDR at offset 128 in a larger buffer.
1458        let prefix_pad = vec![0u8; 128];
1459        let ohdr = build_v2_header(0x00, &[(0xFE, 0, &[0x42])], None, None);
1460
1461        let mut file_data = prefix_pad;
1462        file_data.extend_from_slice(&ohdr);
1463
1464        let hdr = ObjectHeader::parse_at(&file_data, 128, 8, 8).unwrap();
1465        assert_eq!(hdr.version, 2);
1466        assert_eq!(hdr.messages.len(), 1);
1467    }
1468
1469    #[test]
1470    fn v2_all_flags_combined() {
1471        // Combine timestamps (0x20) + phase change (0x10) + creation order (0x04) + 2-byte size (0x01).
1472        let flags = 0x20 | 0x10 | 0x04 | 0x01;
1473        let ts = [100u32, 200, 300, 400];
1474        let payload = [0xBB];
1475        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], Some(ts), Some((12, 8)));
1476        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1477        assert_eq!(hdr.version, 2);
1478        assert_eq!(hdr.modification_time, Some(200));
1479        assert_eq!(hdr.messages.len(), 1);
1480    }
1481
1482    #[test]
1483    fn v1_multiple_messages() {
1484        // Two messages in a single v1 header.
1485        let p1 = [0xAA; 4];
1486        let p2 = [0xBB; 8];
1487        let data = build_v1_header(&[(0x00FF, 0, &p1), (0x00FE, 0, &p2)], 5);
1488        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
1489        assert_eq!(hdr.version, 1);
1490        assert_eq!(hdr.reference_count, 5);
1491        assert_eq!(hdr.messages.len(), 2);
1492    }
1493
1494    #[test]
1495    fn v1_4byte_offsets() {
1496        // Verify correct operation with 4-byte offset/length sizes.
1497        // Symbol table message with 4-byte addresses.
1498        let mut payload = Vec::new();
1499        payload.extend_from_slice(&0x1000u32.to_le_bytes());
1500        payload.extend_from_slice(&0x2000u32.to_le_bytes());
1501
1502        let data = build_v1_header(&[(0x0011, 0, &payload)], 1);
1503        let hdr = ObjectHeader::parse_at(&data, 0, 4, 4).unwrap();
1504        assert_eq!(hdr.messages.len(), 1);
1505        match &hdr.messages[0] {
1506            HdfMessage::SymbolTable(st) => {
1507                assert_eq!(st.btree_address, 0x1000);
1508                assert_eq!(st.heap_address, 0x2000);
1509            }
1510            other => panic!("expected SymbolTable, got {:?}", other),
1511        }
1512    }
1513}