Skip to main content

hdf5_reader/
object_header.rs

1//! HDF5 Object Header parser (v1 and v2).
2//!
3//! Object headers contain a collection of header messages that describe an
4//! HDF5 object (group, dataset, committed datatype, etc.).  Two on-disk
5//! formats exist:
6//!
7//! * **Version 1** (HDF5 < 1.8) — 16-byte fixed prefix, messages each have an
8//!   8-byte envelope (type u16 + size u16 + flags u8 + reserved 3).
9//! * **Version 2** (HDF5 >= 1.8) — begins with the `OHDR` signature, variable-
10//!   length prefix, messages have a 4-or-6-byte envelope, and every chunk is
11//!   checksummed with Jenkins lookup3.
12//!
13//! Continuation messages (type `0x0010`) cause the parser to follow an offset
14//! to an additional chunk of messages (an `OCHK` block in v2, or a raw message
15//! run in v1).
16
17use crate::checksum::jenkins_lookup3;
18use crate::error::{Error, Result};
19use crate::io::Cursor;
20use crate::messages::shared::SharedMessage;
21use crate::messages::{parse_message, HdfMessage};
22use crate::storage::Storage;
23
24/// Magic signature for v2 object headers.
25const OHDR_SIGNATURE: [u8; 4] = *b"OHDR";
26
27/// Magic signature for v2 continuation chunks.
28const OCHK_SIGNATURE: [u8; 4] = *b"OCHK";
29
30/// Header continuation message type id.
31const MSG_TYPE_CONTINUATION: u16 = 0x0010;
32
33/// Nil (padding) message type id.
34const MSG_TYPE_NIL: u16 = 0x0000;
35
36fn checked_usize(value: u64, context: &str) -> Result<usize> {
37    usize::try_from(value).map_err(|_| {
38        Error::InvalidData(format!(
39            "{context} value {value} exceeds platform usize capacity"
40        ))
41    })
42}
43
44fn checked_add_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
45    lhs.checked_add(rhs)
46        .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
47}
48
/// Parsed object header with all its messages.
///
/// Produced by [`ObjectHeader::parse_at`] (in-memory file image) or
/// [`ObjectHeader::parse_at_storage`] (random-access storage).
#[derive(Debug, Clone)]
pub struct ObjectHeader {
    /// Object header format version (1 or 2).
    pub version: u8,
    /// All parsed header messages, collected from every chunk.
    ///
    /// Nil (padding) messages and continuation messages are kept as
    /// placeholder entries. Shared messages stay as unresolved references
    /// until one of the `resolve_shared_messages*` methods is called.
    pub messages: Vec<HdfMessage>,
    /// Object reference count.
    ///
    /// Read from the fixed prefix of v1 headers. The v2 parsers always store
    /// 0 here because the v2 prefix does not carry a reference count.
    pub reference_count: u32,
    /// Modification time in seconds since the UNIX epoch (v2 only, when the
    /// "times stored" flag is set). `None` for v1 headers and for v2 headers
    /// without stored timestamps.
    pub modification_time: Option<u32>,
}
62
63impl ObjectHeader {
64    /// Parse an object header at the given absolute file address.
65    ///
66    /// `data` is the entire file mapped into memory, `address` is the byte
67    /// offset where the object header starts, and `offset_size` / `length_size`
68    /// come from the superblock.
69    pub fn parse_at(data: &[u8], address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
70        let mut cursor = Cursor::new(data);
71        cursor.set_position(address);
72
73        // Peek at the first four bytes to decide v1 vs v2.
74        let sig = cursor.peek_bytes(4)?;
75        if sig == OHDR_SIGNATURE {
76            Self::parse_v2(&cursor, address, offset_size, length_size)
77        } else {
78            Self::parse_v1(&cursor, address, offset_size, length_size)
79        }
80    }
81
82    /// Parse an object header from random-access storage.
83    pub fn parse_at_storage(
84        storage: &dyn Storage,
85        address: u64,
86        offset_size: u8,
87        length_size: u8,
88    ) -> Result<Self> {
89        let prefix = storage.read_range(address, 4)?;
90        if prefix.len() < 4 {
91            return Err(Error::UnexpectedEof {
92                offset: address,
93                needed: 4,
94                available: prefix.len() as u64,
95            });
96        }
97
98        if prefix.as_ref()[..4] == OHDR_SIGNATURE {
99            Self::parse_v2_storage(storage, address, offset_size, length_size)
100        } else {
101            Self::parse_v1_storage(storage, address, offset_size, length_size)
102        }
103    }
104
105    /// Resolve shared messages by following references to other object headers.
106    ///
107    /// For `SharedInOhdr`, the referenced object header is parsed and the
108    /// matching message type is extracted. `SharedInSohm` requires the
109    /// storage-backed resolver because the SOHM table lives in file metadata.
110    pub fn resolve_shared_messages(
111        &mut self,
112        data: &[u8],
113        offset_size: u8,
114        length_size: u8,
115    ) -> Result<()> {
116        let old_messages = std::mem::take(&mut self.messages);
117        let mut resolved = Vec::with_capacity(old_messages.len());
118        for msg in old_messages {
119            match msg {
120                HdfMessage::Shared(SharedMessage::SharedInOhdr {
121                    message_type,
122                    address,
123                }) => {
124                    match Self::parse_at(data, address, offset_size, length_size) {
125                        Ok(target_header) => {
126                            if let Some(target_msg) =
127                                select_shared_message(target_header, message_type)
128                            {
129                                resolved.push(target_msg);
130                            }
131                        }
132                        Err(_) => {
133                            // If we can't parse the target, keep the shared ref
134                            resolved.push(HdfMessage::Shared(SharedMessage::SharedInOhdr {
135                                message_type,
136                                address,
137                            }));
138                        }
139                    }
140                }
141                HdfMessage::Shared(SharedMessage::SharedInSohm { .. }) => {
142                    self.messages = resolved;
143                    return Err(Error::Other(
144                        "SOHM table lookup not yet supported — file uses shared object header messages".to_string(),
145                    ));
146                }
147                other => resolved.push(other),
148            }
149        }
150        self.messages = resolved;
151        Ok(())
152    }
153
154    /// Resolve shared messages by following references via random-access storage.
155    pub fn resolve_shared_messages_storage(
156        &mut self,
157        storage: &dyn Storage,
158        offset_size: u8,
159        length_size: u8,
160    ) -> Result<()> {
161        self.resolve_shared_messages_storage_with_sohm(
162            storage,
163            offset_size,
164            length_size,
165            |_heap_id, _message_type| Ok(None),
166        )
167    }
168
169    /// Resolve shared messages using random-access storage and a SOHM resolver.
170    pub(crate) fn resolve_shared_messages_storage_with_sohm<F>(
171        &mut self,
172        storage: &dyn Storage,
173        offset_size: u8,
174        length_size: u8,
175        mut resolve_sohm: F,
176    ) -> Result<()>
177    where
178        F: FnMut(&[u8], u16) -> Result<Option<HdfMessage>>,
179    {
180        let old_messages = std::mem::take(&mut self.messages);
181        let mut resolved = Vec::with_capacity(old_messages.len());
182        for msg in old_messages {
183            match msg {
184                HdfMessage::Shared(SharedMessage::SharedInOhdr {
185                    message_type,
186                    address,
187                }) => match Self::parse_at_storage(storage, address, offset_size, length_size) {
188                    Ok(target_header) => {
189                        if let Some(target_msg) = select_shared_message(target_header, message_type)
190                        {
191                            resolved.push(target_msg);
192                        }
193                    }
194                    Err(_) => {
195                        resolved.push(HdfMessage::Shared(SharedMessage::SharedInOhdr {
196                            message_type,
197                            address,
198                        }));
199                    }
200                },
201                HdfMessage::Shared(SharedMessage::SharedInSohm {
202                    message_type,
203                    heap_id,
204                }) => {
205                    if let Some(message) = resolve_sohm(&heap_id, message_type)? {
206                        resolved.push(message);
207                    } else {
208                        self.messages = resolved;
209                        return Err(Error::Other(format!(
210                            "SOHM entry for message type {message_type:#x} not found"
211                        )));
212                    }
213                }
214                other => resolved.push(other),
215            }
216        }
217        self.messages = resolved;
218        Ok(())
219    }
220
221    // ------------------------------------------------------------------
222    // Version 1
223    // ------------------------------------------------------------------
224
225    /// Parse a version-1 object header.
226    ///
227    /// Layout (16 bytes total):
228    /// ```text
229    ///   version          u8    (must be 1)
230    ///   reserved         u8
231    ///   num_messages     u16
232    ///   ref_count        u32
233    ///   header_data_size u32   (byte count of the message run)
234    ///   reserved         u32   (alignment padding)
235    /// ```
236    fn parse_v1(base: &Cursor<'_>, address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
237        let mut cursor = base.at_offset(address)?;
238
239        let version = cursor.read_u8()?;
240        if version != 1 {
241            return Err(Error::UnsupportedObjectHeaderVersion(version));
242        }
243
244        let _reserved = cursor.read_u8()?;
245        let num_messages = cursor.read_u16_le()?;
246        let reference_count = cursor.read_u32_le()?;
247        let header_data_size = cursor.read_u32_le()? as u64;
248        let _reserved2 = cursor.read_u32_le()?; // alignment padding
249
250        // Messages start right after the 16-byte prefix.
251        let messages_start = cursor.position();
252        let messages_end = messages_start + header_data_size;
253
254        let mut messages: Vec<HdfMessage> = Vec::with_capacity(num_messages as usize);
255        let mut continuations: Vec<(u64, u64)> = Vec::new();
256
257        Self::read_v1_messages(
258            base,
259            messages_start,
260            messages_end,
261            offset_size,
262            length_size,
263            &mut messages,
264            &mut continuations,
265        )?;
266
267        // Follow continuation messages.
268        while let Some((cont_offset, cont_length)) = continuations.pop() {
269            let cont_end = cont_offset + cont_length;
270            Self::read_v1_messages(
271                base,
272                cont_offset,
273                cont_end,
274                offset_size,
275                length_size,
276                &mut messages,
277                &mut continuations,
278            )?;
279        }
280
281        Ok(ObjectHeader {
282            version: 1,
283            messages,
284            reference_count,
285            modification_time: None,
286        })
287    }
288
289    fn parse_v1_storage(
290        storage: &dyn Storage,
291        address: u64,
292        offset_size: u8,
293        length_size: u8,
294    ) -> Result<Self> {
295        let header = storage.read_range(address, 16)?;
296        let mut cursor = Cursor::new(header.as_ref());
297
298        let version = cursor.read_u8()?;
299        if version != 1 {
300            return Err(Error::UnsupportedObjectHeaderVersion(version));
301        }
302
303        let _reserved = cursor.read_u8()?;
304        let num_messages = cursor.read_u16_le()?;
305        let reference_count = cursor.read_u32_le()?;
306        let header_data_size = cursor.read_u32_le()? as u64;
307        let _reserved2 = cursor.read_u32_le()?;
308
309        let header_data_size = checked_usize(header_data_size, "v1 object header data size")?;
310        let first_chunk_len =
311            checked_add_usize(16, header_data_size, "v1 object header chunk length")?;
312        let first_chunk = storage.read_range(address, first_chunk_len)?;
313        let mut messages = Vec::with_capacity(num_messages as usize);
314        let mut continuations = Vec::new();
315        Self::read_v1_messages_from_slice(
316            &first_chunk.as_ref()[16..],
317            offset_size,
318            length_size,
319            &mut messages,
320            &mut continuations,
321        )?;
322
323        while let Some((cont_offset, cont_length)) = continuations.pop() {
324            let cont_length = checked_usize(cont_length, "v1 object header continuation length")?;
325            let chunk = storage.read_range(cont_offset, cont_length)?;
326            Self::read_v1_messages_from_slice(
327                chunk.as_ref(),
328                offset_size,
329                length_size,
330                &mut messages,
331                &mut continuations,
332            )?;
333        }
334
335        Ok(ObjectHeader {
336            version: 1,
337            messages,
338            reference_count,
339            modification_time: None,
340        })
341    }
342
343    /// Read v1 header messages from `start..end`, appending to `messages`.
344    /// Any continuation messages encountered are pushed onto `continuations`
345    /// for the caller to follow.
346    fn read_v1_messages(
347        base: &Cursor<'_>,
348        start: u64,
349        end: u64,
350        offset_size: u8,
351        length_size: u8,
352        messages: &mut Vec<HdfMessage>,
353        continuations: &mut Vec<(u64, u64)>,
354    ) -> Result<()> {
355        let mut cursor = base.at_offset(start)?;
356
357        while cursor.position() + 8 <= end {
358            let msg_type = cursor.read_u16_le()?;
359            let msg_data_size = cursor.read_u16_le()? as usize;
360            let msg_flags = cursor.read_u8()?;
361            let _reserved = cursor.read_bytes(3)?; // 3 reserved bytes
362
363            // Bounds-check the message data within this chunk.
364            if cursor.position() + msg_data_size as u64 > end {
365                return Err(Error::InvalidData(format!(
366                    "v1 message data ({} bytes) extends past header chunk end",
367                    msg_data_size
368                )));
369            }
370
371            if msg_type == MSG_TYPE_NIL {
372                // Nil / padding — skip the data bytes.
373                cursor.skip(msg_data_size)?;
374                messages.push(HdfMessage::Nil);
375                continue;
376            }
377
378            let msg_data = cursor.read_bytes(msg_data_size)?;
379            let is_shared = (msg_flags & 0x02) != 0;
380
381            if is_shared {
382                // Shared message — the stored bytes are a shared-message
383                // reference, not the message payload itself.
384                let shared_msg = crate::messages::shared::parse(
385                    &mut Cursor::new(msg_data),
386                    msg_type,
387                    offset_size,
388                    length_size,
389                    msg_data_size,
390                )?;
391                messages.push(HdfMessage::Shared(shared_msg));
392            } else if msg_type == MSG_TYPE_CONTINUATION {
393                // Parse the continuation message to get offset + length, then
394                // enqueue it for later processing.
395                let cont = crate::messages::continuation::parse(
396                    &mut Cursor::new(msg_data),
397                    offset_size,
398                    length_size,
399                    msg_data_size,
400                )?;
401                continuations.push((cont.offset, cont.length));
402                messages.push(HdfMessage::ObjectHeaderContinuation);
403            } else {
404                let parsed = parse_message(
405                    msg_type,
406                    msg_data.len(),
407                    &mut Cursor::new(msg_data),
408                    offset_size,
409                    length_size,
410                )?;
411                messages.push(parsed);
412            }
413        }
414
415        Ok(())
416    }
417
418    // ------------------------------------------------------------------
419    // Version 2
420    // ------------------------------------------------------------------
421
422    /// Parse a version-2 object header.
423    ///
424    /// Layout:
425    /// ```text
426    ///   signature  4 bytes  ("OHDR")
427    ///   version    u8       (must be 2)
428    ///   flags      u8
429    ///   [optional timestamps — 4 x u32 if bit 5 of flags]
430    ///   [optional attr phase change — 2 x u16 if bit 4 of flags]
431    ///   chunk0_size  1/2/4/8 bytes (encoded size depends on bits 0-1 of flags)
432    ///   <messages for chunk 0>
433    ///   checksum   u32      (Jenkins lookup3 from "OHDR" through last byte before checksum)
434    /// ```
435    fn parse_v2(base: &Cursor<'_>, address: u64, offset_size: u8, length_size: u8) -> Result<Self> {
436        let mut cursor = base.at_offset(address)?;
437
438        // ---- Fixed prefix ----
439        let sig = cursor.read_bytes(4)?;
440        if sig != OHDR_SIGNATURE {
441            return Err(Error::InvalidObjectHeaderSignature);
442        }
443        let version = cursor.read_u8()?;
444        if version != 2 {
445            return Err(Error::UnsupportedObjectHeaderVersion(version));
446        }
447        let flags = cursor.read_u8()?;
448
449        // Bit 5 — timestamps stored.
450        let modification_time = if (flags & 0x20) != 0 {
451            let _access_time = cursor.read_u32_le()?;
452            let mod_time = cursor.read_u32_le()?;
453            let _change_time = cursor.read_u32_le()?;
454            let _birth_time = cursor.read_u32_le()?;
455            Some(mod_time)
456        } else {
457            None
458        };
459
460        // Bit 4 — non-default attribute storage phase change values.
461        if (flags & 0x10) != 0 {
462            let _max_compact = cursor.read_u16_le()?;
463            let _min_dense = cursor.read_u16_le()?;
464        }
465
466        // Chunk#0 size — width depends on bits 0-1 of flags.
467        let size_field_width = 1usize << (flags & 0x03);
468        let chunk0_data_size = cursor.read_uvar(size_field_width)?;
469
470        // Bit 2 — attribute creation order tracked (affects per-message envelope).
471        let creation_order_tracked = (flags & 0x04) != 0;
472
473        // Messages for chunk 0 run from the current position for
474        // `chunk0_data_size` bytes.  The last 4 bytes of that range are the
475        // checksum.
476        let messages_start = cursor.position();
477        let chunk0_end = messages_start + chunk0_data_size;
478
479        // The checksum covers everything from "OHDR" through the last byte
480        // before the checksum field.
481        let checksum_start = address as usize;
482        let checksum_end = chunk0_end as usize; // the checksum itself sits at chunk0_end
483        let stored_checksum = {
484            let mut ck = base.at_offset(chunk0_end)?;
485            ck.read_u32_le()?
486        };
487        let computed = jenkins_lookup3(&base.data()[checksum_start..checksum_end]);
488        if computed != stored_checksum {
489            return Err(Error::ChecksumMismatch {
490                expected: stored_checksum,
491                actual: computed,
492            });
493        }
494
495        let mut messages: Vec<HdfMessage> = Vec::new();
496        let mut continuations: Vec<(u64, u64)> = Vec::new();
497
498        Self::read_v2_messages(
499            base,
500            messages_start,
501            chunk0_end,
502            offset_size,
503            length_size,
504            creation_order_tracked,
505            &mut messages,
506            &mut continuations,
507        )?;
508
509        // Follow continuation chunks.
510        while let Some((cont_offset, cont_length)) = continuations.pop() {
511            Self::read_v2_continuation_chunk(
512                base,
513                cont_offset,
514                cont_length,
515                offset_size,
516                length_size,
517                creation_order_tracked,
518                &mut messages,
519                &mut continuations,
520            )?;
521        }
522
523        Ok(ObjectHeader {
524            version: 2,
525            messages,
526            reference_count: 0, // v2 does not store a reference count in the header
527            modification_time,
528        })
529    }
530
531    fn parse_v2_storage(
532        storage: &dyn Storage,
533        address: u64,
534        offset_size: u8,
535        length_size: u8,
536    ) -> Result<Self> {
537        let fixed_prefix = storage.read_range(address, 6)?;
538        let mut cursor = Cursor::new(fixed_prefix.as_ref());
539
540        let sig = cursor.read_bytes(4)?;
541        if sig != OHDR_SIGNATURE {
542            return Err(Error::InvalidObjectHeaderSignature);
543        }
544        let version = cursor.read_u8()?;
545        if version != 2 {
546            return Err(Error::UnsupportedObjectHeaderVersion(version));
547        }
548        let flags = cursor.read_u8()?;
549
550        let size_field_width = 1usize << (flags & 0x03);
551        let mut prefix_len = 6usize;
552        if (flags & 0x20) != 0 {
553            prefix_len = checked_add_usize(prefix_len, 16, "v2 object header prefix length")?;
554        }
555        if (flags & 0x10) != 0 {
556            prefix_len = checked_add_usize(prefix_len, 4, "v2 object header prefix length")?;
557        }
558        prefix_len = checked_add_usize(
559            prefix_len,
560            size_field_width,
561            "v2 object header prefix length",
562        )?;
563
564        let prefix = storage.read_range(address, prefix_len)?;
565        let mut cursor = Cursor::new(prefix.as_ref());
566        let sig = cursor.read_bytes(4)?;
567        if sig != OHDR_SIGNATURE {
568            return Err(Error::InvalidObjectHeaderSignature);
569        }
570        let version = cursor.read_u8()?;
571        if version != 2 {
572            return Err(Error::UnsupportedObjectHeaderVersion(version));
573        }
574        let flags = cursor.read_u8()?;
575
576        let modification_time = if (flags & 0x20) != 0 {
577            let _access_time = cursor.read_u32_le()?;
578            let mod_time = cursor.read_u32_le()?;
579            let _change_time = cursor.read_u32_le()?;
580            let _birth_time = cursor.read_u32_le()?;
581            Some(mod_time)
582        } else {
583            None
584        };
585
586        if (flags & 0x10) != 0 {
587            let _max_compact = cursor.read_u16_le()?;
588            let _min_dense = cursor.read_u16_le()?;
589        }
590
591        let chunk0_data_size = cursor.read_uvar(size_field_width)?;
592        let creation_order_tracked = (flags & 0x04) != 0;
593        let messages_start = checked_usize(cursor.position(), "v2 object header message start")?;
594        let chunk0_data_size = checked_usize(chunk0_data_size, "v2 object header chunk0 size")?;
595        let chunk0_end = checked_add_usize(
596            messages_start,
597            chunk0_data_size,
598            "v2 object header chunk0 end",
599        )?;
600
601        let chunk_with_checksum_len =
602            checked_add_usize(chunk0_end, 4, "v2 object header chunk0 checksum end")?;
603        let chunk = storage.read_range(address, chunk_with_checksum_len)?;
604        let stored_checksum = u32::from_le_bytes(
605            chunk.as_ref()[chunk0_end..chunk0_end + 4]
606                .try_into()
607                .unwrap(),
608        );
609        let computed = jenkins_lookup3(&chunk.as_ref()[..chunk0_end]);
610        if computed != stored_checksum {
611            return Err(Error::ChecksumMismatch {
612                expected: stored_checksum,
613                actual: computed,
614            });
615        }
616
617        let mut messages = Vec::new();
618        let mut continuations = Vec::new();
619        Self::read_v2_messages_from_slice(
620            &chunk.as_ref()[messages_start..chunk0_end],
621            offset_size,
622            length_size,
623            creation_order_tracked,
624            &mut messages,
625            &mut continuations,
626        )?;
627
628        while let Some((cont_offset, cont_length)) = continuations.pop() {
629            Self::read_v2_continuation_chunk_storage(
630                storage,
631                cont_offset,
632                cont_length,
633                offset_size,
634                length_size,
635                creation_order_tracked,
636                &mut messages,
637                &mut continuations,
638            )?;
639        }
640
641        Ok(ObjectHeader {
642            version: 2,
643            messages,
644            reference_count: 0,
645            modification_time,
646        })
647    }
648
649    /// Read v2 messages from `start..end`.
650    #[allow(clippy::too_many_arguments)]
651    fn read_v2_messages(
652        base: &Cursor<'_>,
653        start: u64,
654        end: u64,
655        offset_size: u8,
656        length_size: u8,
657        creation_order_tracked: bool,
658        messages: &mut Vec<HdfMessage>,
659        continuations: &mut Vec<(u64, u64)>,
660    ) -> Result<()> {
661        let mut cursor = base.at_offset(start)?;
662
663        // Minimum envelope: type(1) + size(2) + flags(1) = 4 bytes, optionally
664        // +2 for creation order.
665        let min_envelope = if creation_order_tracked { 6 } else { 4 };
666
667        while cursor.position() + min_envelope as u64 <= end {
668            let msg_type = cursor.read_u8()? as u16;
669            let msg_data_size = cursor.read_u16_le()? as usize;
670            let msg_flags = cursor.read_u8()?;
671
672            if creation_order_tracked {
673                let _creation_order = cursor.read_u16_le()?;
674            }
675
676            if msg_type == MSG_TYPE_NIL {
677                if msg_data_size == 0
678                    && base.data()[cursor.position() as usize..end as usize]
679                        .iter()
680                        .all(|byte| *byte == 0)
681                {
682                    break;
683                }
684                cursor.skip(msg_data_size)?;
685                messages.push(HdfMessage::Nil);
686                continue;
687            }
688
689            if cursor.position() + msg_data_size as u64 > end {
690                return Err(Error::InvalidData(format!(
691                    "v2 message data ({} bytes) extends past chunk end",
692                    msg_data_size
693                )));
694            }
695
696            let msg_data = cursor.read_bytes(msg_data_size)?;
697            let is_shared = (msg_flags & 0x02) != 0;
698
699            if is_shared {
700                let shared_msg = crate::messages::shared::parse(
701                    &mut Cursor::new(msg_data),
702                    msg_type,
703                    offset_size,
704                    length_size,
705                    msg_data_size,
706                )?;
707                messages.push(HdfMessage::Shared(shared_msg));
708            } else if msg_type == MSG_TYPE_CONTINUATION {
709                let cont = crate::messages::continuation::parse(
710                    &mut Cursor::new(msg_data),
711                    offset_size,
712                    length_size,
713                    msg_data_size,
714                )?;
715                continuations.push((cont.offset, cont.length));
716                messages.push(HdfMessage::ObjectHeaderContinuation);
717            } else {
718                let parsed = parse_message(
719                    msg_type,
720                    msg_data.len(),
721                    &mut Cursor::new(msg_data),
722                    offset_size,
723                    length_size,
724                )?;
725                messages.push(parsed);
726            }
727        }
728
729        Ok(())
730    }
731
732    fn read_v1_messages_from_slice(
733        data: &[u8],
734        offset_size: u8,
735        length_size: u8,
736        messages: &mut Vec<HdfMessage>,
737        continuations: &mut Vec<(u64, u64)>,
738    ) -> Result<()> {
739        let mut cursor = Cursor::new(data);
740        while cursor.remaining() >= 8 {
741            let msg_type = cursor.read_u16_le()?;
742            let msg_data_size = cursor.read_u16_le()? as usize;
743            let msg_flags = cursor.read_u8()?;
744            let _reserved = cursor.read_bytes(3)?;
745
746            if cursor.remaining() < msg_data_size as u64 {
747                return Err(Error::InvalidData(format!(
748                    "v1 message data ({} bytes) extends past header chunk end",
749                    msg_data_size
750                )));
751            }
752
753            if msg_type == MSG_TYPE_NIL {
754                cursor.skip(msg_data_size)?;
755                messages.push(HdfMessage::Nil);
756                continue;
757            }
758
759            let msg_data = cursor.read_bytes(msg_data_size)?;
760            let is_shared = (msg_flags & 0x02) != 0;
761            if is_shared {
762                let shared_msg = crate::messages::shared::parse(
763                    &mut Cursor::new(msg_data),
764                    msg_type,
765                    offset_size,
766                    length_size,
767                    msg_data_size,
768                )?;
769                messages.push(HdfMessage::Shared(shared_msg));
770            } else if msg_type == MSG_TYPE_CONTINUATION {
771                let cont = crate::messages::continuation::parse(
772                    &mut Cursor::new(msg_data),
773                    offset_size,
774                    length_size,
775                    msg_data_size,
776                )?;
777                continuations.push((cont.offset, cont.length));
778                messages.push(HdfMessage::ObjectHeaderContinuation);
779            } else {
780                let parsed = parse_message(
781                    msg_type,
782                    msg_data.len(),
783                    &mut Cursor::new(msg_data),
784                    offset_size,
785                    length_size,
786                )?;
787                messages.push(parsed);
788            }
789        }
790        Ok(())
791    }
792
793    fn read_v2_messages_from_slice(
794        data: &[u8],
795        offset_size: u8,
796        length_size: u8,
797        creation_order_tracked: bool,
798        messages: &mut Vec<HdfMessage>,
799        continuations: &mut Vec<(u64, u64)>,
800    ) -> Result<()> {
801        let mut cursor = Cursor::new(data);
802        let min_envelope = if creation_order_tracked { 6 } else { 4 };
803
804        while cursor.remaining() >= min_envelope as u64 {
805            let msg_type = cursor.read_u8()? as u16;
806            let msg_data_size = cursor.read_u16_le()? as usize;
807            let msg_flags = cursor.read_u8()?;
808
809            if creation_order_tracked {
810                let _creation_order = cursor.read_u16_le()?;
811            }
812
813            if msg_type == MSG_TYPE_NIL {
814                if msg_data_size == 0
815                    && data[cursor.position() as usize..]
816                        .iter()
817                        .all(|byte| *byte == 0)
818                {
819                    break;
820                }
821                cursor.skip(msg_data_size)?;
822                messages.push(HdfMessage::Nil);
823                continue;
824            }
825
826            if cursor.remaining() < msg_data_size as u64 {
827                return Err(Error::InvalidData(format!(
828                    "v2 message data ({} bytes) extends past chunk end",
829                    msg_data_size
830                )));
831            }
832
833            let msg_data = cursor.read_bytes(msg_data_size)?;
834            let is_shared = (msg_flags & 0x02) != 0;
835            if is_shared {
836                let shared_msg = crate::messages::shared::parse(
837                    &mut Cursor::new(msg_data),
838                    msg_type,
839                    offset_size,
840                    length_size,
841                    msg_data_size,
842                )?;
843                messages.push(HdfMessage::Shared(shared_msg));
844            } else if msg_type == MSG_TYPE_CONTINUATION {
845                let cont = crate::messages::continuation::parse(
846                    &mut Cursor::new(msg_data),
847                    offset_size,
848                    length_size,
849                    msg_data_size,
850                )?;
851                continuations.push((cont.offset, cont.length));
852                messages.push(HdfMessage::ObjectHeaderContinuation);
853            } else {
854                let parsed = parse_message(
855                    msg_type,
856                    msg_data.len(),
857                    &mut Cursor::new(msg_data),
858                    offset_size,
859                    length_size,
860                )?;
861                messages.push(parsed);
862            }
863        }
864
865        Ok(())
866    }
867
868    #[allow(clippy::too_many_arguments)]
869    fn read_v2_continuation_chunk_storage(
870        storage: &dyn Storage,
871        cont_offset: u64,
872        cont_length: u64,
873        offset_size: u8,
874        length_size: u8,
875        creation_order_tracked: bool,
876        messages: &mut Vec<HdfMessage>,
877        continuations: &mut Vec<(u64, u64)>,
878    ) -> Result<()> {
879        let cont_length = checked_usize(cont_length, "v2 object header continuation length")?;
880        let chunk = storage.read_range(cont_offset, cont_length)?;
881        if chunk.len() < 8 || chunk.as_ref()[..4] != OCHK_SIGNATURE {
882            return Err(Error::InvalidObjectHeaderSignature);
883        }
884        let messages_end = chunk.len() - 4;
885        let stored_checksum = u32::from_le_bytes(
886            chunk.as_ref()[messages_end..messages_end + 4]
887                .try_into()
888                .unwrap(),
889        );
890        let computed = jenkins_lookup3(&chunk.as_ref()[..messages_end]);
891        if computed != stored_checksum {
892            return Err(Error::ChecksumMismatch {
893                expected: stored_checksum,
894                actual: computed,
895            });
896        }
897
898        Self::read_v2_messages_from_slice(
899            &chunk.as_ref()[4..messages_end],
900            offset_size,
901            length_size,
902            creation_order_tracked,
903            messages,
904            continuations,
905        )
906    }
907
908    /// Read and verify a v2 continuation chunk (`OCHK`).
909    #[allow(clippy::too_many_arguments)]
910    ///
911    /// Layout:
912    /// ```text
913    ///   "OCHK"    4 bytes
914    ///   messages  (cont_length - 4 - 4) bytes
915    ///   checksum  u32
916    /// ```
917    fn read_v2_continuation_chunk(
918        base: &Cursor<'_>,
919        cont_offset: u64,
920        cont_length: u64,
921        offset_size: u8,
922        length_size: u8,
923        creation_order_tracked: bool,
924        messages: &mut Vec<HdfMessage>,
925        continuations: &mut Vec<(u64, u64)>,
926    ) -> Result<()> {
927        let mut cursor = base.at_offset(cont_offset)?;
928
929        let sig = cursor.read_bytes(4)?;
930        if sig != OCHK_SIGNATURE {
931            return Err(Error::InvalidObjectHeaderSignature);
932        }
933
934        let chunk_end = cont_offset + cont_length;
935        // The last 4 bytes of the chunk are the checksum.
936        let messages_end = chunk_end - 4;
937        let messages_start = cursor.position(); // right after "OCHK"
938
939        // Verify checksum: covers "OCHK" through the byte before the checksum.
940        let checksum_start = cont_offset as usize;
941        let checksum_end = messages_end as usize;
942        let stored_checksum = {
943            let mut ck = base.at_offset(messages_end)?;
944            ck.read_u32_le()?
945        };
946        let computed = jenkins_lookup3(&base.data()[checksum_start..checksum_end]);
947        if computed != stored_checksum {
948            return Err(Error::ChecksumMismatch {
949                expected: stored_checksum,
950                actual: computed,
951            });
952        }
953
954        Self::read_v2_messages(
955            base,
956            messages_start,
957            messages_end,
958            offset_size,
959            length_size,
960            creation_order_tracked,
961            messages,
962            continuations,
963        )
964    }
965}
966
967fn select_shared_message(header: ObjectHeader, message_type: u16) -> Option<HdfMessage> {
968    let mut first_real_message = None;
969    for message in header.messages {
970        match message {
971            HdfMessage::Nil | HdfMessage::ObjectHeaderContinuation | HdfMessage::Shared(_) => {
972                continue;
973            }
974            other if message_matches_type(&other, message_type) => return Some(other),
975            other => {
976                if first_real_message.is_none() {
977                    first_real_message = Some(other);
978                }
979            }
980        }
981    }
982    first_real_message
983}
984
985fn message_matches_type(message: &HdfMessage, message_type: u16) -> bool {
986    use crate::messages::*;
987
988    matches!(
989        (message_type, message),
990        (MSG_DATASPACE, HdfMessage::Dataspace(_))
991            | (MSG_DATATYPE, HdfMessage::Datatype(_))
992            | (MSG_FILL_VALUE, HdfMessage::FillValue(_))
993            | (MSG_FILL_VALUE_OLD, HdfMessage::FillValue(_))
994            | (MSG_DATA_LAYOUT, HdfMessage::DataLayout(_))
995            | (MSG_FILTER_PIPELINE, HdfMessage::FilterPipeline(_))
996            | (MSG_ATTRIBUTE, HdfMessage::Attribute(_))
997            | (MSG_ATTRIBUTE_INFO, HdfMessage::AttributeInfo(_))
998            | (MSG_LINK, HdfMessage::Link(_))
999            | (MSG_LINK_INFO, HdfMessage::LinkInfo(_))
1000            | (MSG_GROUP_INFO, HdfMessage::GroupInfo(_))
1001            | (MSG_SYMBOL_TABLE, HdfMessage::SymbolTable(_))
1002            | (MSG_CONTINUATION, HdfMessage::Continuation(_))
1003            | (MSG_MODIFICATION_TIME, HdfMessage::ModificationTime(_))
1004            | (MSG_MODIFICATION_TIME_OLD, HdfMessage::ModificationTime(_))
1005            | (MSG_BTREE_K, HdfMessage::BTreeK(_))
1006            | (MSG_EXTERNAL_FILES, HdfMessage::ExternalFiles(_))
1007            | (MSG_SHARED_TABLE, HdfMessage::SharedTable(_))
1008            | (MSG_COMMENT, HdfMessage::Comment(_))
1009            | (MSG_REFERENCE_COUNT, HdfMessage::ReferenceCount(_))
1010    )
1011}
1012
#[cfg(test)]
mod tests {
    use super::*;
    use crate::checksum::jenkins_lookup3;
    use crate::storage::BytesStorage;

    // ------------------------------------------------------------------
    // Fixture builders
    // ------------------------------------------------------------------

    /// Byte length of a flags=0 `OHDR` that carries exactly one continuation
    /// message with 8-byte offset and length fields:
    /// sig(4) + ver(1) + flags(1) + size(1) + [envelope(4) + payload(16)] + checksum(4).
    const OHDR_WITH_CONTINUATION_LEN: usize = 4 + 1 + 1 + 1 + (4 + 16) + 4;

    /// Encode `(type_id, flags, payload)` triples as a v1 message run, each
    /// message behind an 8-byte envelope (type u16, size u16, flags u8,
    /// 3 reserved bytes).
    fn encode_v1_messages(raw_messages: &[(u16, u8, &[u8])]) -> Vec<u8> {
        let mut run = Vec::new();
        for &(type_id, flags, payload) in raw_messages {
            run.extend_from_slice(&type_id.to_le_bytes());
            run.extend_from_slice(&(payload.len() as u16).to_le_bytes());
            run.push(flags);
            run.extend_from_slice(&[0u8; 3]);
            run.extend_from_slice(payload);
        }
        run
    }

    /// Encode `(type_id, flags, payload)` triples as a v2 message run.  The
    /// envelope is type u8, size u16, flags u8, followed by a zero creation
    /// order (u16) when `creation_order` is set.
    fn encode_v2_messages(raw_messages: &[(u8, u8, &[u8])], creation_order: bool) -> Vec<u8> {
        let mut run = Vec::new();
        for &(type_id, mflags, payload) in raw_messages {
            run.push(type_id);
            run.extend_from_slice(&(payload.len() as u16).to_le_bytes());
            run.push(mflags);
            if creation_order {
                run.extend_from_slice(&[0u8; 2]);
            }
            run.extend_from_slice(payload);
        }
        run
    }

    /// Append the little-endian lookup3 checksum of everything currently in `buf`.
    fn append_checksum(buf: &mut Vec<u8>) {
        let ck = jenkins_lookup3(&buf[..]);
        buf.extend_from_slice(&ck.to_le_bytes());
    }

    /// Return `pad_len` copies of `fill` followed by `header`.
    fn prepend_padding(fill: u8, pad_len: usize, header: &[u8]) -> Vec<u8> {
        let mut image = vec![fill; pad_len];
        image.extend_from_slice(header);
        image
    }

    /// Build a v1 object header containing the given pre-encoded messages.
    /// Each entry in `raw_messages` is `(type_id, flags, payload)`.
    fn build_v1_header(raw_messages: &[(u16, u8, &[u8])], ref_count: u32) -> Vec<u8> {
        let run = encode_v1_messages(raw_messages);

        let mut buf = Vec::with_capacity(16 + run.len());
        buf.extend_from_slice(&[1, 0]); // version, reserved
        buf.extend_from_slice(&(raw_messages.len() as u16).to_le_bytes()); // message count
        buf.extend_from_slice(&ref_count.to_le_bytes()); // reference count
        buf.extend_from_slice(&(run.len() as u32).to_le_bytes()); // header data size
        buf.extend_from_slice(&[0u8; 4]); // reserved padding
        buf.extend_from_slice(&run);
        buf
    }

    /// Build a v2 `OHDR` chunk #0, including the trailing checksum.
    ///
    /// `header_flags` is written verbatim; its low two bits choose the width
    /// of the chunk-size field and bit 2 selects the 6-byte message envelope.
    /// The timestamp and phase-change fields are written whenever the matching
    /// argument is `Some`, so callers must keep the arguments consistent with
    /// the flag bits they pass.
    /// Each entry in `raw_messages` is `(type_id, flags, payload)`.
    fn build_v2_header(
        header_flags: u8,
        raw_messages: &[(u8, u8, &[u8])],
        timestamps: Option<[u32; 4]>,
        phase_change: Option<(u16, u16)>,
    ) -> Vec<u8> {
        let run = encode_v2_messages(raw_messages, (header_flags & 0x04) != 0);

        let mut buf = OHDR_SIGNATURE.to_vec();
        buf.extend_from_slice(&[2, header_flags]); // version, flags

        if let Some(ts) = timestamps {
            for t in ts.iter() {
                buf.extend_from_slice(&t.to_le_bytes());
            }
        }
        if let Some((max_compact, min_dense)) = phase_change {
            buf.extend_from_slice(&max_compact.to_le_bytes());
            buf.extend_from_slice(&min_dense.to_le_bytes());
        }

        // Chunk #0 size: the low `size_width` bytes of the little-endian
        // value, which is the same as a truncating cast to that width.
        let size_width = 1usize << (header_flags & 0x03);
        buf.extend_from_slice(&(run.len() as u64).to_le_bytes()[..size_width]);

        buf.extend_from_slice(&run);
        append_checksum(&mut buf);
        buf
    }

    /// Build a v2 OCHK continuation chunk containing the given raw messages.
    fn build_v2_ochk(raw_messages: &[(u8, u8, &[u8])], creation_order: bool) -> Vec<u8> {
        let mut buf = OCHK_SIGNATURE.to_vec();
        buf.extend_from_slice(&encode_v2_messages(raw_messages, creation_order));
        append_checksum(&mut buf);
        buf
    }

    /// Build a file image made of a flags=0 `OHDR` whose only message is a
    /// continuation pointing at `ochk`, immediately followed by `ochk`.
    fn ohdr_followed_by_ochk(ochk: &[u8]) -> Vec<u8> {
        let cont_payload = [
            (OHDR_WITH_CONTINUATION_LEN as u64).to_le_bytes(), // offset
            (ochk.len() as u64).to_le_bytes(),                 // length
        ]
        .concat();

        let mut file_data = build_v2_header(0x00, &[(0x10, 0, &cont_payload)], None, None);
        file_data.extend_from_slice(ochk);
        file_data
    }

    // ------------------------------------------------------------------
    // Tests — storage-backed parsing
    // ------------------------------------------------------------------

    #[test]
    fn parse_v1_storage_accepts_header_near_eof() {
        let header = build_v1_header(&[], 7);
        let storage = BytesStorage::new(prepend_padding(0xAA, 3, &header));

        let hdr = ObjectHeader::parse_at_storage(&storage, 3, 8, 8).unwrap();

        assert_eq!(hdr.version, 1);
        assert_eq!(hdr.reference_count, 7);
        assert!(hdr.messages.is_empty());
    }

    #[test]
    fn parse_v2_storage_accepts_header_near_eof() {
        let header = build_v2_header(0x00, &[], None, None);
        let storage = BytesStorage::new(prepend_padding(0xAA, 5, &header));

        let hdr = ObjectHeader::parse_at_storage(&storage, 5, 8, 8).unwrap();

        assert_eq!(hdr.version, 2);
        assert!(hdr.messages.is_empty());
    }

    #[test]
    fn parse_v2_storage_rejects_oversized_chunk0_size() {
        // Signature, version 2, flags 0x03 (8-byte size field), size = u64::MAX.
        let mut header = OHDR_SIGNATURE.to_vec();
        header.extend_from_slice(&[2, 0x03]);
        header.extend_from_slice(&u64::MAX.to_le_bytes());
        let storage = BytesStorage::new(header);

        let err = ObjectHeader::parse_at_storage(&storage, 0, 8, 8).unwrap_err();

        assert!(matches!(err, Error::InvalidData(_)));
    }

    // ------------------------------------------------------------------
    // Tests — Version 1
    // ------------------------------------------------------------------

    #[test]
    fn v1_empty_header() {
        let data = build_v1_header(&[], 1);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.version, 1);
        assert_eq!(hdr.reference_count, 1);
        assert!(hdr.messages.is_empty());
        assert!(hdr.modification_time.is_none());
    }

    #[test]
    fn v1_nil_message() {
        // One nil message carrying 4 bytes of padding.
        let data = build_v1_header(&[(0x0000, 0, &[0u8; 4])], 1);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
        assert!(matches!(hdr.messages[0], HdfMessage::Nil));
    }

    #[test]
    fn v1_unknown_message() {
        // Unrecognised type ids must surface as HdfMessage::Unknown.
        let payload = [0xAA, 0xBB, 0xCC];
        let data = build_v1_header(&[(0x00FF, 0, &payload)], 2);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.reference_count, 2);
        assert_eq!(hdr.messages.len(), 1);
        if let HdfMessage::Unknown { type_id, data } = &hdr.messages[0] {
            assert_eq!(*type_id, 0x00FF);
            assert_eq!(data.as_slice(), &payload);
        } else {
            panic!("expected Unknown, got {:?}", hdr.messages[0]);
        }
    }

    #[test]
    fn v1_symbol_table_message() {
        // Type 0x0011: B-tree address (8 bytes) then heap address (8 bytes).
        let payload = [0x1000u64.to_le_bytes(), 0x2000u64.to_le_bytes()].concat();

        let data = build_v1_header(&[(0x0011, 0, &payload)], 1);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
        if let HdfMessage::SymbolTable(st) = &hdr.messages[0] {
            assert_eq!(st.btree_address, 0x1000);
            assert_eq!(st.heap_address, 0x2000);
        } else {
            panic!("expected SymbolTable, got {:?}", hdr.messages[0]);
        }
    }

    #[test]
    fn v1_continuation_message() {
        // The continuation target is a bare v1 message run (no header prefix)
        // holding a single unknown message.
        let unknown_payload = [0xDD; 2];
        let cont_chunk = encode_v1_messages(&[(0x00FE, 0, &unknown_payload)]);

        // The target sits right behind the main header: 16-byte v1 prefix plus
        // one continuation message (8-byte envelope + 8-byte offset + 8-byte
        // length).
        let cont_chunk_offset = (16 + 8 + 16) as u64;
        let cont_payload = [
            cont_chunk_offset.to_le_bytes(),         // offset
            (cont_chunk.len() as u64).to_le_bytes(), // length
        ]
        .concat();

        let mut file_data = build_v1_header(&[(MSG_TYPE_CONTINUATION, 0, &cont_payload)], 1);
        assert_eq!(file_data.len() as u64, cont_chunk_offset);
        file_data.extend_from_slice(&cont_chunk);

        let hdr = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap();
        // Expect the continuation marker followed by the message it led to.
        assert_eq!(hdr.messages.len(), 2);
        assert!(matches!(
            hdr.messages[0],
            HdfMessage::ObjectHeaderContinuation
        ));
        if let HdfMessage::Unknown { type_id, data } = &hdr.messages[1] {
            assert_eq!(*type_id, 0x00FE);
            assert_eq!(data.as_slice(), &unknown_payload);
        } else {
            panic!(
                "expected Unknown from continuation, got {:?}",
                hdr.messages[1]
            );
        }
    }

    #[test]
    fn v1_nonzero_address_offset() {
        // The header starts 64 bytes into the file image.
        let header = build_v1_header(&[(0x00AA, 0, &[0x01])], 3);
        let file_data = prepend_padding(0xFF, 64, &header);

        let hdr = ObjectHeader::parse_at(&file_data, 64, 8, 8).unwrap();
        assert_eq!(hdr.version, 1);
        assert_eq!(hdr.reference_count, 3);
        assert_eq!(hdr.messages.len(), 1);
    }

    #[test]
    fn v1_bad_version() {
        let mut data = build_v1_header(&[], 1);
        data[0] = 3; // overwrite the version byte with an unsupported value
        let err = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap_err();
        assert!(matches!(err, Error::UnsupportedObjectHeaderVersion(3)));
    }

    // ------------------------------------------------------------------
    // Tests — Version 2
    // ------------------------------------------------------------------

    #[test]
    fn v2_empty_header() {
        // Flags = 0: 1-byte size field and no optional prefix fields.
        let data = build_v2_header(0x00, &[], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.version, 2);
        assert!(hdr.messages.is_empty());
        assert!(hdr.modification_time.is_none());
    }

    #[test]
    fn v2_nil_message() {
        let data = build_v2_header(0x00, &[(0x00, 0, &[0u8; 3])], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
        assert!(matches!(hdr.messages[0], HdfMessage::Nil));
    }

    #[test]
    fn v2_unknown_message() {
        let payload = [0x11, 0x22];
        let data = build_v2_header(0x00, &[(0xFE, 0, &payload)], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
        if let HdfMessage::Unknown { type_id, data } = &hdr.messages[0] {
            assert_eq!(*type_id, 0x00FE);
            assert_eq!(data.as_slice(), &payload);
        } else {
            panic!("expected Unknown, got {:?}", hdr.messages[0]);
        }
    }

    #[test]
    fn v2_with_timestamps() {
        // Bit 5 set (timestamps present), 1-byte size field.
        // Order: access, modification, change, birth.
        let ts = [1000u32, 2000, 3000, 4000];
        let data = build_v2_header(0x20, &[], Some(ts), None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.modification_time, Some(2000));
    }

    #[test]
    fn v2_with_phase_change() {
        // Bit 4 set (phase-change values present), 1-byte size field.
        let data = build_v2_header(0x10, &[], None, Some((8, 6)));
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert!(hdr.messages.is_empty());
    }

    #[test]
    fn v2_with_creation_order() {
        // Bit 2 set (creation order tracked), 1-byte size field.
        let payload = [0xAA];
        let data = build_v2_header(0x04, &[(0xFE, 0, &payload)], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
        if let HdfMessage::Unknown { type_id, .. } = &hdr.messages[0] {
            assert_eq!(*type_id, 0x00FE);
        } else {
            panic!("expected Unknown, got {:?}", hdr.messages[0]);
        }
    }

    #[test]
    fn v2_2byte_size_field() {
        // Size-width bits = 1: 2-byte size field.
        let payload = [0x42; 5];
        let data = build_v2_header(0x01, &[(0xFE, 0, &payload)], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
    }

    #[test]
    fn v2_4byte_size_field() {
        // Size-width bits = 2: 4-byte size field.
        let data = build_v2_header(0x02, &[(0xFE, 0, &[0x01])], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
    }

    #[test]
    fn v2_8byte_size_field() {
        // Size-width bits = 3: 8-byte size field.
        let data = build_v2_header(0x03, &[(0xFE, 0, &[0x01])], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 1);
    }

    #[test]
    fn v2_checksum_mismatch() {
        let mut data = build_v2_header(0x00, &[(0xFE, 0, &[0x01])], None, None);
        // Flip every bit of the final checksum byte.
        if let Some(last) = data.last_mut() {
            *last ^= 0xFF;
        }
        let err = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap_err();
        assert!(matches!(err, Error::ChecksumMismatch { .. }));
    }

    #[test]
    fn v2_continuation_chunk() {
        // An OCHK holding one unknown message, placed directly after an OHDR
        // whose only message is the continuation that points at it.
        let unknown_payload = [0xCC; 3];
        let ochk = build_v2_ochk(&[(0xFD, 0, &unknown_payload)], false);

        let file_data = ohdr_followed_by_ochk(&ochk);
        // The OHDR must end exactly where the continuation says the OCHK starts.
        assert_eq!(file_data.len() - ochk.len(), OHDR_WITH_CONTINUATION_LEN);

        let hdr = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap();
        // Expect the continuation marker followed by the OCHK's message.
        assert_eq!(hdr.messages.len(), 2);
        assert!(matches!(
            hdr.messages[0],
            HdfMessage::ObjectHeaderContinuation
        ));
        if let HdfMessage::Unknown { type_id, data } = &hdr.messages[1] {
            assert_eq!(*type_id, 0x00FD);
            assert_eq!(data.as_slice(), &unknown_payload);
        } else {
            panic!("expected Unknown from OCHK, got {:?}", hdr.messages[1]);
        }
    }

    #[test]
    fn v2_ochk_checksum_mismatch() {
        let unknown_payload = [0xCC; 3];
        let mut ochk = build_v2_ochk(&[(0xFD, 0, &unknown_payload)], false);
        // Flip every bit of the OCHK's final checksum byte.
        if let Some(last) = ochk.last_mut() {
            *last ^= 0xFF;
        }

        let file_data = ohdr_followed_by_ochk(&ochk);

        let err = ObjectHeader::parse_at(&file_data, 0, 8, 8).unwrap_err();
        assert!(matches!(err, Error::ChecksumMismatch { .. }));
    }

    #[test]
    fn v2_multiple_messages() {
        // Two unknown messages back to back in chunk #0.
        let p1 = [0x01, 0x02];
        let p2 = [0x03, 0x04, 0x05];
        let data = build_v2_header(0x00, &[(0xA0, 0, &p1), (0xA1, 0, &p2)], None, None);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 2);
        if let HdfMessage::Unknown { type_id, .. } = &hdr.messages[0] {
            assert_eq!(*type_id, 0x00A0);
        } else {
            panic!("expected Unknown 0xA0, got {:?}", hdr.messages[0]);
        }
        if let HdfMessage::Unknown { type_id, .. } = &hdr.messages[1] {
            assert_eq!(*type_id, 0x00A1);
        } else {
            panic!("expected Unknown 0xA1, got {:?}", hdr.messages[1]);
        }
    }

    #[test]
    fn v2_zero_length_nil_before_more_messages() {
        // A zero-length nil followed by real data must not end the chunk.
        let p1 = [0xAA];
        let p2 = [0xBB];
        let data = build_v2_header(
            0x04,
            &[(0xFE, 0, &p1), (0x00, 0, &[]), (0xFD, 0, &p2)],
            None,
            None,
        );
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.messages.len(), 3);
        assert!(matches!(hdr.messages[0], HdfMessage::Unknown { .. }));
        assert!(matches!(hdr.messages[1], HdfMessage::Nil));
        assert!(matches!(hdr.messages[2], HdfMessage::Unknown { .. }));
    }

    #[test]
    fn v2_nonzero_address() {
        // The OHDR starts 128 bytes into the file image.
        let ohdr = build_v2_header(0x00, &[(0xFE, 0, &[0x42])], None, None);
        let file_data = prepend_padding(0, 128, &ohdr);

        let hdr = ObjectHeader::parse_at(&file_data, 128, 8, 8).unwrap();
        assert_eq!(hdr.version, 2);
        assert_eq!(hdr.messages.len(), 1);
    }

    #[test]
    fn v2_all_flags_combined() {
        // Timestamps (0x20) + phase change (0x10) + creation order (0x04)
        // + 2-byte size field (0x01).
        let flags = 0x20 | 0x10 | 0x04 | 0x01;
        let ts = [100u32, 200, 300, 400];
        let payload = [0xBB];
        let data = build_v2_header(flags, &[(0xFE, 0, &payload)], Some(ts), Some((12, 8)));
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.version, 2);
        assert_eq!(hdr.modification_time, Some(200));
        assert_eq!(hdr.messages.len(), 1);
    }

    #[test]
    fn v1_multiple_messages() {
        // Two messages inside one v1 header.
        let p1 = [0xAA; 4];
        let p2 = [0xBB; 8];
        let data = build_v1_header(&[(0x00FF, 0, &p1), (0x00FE, 0, &p2)], 5);
        let hdr = ObjectHeader::parse_at(&data, 0, 8, 8).unwrap();
        assert_eq!(hdr.version, 1);
        assert_eq!(hdr.reference_count, 5);
        assert_eq!(hdr.messages.len(), 2);
    }

    #[test]
    fn v1_4byte_offsets() {
        // Symbol table message encoded with 4-byte addresses, parsed with
        // 4-byte offset and length sizes.
        let payload = [0x1000u32.to_le_bytes(), 0x2000u32.to_le_bytes()].concat();

        let data = build_v1_header(&[(0x0011, 0, &payload)], 1);
        let hdr = ObjectHeader::parse_at(&data, 0, 4, 4).unwrap();
        assert_eq!(hdr.messages.len(), 1);
        if let HdfMessage::SymbolTable(st) = &hdr.messages[0] {
            assert_eq!(st.btree_address, 0x1000);
            assert_eq!(st.heap_address, 0x2000);
        } else {
            panic!("expected SymbolTable, got {:?}", hdr.messages[0]);
        }
    }
}