Skip to main content

hdf5_reader/
btree_v2.rs

1//! HDF5 B-tree Version 2.
2//!
3//! V2 B-trees are used by newer-style groups and datasets for indexed link
4//! storage, attribute storage, and chunked dataset indexing. The header
5//! (`BTHD`) describes the tree parameters. Internal nodes (`BTIN`) and leaf
6//! nodes (`BTLF`) contain the actual records.
7//!
8//! This module provides the header parse, record types, and a traversal
9//! function that collects all records from a tree.
10
11use std::collections::HashSet;
12
13use crate::checksum::jenkins_lookup3;
14use crate::error::{Error, Result};
15use crate::io::Cursor;
16use crate::storage::Storage;
17
18// ---------------------------------------------------------------------------
19// Signatures
20// ---------------------------------------------------------------------------
21
22const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
23const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
24const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
25const MAX_BTREE_V2_DEPTH: u16 = 64;
26
27// ---------------------------------------------------------------------------
28// Header
29// ---------------------------------------------------------------------------
30
31/// Parsed B-tree v2 header.
32#[derive(Debug, Clone)]
33pub struct BTreeV2Header {
34    /// B-tree type (determines the record format).
35    pub btree_type: u8,
36    /// Size in bytes of each B-tree node (both internal and leaf).
37    pub node_size: u32,
38    /// Size in bytes of each record.
39    pub record_size: u16,
40    /// Depth of the tree (0 = root is a leaf).
41    pub depth: u16,
42    /// Percent full at which to split a node.
43    pub split_percent: u8,
44    /// Percent full at which to merge a node.
45    pub merge_percent: u8,
46    /// Address of the root node.
47    pub root_node_address: u64,
48    /// Number of records in the root node.
49    pub num_records_in_root: u16,
50    /// Total number of records in the entire tree.
51    pub total_records: u64,
52}
53
54impl BTreeV2Header {
55    /// Parse a B-tree v2 header at the current cursor position.
56    ///
57    /// Format:
58    /// - Signature: `BTHD` (4 bytes)
59    /// - Version: 0 (1 byte)
60    /// - B-tree type (u8)
61    /// - Node size (u32 LE)
62    /// - Record size (u16 LE)
63    /// - Depth (u16 LE)
64    /// - Split percent (u8)
65    /// - Merge percent (u8)
66    /// - Root node address (`offset_size` bytes)
67    /// - Number of records in root node (u16 LE)
68    /// - Total number of records in tree (`length_size` bytes)
69    /// - Checksum (u32 LE)
70    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
71        let start = cursor.position();
72
73        let sig = cursor.read_bytes(4)?;
74        if sig != BTHD_SIGNATURE {
75            return Err(Error::InvalidBTreeV2Signature { context: "header" });
76        }
77
78        let version = cursor.read_u8()?;
79        if version != 0 {
80            return Err(Error::UnsupportedBTreeVersion(version));
81        }
82
83        let btree_type = cursor.read_u8()?;
84        let node_size = cursor.read_u32_le()?;
85        let record_size = cursor.read_u16_le()?;
86        let depth = cursor.read_u16_le()?;
87        let split_percent = cursor.read_u8()?;
88        let merge_percent = cursor.read_u8()?;
89        let root_node_address = cursor.read_offset(offset_size)?;
90        let num_records_in_root = cursor.read_u16_le()?;
91        let total_records = cursor.read_length(length_size)?;
92
93        // Checksum covers everything from signature through total_records.
94        let checksum_end = cursor.position();
95        let stored_checksum = cursor.read_u32_le()?;
96
97        let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
98        if computed != stored_checksum {
99            return Err(Error::ChecksumMismatch {
100                expected: stored_checksum,
101                actual: computed,
102            });
103        }
104
105        Ok(BTreeV2Header {
106            btree_type,
107            node_size,
108            record_size,
109            depth,
110            split_percent,
111            merge_percent,
112            root_node_address,
113            num_records_in_root,
114            total_records,
115        })
116    }
117
118    /// Parse a B-tree v2 header from random-access storage.
119    pub fn parse_at_storage(
120        storage: &dyn Storage,
121        address: u64,
122        offset_size: u8,
123        length_size: u8,
124    ) -> Result<Self> {
125        let header_len = 4
126            + 1
127            + 1
128            + 4
129            + 2
130            + 2
131            + 1
132            + 1
133            + usize::from(offset_size)
134            + 2
135            + usize::from(length_size)
136            + 4;
137        let bytes = storage.read_range(address, header_len)?;
138        let mut cursor = Cursor::new(bytes.as_ref());
139        Self::parse(&mut cursor, offset_size, length_size)
140    }
141}
142
143// ---------------------------------------------------------------------------
144// Records
145// ---------------------------------------------------------------------------
146
147/// A record from a B-tree v2.
148///
149/// The record format depends on the B-tree type field in the header.
150#[derive(Debug, Clone)]
151pub enum BTreeV2Record {
152    /// Type 1: indirectly accessed, non-filtered huge fractal heap object.
153    HugeIndirectNonFiltered {
154        address: u64,
155        length: u64,
156        object_id: u64,
157    },
158    /// Type 2: indirectly accessed, filtered huge fractal heap object.
159    HugeIndirectFiltered {
160        address: u64,
161        filtered_length: u64,
162        filter_mask: u32,
163        memory_length: u64,
164        object_id: u64,
165    },
166    /// Type 3: directly accessed, non-filtered huge fractal heap object.
167    HugeDirectNonFiltered { address: u64, length: u64 },
168    /// Type 4: directly accessed, filtered huge fractal heap object.
169    HugeDirectFiltered {
170        address: u64,
171        filtered_length: u64,
172        filter_mask: u32,
173        memory_length: u64,
174    },
175    /// Type 5: Link name for indexed group (hashed).
176    LinkNameHash { hash: u32, heap_id: Vec<u8> },
177    /// Type 6: Creation order for indexed group.
178    CreationOrder { order: u64, heap_id: Vec<u8> },
179    /// Type 8: Attribute name for indexed group (hashed).
180    AttributeNameHash {
181        hash: u32,
182        flags: u8,
183        creation_order: u32,
184        heap_id: Vec<u8>,
185    },
186    /// Type 9: Attribute creation order.
187    AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
188    /// Type 10: Non-filtered chunked dataset record (v2 chunk index).
189    ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
190    /// Type 11: Filtered chunked dataset record (v2 chunk index).
191    ChunkedFiltered {
192        address: u64,
193        chunk_size: u64,
194        filter_mask: u32,
195        offsets: Vec<u64>,
196    },
197    /// Type 7: shared object-header message stored in the SOHM heap.
198    SharedMessageHeap {
199        hash: u32,
200        reference_count: u32,
201        heap_id: Vec<u8>,
202    },
203    /// Type 7: shared object-header message stored in an object header.
204    SharedMessageObjectHeader {
205        hash: u32,
206        message_type: u16,
207        object_header_index: u16,
208        object_header_address: u64,
209    },
210    /// Unknown/unsupported record type — raw bytes preserved.
211    Unknown { record_type: u8, data: Vec<u8> },
212}
213
214// ---------------------------------------------------------------------------
215// Record parsing
216// ---------------------------------------------------------------------------
217
218/// Parse a single record of the given B-tree type.
219#[allow(clippy::too_many_arguments)]
220fn parse_record(
221    cursor: &mut Cursor,
222    btree_type: u8,
223    record_size: u16,
224    offset_size: u8,
225    length_size: u8,
226    ndims: Option<u32>,
227    chunk_dims: &[u32],
228    heap_id_len: usize,
229) -> Result<BTreeV2Record> {
230    let record_start = cursor.position();
231
232    let record = match btree_type {
233        // Type 1: indirectly accessed, non-filtered huge fractal heap object.
234        1 => BTreeV2Record::HugeIndirectNonFiltered {
235            address: cursor.read_offset(offset_size)?,
236            length: cursor.read_length(length_size)?,
237            object_id: cursor.read_length(length_size)?,
238        },
239
240        // Type 2: indirectly accessed, filtered huge fractal heap object.
241        2 => BTreeV2Record::HugeIndirectFiltered {
242            address: cursor.read_offset(offset_size)?,
243            filtered_length: cursor.read_length(length_size)?,
244            filter_mask: cursor.read_u32_le()?,
245            memory_length: cursor.read_length(length_size)?,
246            object_id: cursor.read_length(length_size)?,
247        },
248
249        // Type 3: directly accessed, non-filtered huge fractal heap object.
250        3 => BTreeV2Record::HugeDirectNonFiltered {
251            address: cursor.read_offset(offset_size)?,
252            length: cursor.read_length(length_size)?,
253        },
254
255        // Type 4: directly accessed, filtered huge fractal heap object.
256        4 => BTreeV2Record::HugeDirectFiltered {
257            address: cursor.read_offset(offset_size)?,
258            filtered_length: cursor.read_length(length_size)?,
259            filter_mask: cursor.read_u32_le()?,
260            memory_length: cursor.read_length(length_size)?,
261        },
262
263        // Type 5: link name hash
264        5 => {
265            let hash = cursor.read_u32_le()?;
266            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
267            BTreeV2Record::LinkNameHash { hash, heap_id }
268        }
269
270        // Type 6: creation order
271        6 => {
272            let order = cursor.read_u64_le()?;
273            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
274            BTreeV2Record::CreationOrder { order, heap_id }
275        }
276
277        // Type 7: shared object-header messages.
278        7 => {
279            let location = cursor.read_u8()?;
280            cursor.skip(3)?;
281            let hash = cursor.read_u32_le()?;
282            match location {
283                0 => {
284                    let reference_count = cursor.read_u32_le()?;
285                    let heap_id = cursor.read_bytes(8)?.to_vec();
286                    BTreeV2Record::SharedMessageHeap {
287                        hash,
288                        reference_count,
289                        heap_id,
290                    }
291                }
292                1 => {
293                    let _reserved = cursor.read_u8()?;
294                    let message_type = u16::from(cursor.read_u8()?);
295                    let object_header_index = cursor.read_u16_le()?;
296                    let object_header_address = cursor.read_offset(offset_size)?;
297                    BTreeV2Record::SharedMessageObjectHeader {
298                        hash,
299                        message_type,
300                        object_header_index,
301                        object_header_address,
302                    }
303                }
304                other => {
305                    return Err(Error::InvalidData(format!(
306                        "unknown SOHM B-tree record location: {other}"
307                    )));
308                }
309            }
310        }
311
312        // Type 8: attribute name hash
313        8 => {
314            let hash = cursor.read_u32_le()?;
315            let flags = cursor.read_u8()?;
316            let creation_order = cursor.read_u32_le()?;
317            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
318            BTreeV2Record::AttributeNameHash {
319                hash,
320                flags,
321                creation_order,
322                heap_id,
323            }
324        }
325
326        // Type 9: attribute creation order
327        9 => {
328            let order = cursor.read_u32_le()?;
329            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
330            BTreeV2Record::AttributeCreationOrder { order, heap_id }
331        }
332
333        // Type 10: non-filtered chunk
334        10 => {
335            // Chunk offsets are encoded as scaled 64-bit values.
336            // The number of offset dimensions is calculated from the record size.
337            // Each offset is 8 bytes in a type-10 record.
338            let offset_bytes_available = record_payload_len(
339                record_size,
340                offset_size as usize,
341                "B-tree v2 type-10 chunk record is shorter than its address",
342            )?;
343            let address = cursor.read_offset(offset_size)?;
344            let num_offsets = offset_bytes_available / 8;
345            let offsets = read_scaled_chunk_offsets(cursor, num_offsets, ndims, chunk_dims)?;
346            BTreeV2Record::ChunkedNonFiltered { address, offsets }
347        }
348
349        // Type 11: filtered chunk
350        11 => {
351            // nbytes (chunk size on disk) is encoded using length_size bytes.
352            let nbytes_size = length_size as usize;
353            let fixed_size = offset_size as usize + nbytes_size + 4; // filter_mask
354            let remaining = record_payload_len(
355                record_size,
356                fixed_size,
357                "B-tree v2 type-11 chunk record is shorter than its fixed fields",
358            )?;
359            let address = cursor.read_offset(offset_size)?;
360            let chunk_size = cursor.read_length(length_size)?;
361            let filter_mask = cursor.read_u32_le()?;
362            let num_offsets = remaining / 8;
363            let offsets = read_scaled_chunk_offsets(cursor, num_offsets, ndims, chunk_dims)?;
364            BTreeV2Record::ChunkedFiltered {
365                address,
366                chunk_size,
367                filter_mask,
368                offsets,
369            }
370        }
371
372        // Unknown type — read raw bytes.
373        _ => {
374            let data = cursor.read_bytes(record_size as usize)?.to_vec();
375            return Ok(BTreeV2Record::Unknown {
376                record_type: btree_type,
377                data,
378            });
379        }
380    };
381
382    // Ensure we consumed no more than record_size bytes, then skip padding.
383    let consumed = cursor.position() - record_start;
384    let record_size = u64::from(record_size);
385    if consumed > record_size {
386        return Err(Error::InvalidData(format!(
387            "B-tree v2 type-{btree_type} record consumed {consumed} bytes but record size is {record_size}"
388        )));
389    }
390    if consumed < record_size {
391        cursor.skip((record_size - consumed) as usize)?;
392    }
393
394    Ok(record)
395}
396
397fn record_payload_len(record_size: u16, fixed_size: usize, error_message: &str) -> Result<usize> {
398    (record_size as usize)
399        .checked_sub(fixed_size)
400        .ok_or_else(|| Error::InvalidData(error_message.into()))
401}
402
403fn read_scaled_chunk_offsets(
404    cursor: &mut Cursor,
405    num_offsets: usize,
406    ndims: Option<u32>,
407    chunk_dims: &[u32],
408) -> Result<Vec<u64>> {
409    if let Some(ndims) = ndims {
410        let ndims = usize::try_from(ndims)
411            .map_err(|_| Error::InvalidData("B-tree v2 chunk rank exceeds usize".into()))?;
412        if num_offsets != ndims {
413            return Err(Error::InvalidData(format!(
414                "B-tree v2 chunk record has {num_offsets} offsets for {ndims} dimensions"
415            )));
416        }
417    }
418
419    if !chunk_dims.is_empty() && num_offsets != chunk_dims.len() {
420        return Err(Error::InvalidData(format!(
421            "B-tree v2 chunk record has {num_offsets} offsets but {} chunk dimensions",
422            chunk_dims.len()
423        )));
424    }
425
426    let mut offsets = Vec::with_capacity(num_offsets);
427    for dim in 0..num_offsets {
428        let scaled = cursor.read_u64_le()?;
429        let chunk_extent = chunk_dims.get(dim).copied().unwrap_or(1);
430        let offset = scaled
431            .checked_mul(u64::from(chunk_extent))
432            .ok_or_else(|| Error::InvalidData("B-tree v2 chunk offset overflows u64".into()))?;
433        offsets.push(offset);
434    }
435    Ok(offsets)
436}
437
438fn record_matches_chunk_bounds(
439    record: &BTreeV2Record,
440    chunk_dims: &[u32],
441    chunk_bounds: Option<(&[u64], &[u64])>,
442) -> bool {
443    let Some((first_chunk, last_chunk)) = chunk_bounds else {
444        return true;
445    };
446
447    let offsets = match record {
448        BTreeV2Record::ChunkedNonFiltered { offsets, .. }
449        | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
450        _ => return true,
451    };
452
453    offsets.iter().enumerate().all(|(dim, offset)| {
454        let chunk_index = *offset / u64::from(chunk_dims[dim]);
455        chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
456    })
457}
458
459fn record_matches_link_name_hash(record: &BTreeV2Record, target_hash: Option<u32>) -> bool {
460    match target_hash {
461        Some(target_hash) => {
462            matches!(record, BTreeV2Record::LinkNameHash { hash, .. } if *hash == target_hash)
463        }
464        None => true,
465    }
466}
467
468fn record_matches_query(
469    record: &BTreeV2Record,
470    chunk_dims: &[u32],
471    chunk_bounds: Option<(&[u64], &[u64])>,
472    link_name_hash: Option<u32>,
473) -> bool {
474    record_matches_link_name_hash(record, link_name_hash)
475        && record_matches_chunk_bounds(record, chunk_dims, chunk_bounds)
476}
477
478fn link_name_hash(record: &BTreeV2Record) -> Option<u32> {
479    match record {
480        BTreeV2Record::LinkNameHash { hash, .. } => Some(*hash),
481        _ => None,
482    }
483}
484
485fn child_may_match_link_name_hash(
486    records: &[BTreeV2Record],
487    child_index: usize,
488    target_hash: Option<u32>,
489) -> bool {
490    let Some(target_hash) = target_hash else {
491        return true;
492    };
493
494    let lower_matches = child_index == 0
495        || link_name_hash(&records[child_index - 1]).map_or(true, |hash| target_hash >= hash);
496    let upper_matches = child_index == records.len()
497        || link_name_hash(&records[child_index]).map_or(true, |hash| target_hash <= hash);
498    lower_matches && upper_matches
499}
500
501fn validate_btree_v2_depth(depth: u16) -> Result<()> {
502    if depth > MAX_BTREE_V2_DEPTH {
503        return Err(Error::InvalidData(format!(
504            "B-tree v2 depth {depth} exceeds traversal limit {MAX_BTREE_V2_DEPTH}"
505        )));
506    }
507    Ok(())
508}
509
510fn enter_btree_v2_node(visited: &mut HashSet<u64>, address: u64) -> Result<()> {
511    if !visited.insert(address) {
512        return Err(Error::InvalidData(format!(
513            "B-tree v2 traversal revisits node at offset {address:#x}"
514        )));
515    }
516    Ok(())
517}
518
519// ---------------------------------------------------------------------------
520// Node parsing
521// ---------------------------------------------------------------------------
522
523/// Compute the number of bytes needed to represent `max_records` as an
524/// unsigned integer (used for child-node record counts in internal nodes).
525fn num_records_size(max_records: u64) -> usize {
526    if max_records <= 0xFF {
527        1
528    } else if max_records <= 0xFFFF {
529        2
530    } else if max_records <= 0xFFFF_FFFF {
531        4
532    } else {
533        8
534    }
535}
536
537/// Compute the maximum number of records that fit in a leaf node.
538fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
539    // Leaf node overhead: signature(4) + version(1) + type(1) + checksum(4) = 10
540    let overhead = 10u32;
541    if node_size <= overhead || record_size == 0 {
542        return 0;
543    }
544    ((node_size - overhead) / record_size as u32) as u64
545}
546
547/// Compute the maximum number of records that fit in an internal node.
548/// This depends on the pointer size (offset_size) and the number-of-records
549/// encoding for child nodes, which makes it recursive in principle. We use
550/// an iterative approach.
551fn max_internal_records(
552    node_size: u32,
553    record_size: u16,
554    offset_size: u8,
555    max_child_records: u64,
556) -> u64 {
557    // Internal node overhead: signature(4) + version(1) + type(1) + checksum(4) = 10
558    let overhead = 10u32;
559    if node_size <= overhead || record_size == 0 {
560        return 0;
561    }
562    let available = (node_size - overhead) as u64;
563    // Each entry in an internal node is: record(record_size) + child_pointer(offset_size) + num_records(var)
564    let nrec_size = num_records_size(max_child_records) as u64;
565    let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
566    // There is one more child pointer + num_records than records.
567    // So: n * record_size + (n+1) * (offset_size + nrec_size) <= available
568    // => n * (record_size + offset_size + nrec_size) + offset_size + nrec_size <= available
569    let extra = offset_size as u64 + nrec_size;
570    if available <= extra {
571        return 0;
572    }
573    (available - extra) / entry_size
574}
575
576fn node_checksum_bounds(
577    cursor: &Cursor<'_>,
578    start: u64,
579    node_size: u32,
580    compact_end: u64,
581    context: &'static str,
582) -> Result<usize> {
583    let start = usize::try_from(start)
584        .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} offset is too large")))?;
585    let node_size = usize::try_from(node_size)
586        .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} size is too large")))?;
587    if node_size < 10 {
588        return Err(Error::InvalidData(format!(
589            "B-tree v2 {context} node is too small: {node_size} bytes"
590        )));
591    }
592
593    let checksum_pos = usize::try_from(compact_end)
594        .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} checksum is too large")))?;
595    if checksum_pos < start {
596        return Err(Error::InvalidData(format!(
597            "B-tree v2 {context} payload starts before node"
598        )));
599    }
600    let node_end = start
601        .checked_add(node_size)
602        .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} size overflow")))?;
603
604    if node_end > cursor.data().len() {
605        return Err(Error::UnexpectedEof {
606            offset: start as u64,
607            needed: node_size as u64,
608            available: cursor.data().len().saturating_sub(start) as u64,
609        });
610    }
611    let checksum_end = checksum_pos
612        .checked_add(4)
613        .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} checksum overflow")))?;
614    if checksum_end > node_end {
615        return Err(Error::InvalidData(format!(
616            "B-tree v2 {context} contents exceed node size"
617        )));
618    }
619
620    Ok(checksum_pos)
621}
622
623fn verify_node_checksum(
624    cursor: &mut Cursor<'_>,
625    start: u64,
626    node_size: u32,
627    compact_end: u64,
628    context: &'static str,
629) -> Result<()> {
630    let checksum_pos = node_checksum_bounds(cursor, start, node_size, compact_end, context)?;
631    cursor.set_position(checksum_pos as u64);
632    let stored_checksum = cursor.read_u32_le()?;
633    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_pos]);
634    if computed != stored_checksum {
635        return Err(Error::ChecksumMismatch {
636            expected: stored_checksum,
637            actual: computed,
638        });
639    }
640    Ok(())
641}
642
643/// Parse a leaf node and collect its records.
644#[allow(clippy::too_many_arguments)]
645fn parse_leaf_node(
646    cursor: &mut Cursor,
647    header: &BTreeV2Header,
648    offset_size: u8,
649    length_size: u8,
650    ndims: Option<u32>,
651    chunk_dims: &[u32],
652    chunk_bounds: Option<(&[u64], &[u64])>,
653    link_name_hash: Option<u32>,
654    num_records: u16,
655    heap_id_len: usize,
656    records: &mut Vec<BTreeV2Record>,
657) -> Result<()> {
658    let start = cursor.position();
659
660    let sig = cursor.read_bytes(4)?;
661    if sig != BTLF_SIGNATURE {
662        return Err(Error::InvalidBTreeV2Signature {
663            context: "leaf node",
664        });
665    }
666
667    let version = cursor.read_u8()?;
668    if version != 0 {
669        return Err(Error::UnsupportedBTreeVersion(version));
670    }
671
672    let node_type = cursor.read_u8()?;
673    if node_type != header.btree_type {
674        return Err(Error::InvalidData(format!(
675            "B-tree v2 leaf node type mismatch: header says {}, node says {}",
676            header.btree_type, node_type
677        )));
678    }
679
680    for _ in 0..num_records {
681        let record = parse_record(
682            cursor,
683            header.btree_type,
684            header.record_size,
685            offset_size,
686            length_size,
687            ndims,
688            chunk_dims,
689            heap_id_len,
690        )?;
691        if record_matches_query(&record, chunk_dims, chunk_bounds, link_name_hash) {
692            records.push(record);
693        }
694    }
695
696    verify_node_checksum(
697        cursor,
698        start,
699        header.node_size,
700        cursor.position(),
701        "leaf node",
702    )?;
703
704    Ok(())
705}
706
707/// Parse an internal node, collecting child addresses and recursing.
708#[allow(clippy::too_many_arguments)]
709fn parse_internal_node(
710    data: &[u8],
711    address: u64,
712    header: &BTreeV2Header,
713    offset_size: u8,
714    length_size: u8,
715    ndims: Option<u32>,
716    chunk_dims: &[u32],
717    chunk_bounds: Option<(&[u64], &[u64])>,
718    link_name_hash: Option<u32>,
719    num_records: u16,
720    depth: u16,
721    heap_id_len: usize,
722    visited: &mut HashSet<u64>,
723    records: &mut Vec<BTreeV2Record>,
724) -> Result<()> {
725    if depth == 0 {
726        return Err(Error::InvalidData(
727            "B-tree v2 internal node traversal reached depth zero".into(),
728        ));
729    }
730    enter_btree_v2_node(visited, address)?;
731
732    let mut cursor = Cursor::new(data);
733    cursor.set_position(address);
734
735    let start = cursor.position();
736
737    let sig = cursor.read_bytes(4)?;
738    if sig != BTIN_SIGNATURE {
739        return Err(Error::InvalidBTreeV2Signature {
740            context: "internal node",
741        });
742    }
743
744    let version = cursor.read_u8()?;
745    if version != 0 {
746        return Err(Error::UnsupportedBTreeVersion(version));
747    }
748
749    let node_type = cursor.read_u8()?;
750    if node_type != header.btree_type {
751        return Err(Error::InvalidData(format!(
752            "B-tree v2 internal node type mismatch: header says {}, node says {}",
753            header.btree_type, node_type
754        )));
755    }
756
757    // Compute max records for children to know the encoding size for
758    // child record counts.
759    let max_child_records = if depth == 1 {
760        max_leaf_records(header.node_size, header.record_size)
761    } else {
762        // For deeper trees, compute iteratively.
763        let leaf_max = max_leaf_records(header.node_size, header.record_size);
764        let mut prev_max = leaf_max;
765        for _ in 1..depth {
766            prev_max =
767                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
768        }
769        prev_max
770    };
771    let nrec_bytes = num_records_size(max_child_records);
772
773    // Read records and child pointers interleaved:
774    // child[0], record[0], child[1], record[1], ..., record[n-1], child[n]
775    // Plus total_records counts for each child.
776    //
777    // Actually per the HDF5 spec the layout is:
778    // record[0], record[1], ..., record[n-1],
779    // child_ptr[0], nrec[0], total[0], child_ptr[1], nrec[1], total[1], ..., child_ptr[n], nrec[n], total[n]
780    //
781    // Records first, then child pointers with their metadata.
782
783    // Read all records.
784    let mut node_records = Vec::with_capacity(num_records as usize);
785    for _ in 0..num_records {
786        let record = parse_record(
787            &mut cursor,
788            header.btree_type,
789            header.record_size,
790            offset_size,
791            length_size,
792            ndims,
793            chunk_dims,
794            heap_id_len,
795        )?;
796        node_records.push(record);
797    }
798
799    // Read child node pointers (num_records + 1 of them).
800    let num_children = num_records as usize + 1;
801
802    // Whether to include a "total records" field for each child.
803    // This is present when depth > 1.
804    let has_total_records = depth > 1;
805    // total_records encoding size for deeper nodes
806    let total_nrec_bytes = if has_total_records {
807        // Total records in a sub-tree — need enough bytes to hold the
808        // maximum total records. We use length_size as an upper bound.
809        length_size as usize
810    } else {
811        0
812    };
813
814    let mut child_addresses = Vec::with_capacity(num_children);
815    let mut child_nrecords = Vec::with_capacity(num_children);
816
817    for _ in 0..num_children {
818        let child_addr = cursor.read_offset(offset_size)?;
819        child_addresses.push(child_addr);
820        let nrec = cursor.read_uvar(nrec_bytes)?;
821        child_nrecords.push(nrec as u16);
822        if has_total_records {
823            // Skip total records count
824            cursor.read_uvar(total_nrec_bytes)?;
825        }
826    }
827
828    let compact_end = cursor.position();
829    verify_node_checksum(
830        &mut cursor,
831        start,
832        header.node_size,
833        compact_end,
834        "internal node",
835    )?;
836
837    // HDF5's v2 B-tree iterator visits child[i], then record[i], then the
838    // final child. Internal-node records are real records, not only separators.
839    let child_depth = depth - 1;
840    for (i, record) in node_records.iter().enumerate() {
841        let child_addr = child_addresses[i];
842        if !Cursor::is_undefined_offset(child_addr, offset_size)
843            && child_may_match_link_name_hash(&node_records, i, link_name_hash)
844        {
845            let child_nrec = child_nrecords[i];
846            if child_depth == 0 {
847                enter_btree_v2_node(visited, child_addr)?;
848                let mut child_cursor = Cursor::new(data);
849                child_cursor.set_position(child_addr);
850                parse_leaf_node(
851                    &mut child_cursor,
852                    header,
853                    offset_size,
854                    length_size,
855                    ndims,
856                    chunk_dims,
857                    chunk_bounds,
858                    link_name_hash,
859                    child_nrec,
860                    heap_id_len,
861                    records,
862                )?;
863            } else {
864                parse_internal_node(
865                    data,
866                    child_addr,
867                    header,
868                    offset_size,
869                    length_size,
870                    ndims,
871                    chunk_dims,
872                    chunk_bounds,
873                    link_name_hash,
874                    child_nrec,
875                    child_depth,
876                    heap_id_len,
877                    visited,
878                    records,
879                )?;
880            }
881        }
882
883        if record_matches_query(record, chunk_dims, chunk_bounds, link_name_hash) {
884            records.push(record.clone());
885        }
886    }
887
888    let final_child_index = node_records.len();
889    let final_child_addr = child_addresses[final_child_index];
890    if !Cursor::is_undefined_offset(final_child_addr, offset_size)
891        && child_may_match_link_name_hash(&node_records, final_child_index, link_name_hash)
892    {
893        let child_nrec = child_nrecords[final_child_index];
894        if child_depth == 0 {
895            enter_btree_v2_node(visited, final_child_addr)?;
896            let mut child_cursor = Cursor::new(data);
897            child_cursor.set_position(final_child_addr);
898            parse_leaf_node(
899                &mut child_cursor,
900                header,
901                offset_size,
902                length_size,
903                ndims,
904                chunk_dims,
905                chunk_bounds,
906                link_name_hash,
907                child_nrec,
908                heap_id_len,
909                records,
910            )?;
911        } else {
912            parse_internal_node(
913                data,
914                final_child_addr,
915                header,
916                offset_size,
917                length_size,
918                ndims,
919                chunk_dims,
920                chunk_bounds,
921                link_name_hash,
922                child_nrec,
923                child_depth,
924                heap_id_len,
925                visited,
926                records,
927            )?;
928        }
929    }
930
931    Ok(())
932}
933
934// ---------------------------------------------------------------------------
935// Public traversal
936// ---------------------------------------------------------------------------
937
938/// Collect all records from a B-tree v2 by traversing from the root.
939///
940/// `header` must be a previously parsed `BTreeV2Header`. `data` is the full
941/// file buffer. `ndims` is needed for chunk index record types (10 and 11).
942pub fn collect_btree_v2_records(
943    data: &[u8],
944    header: &BTreeV2Header,
945    offset_size: u8,
946    length_size: u8,
947    ndims: Option<u32>,
948    chunk_dims: &[u32],
949    chunk_bounds: Option<(&[u64], &[u64])>,
950) -> Result<Vec<BTreeV2Record>> {
951    collect_btree_v2_records_with_link_hash(
952        data,
953        header,
954        offset_size,
955        length_size,
956        ndims,
957        chunk_dims,
958        chunk_bounds,
959        None,
960    )
961}
962
963#[allow(clippy::too_many_arguments)]
964fn collect_btree_v2_records_with_link_hash(
965    data: &[u8],
966    header: &BTreeV2Header,
967    offset_size: u8,
968    length_size: u8,
969    ndims: Option<u32>,
970    chunk_dims: &[u32],
971    chunk_bounds: Option<(&[u64], &[u64])>,
972    link_name_hash: Option<u32>,
973) -> Result<Vec<BTreeV2Record>> {
974    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
975        return Ok(Vec::new());
976    }
977
978    if header.total_records == 0 || header.num_records_in_root == 0 {
979        return Ok(Vec::new());
980    }
981    validate_btree_v2_depth(header.depth)?;
982
983    // Determine heap_id_len from the record_size and btree_type.
984    let heap_id_len = compute_heap_id_len(header);
985
986    let mut records = Vec::new();
987    let mut visited = HashSet::new();
988
989    if header.depth == 0 {
990        // Root is a leaf node.
991        enter_btree_v2_node(&mut visited, header.root_node_address)?;
992        let mut cursor = Cursor::new(data);
993        cursor.set_position(header.root_node_address);
994        parse_leaf_node(
995            &mut cursor,
996            header,
997            offset_size,
998            length_size,
999            ndims,
1000            chunk_dims,
1001            chunk_bounds,
1002            link_name_hash,
1003            header.num_records_in_root,
1004            heap_id_len,
1005            &mut records,
1006        )?;
1007    } else {
1008        // Root is an internal node.
1009        parse_internal_node(
1010            data,
1011            header.root_node_address,
1012            header,
1013            offset_size,
1014            length_size,
1015            ndims,
1016            chunk_dims,
1017            chunk_bounds,
1018            link_name_hash,
1019            header.num_records_in_root,
1020            header.depth,
1021            heap_id_len,
1022            &mut visited,
1023            &mut records,
1024        )?;
1025    }
1026
1027    Ok(records)
1028}
1029
1030/// Collect link-name records whose stored hash matches `target_hash`.
1031#[cfg(test)]
1032pub(crate) fn collect_btree_v2_link_name_hash_records(
1033    data: &[u8],
1034    header: &BTreeV2Header,
1035    offset_size: u8,
1036    length_size: u8,
1037    target_hash: u32,
1038) -> Result<Vec<BTreeV2Record>> {
1039    collect_btree_v2_records_with_link_hash(
1040        data,
1041        header,
1042        offset_size,
1043        length_size,
1044        None,
1045        &[],
1046        None,
1047        Some(target_hash),
1048    )
1049}
1050
1051/// Collect all records from a B-tree v2 using random-access storage.
1052pub fn collect_btree_v2_records_storage(
1053    storage: &dyn Storage,
1054    header: &BTreeV2Header,
1055    offset_size: u8,
1056    length_size: u8,
1057    ndims: Option<u32>,
1058    chunk_dims: &[u32],
1059    chunk_bounds: Option<(&[u64], &[u64])>,
1060) -> Result<Vec<BTreeV2Record>> {
1061    collect_btree_v2_records_storage_with_link_hash(
1062        storage,
1063        header,
1064        offset_size,
1065        length_size,
1066        ndims,
1067        chunk_dims,
1068        chunk_bounds,
1069        None,
1070    )
1071}
1072
1073#[allow(clippy::too_many_arguments)]
1074fn collect_btree_v2_records_storage_with_link_hash(
1075    storage: &dyn Storage,
1076    header: &BTreeV2Header,
1077    offset_size: u8,
1078    length_size: u8,
1079    ndims: Option<u32>,
1080    chunk_dims: &[u32],
1081    chunk_bounds: Option<(&[u64], &[u64])>,
1082    link_name_hash: Option<u32>,
1083) -> Result<Vec<BTreeV2Record>> {
1084    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
1085        return Ok(Vec::new());
1086    }
1087
1088    if header.total_records == 0 || header.num_records_in_root == 0 {
1089        return Ok(Vec::new());
1090    }
1091    validate_btree_v2_depth(header.depth)?;
1092
1093    let heap_id_len = compute_heap_id_len(header);
1094    let mut records = Vec::new();
1095    let mut visited = HashSet::new();
1096
1097    if header.depth == 0 {
1098        enter_btree_v2_node(&mut visited, header.root_node_address)?;
1099        parse_leaf_node_storage(
1100            storage,
1101            header.root_node_address,
1102            header,
1103            offset_size,
1104            length_size,
1105            ndims,
1106            chunk_dims,
1107            chunk_bounds,
1108            link_name_hash,
1109            header.num_records_in_root,
1110            heap_id_len,
1111            &mut records,
1112        )?;
1113    } else {
1114        parse_internal_node_storage(
1115            storage,
1116            header.root_node_address,
1117            header,
1118            offset_size,
1119            length_size,
1120            ndims,
1121            chunk_dims,
1122            chunk_bounds,
1123            link_name_hash,
1124            header.num_records_in_root,
1125            header.depth,
1126            heap_id_len,
1127            &mut visited,
1128            &mut records,
1129        )?;
1130    }
1131
1132    Ok(records)
1133}
1134
1135/// Collect link-name records whose stored hash matches `target_hash`.
1136pub(crate) fn collect_btree_v2_link_name_hash_records_storage(
1137    storage: &dyn Storage,
1138    header: &BTreeV2Header,
1139    offset_size: u8,
1140    length_size: u8,
1141    target_hash: u32,
1142) -> Result<Vec<BTreeV2Record>> {
1143    collect_btree_v2_records_storage_with_link_hash(
1144        storage,
1145        header,
1146        offset_size,
1147        length_size,
1148        None,
1149        &[],
1150        None,
1151        Some(target_hash),
1152    )
1153}
1154
1155#[allow(clippy::too_many_arguments)]
1156fn parse_leaf_node_storage(
1157    storage: &dyn Storage,
1158    address: u64,
1159    header: &BTreeV2Header,
1160    offset_size: u8,
1161    length_size: u8,
1162    ndims: Option<u32>,
1163    chunk_dims: &[u32],
1164    chunk_bounds: Option<(&[u64], &[u64])>,
1165    link_name_hash: Option<u32>,
1166    num_records: u16,
1167    heap_id_len: usize,
1168    records: &mut Vec<BTreeV2Record>,
1169) -> Result<()> {
1170    let node_len = usize::try_from(header.node_size).map_err(|_| {
1171        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
1172    })?;
1173    let node_bytes = storage.read_range(address, node_len)?;
1174    let mut cursor = Cursor::new(node_bytes.as_ref());
1175    parse_leaf_node(
1176        &mut cursor,
1177        header,
1178        offset_size,
1179        length_size,
1180        ndims,
1181        chunk_dims,
1182        chunk_bounds,
1183        link_name_hash,
1184        num_records,
1185        heap_id_len,
1186        records,
1187    )
1188}
1189
1190#[allow(clippy::too_many_arguments)]
1191fn parse_internal_node_storage(
1192    storage: &dyn Storage,
1193    address: u64,
1194    header: &BTreeV2Header,
1195    offset_size: u8,
1196    length_size: u8,
1197    ndims: Option<u32>,
1198    chunk_dims: &[u32],
1199    chunk_bounds: Option<(&[u64], &[u64])>,
1200    link_name_hash: Option<u32>,
1201    num_records: u16,
1202    depth: u16,
1203    heap_id_len: usize,
1204    visited: &mut HashSet<u64>,
1205    records: &mut Vec<BTreeV2Record>,
1206) -> Result<()> {
1207    if depth == 0 {
1208        return Err(Error::InvalidData(
1209            "B-tree v2 internal node traversal reached depth zero".into(),
1210        ));
1211    }
1212    enter_btree_v2_node(visited, address)?;
1213
1214    let node_len = usize::try_from(header.node_size).map_err(|_| {
1215        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
1216    })?;
1217    let node_bytes = storage.read_range(address, node_len)?;
1218    let mut cursor = Cursor::new(node_bytes.as_ref());
1219    let start = cursor.position();
1220
1221    let sig = cursor.read_bytes(4)?;
1222    if sig != BTIN_SIGNATURE {
1223        return Err(Error::InvalidBTreeV2Signature {
1224            context: "internal node",
1225        });
1226    }
1227
1228    let version = cursor.read_u8()?;
1229    if version != 0 {
1230        return Err(Error::UnsupportedBTreeVersion(version));
1231    }
1232
1233    let node_type = cursor.read_u8()?;
1234    if node_type != header.btree_type {
1235        return Err(Error::InvalidData(format!(
1236            "B-tree v2 internal node type mismatch: header says {}, node says {}",
1237            header.btree_type, node_type
1238        )));
1239    }
1240
1241    let max_child_records = if depth == 1 {
1242        max_leaf_records(header.node_size, header.record_size)
1243    } else {
1244        let leaf_max = max_leaf_records(header.node_size, header.record_size);
1245        let mut prev_max = leaf_max;
1246        for _ in 1..depth {
1247            prev_max =
1248                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
1249        }
1250        prev_max
1251    };
1252    let nrec_bytes = num_records_size(max_child_records);
1253
1254    let mut node_records = Vec::with_capacity(num_records as usize);
1255    for _ in 0..num_records {
1256        let record = parse_record(
1257            &mut cursor,
1258            header.btree_type,
1259            header.record_size,
1260            offset_size,
1261            length_size,
1262            ndims,
1263            chunk_dims,
1264            heap_id_len,
1265        )?;
1266        node_records.push(record);
1267    }
1268
1269    let num_children = usize::from(num_records) + 1;
1270    let has_total_records = depth > 1;
1271    let total_nrec_bytes = if has_total_records {
1272        usize::from(length_size)
1273    } else {
1274        0
1275    };
1276
1277    let mut child_addresses = Vec::with_capacity(num_children);
1278    let mut child_nrecords = Vec::with_capacity(num_children);
1279    for _ in 0..num_children {
1280        child_addresses.push(cursor.read_offset(offset_size)?);
1281        child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
1282        if has_total_records {
1283            cursor.read_uvar(total_nrec_bytes)?;
1284        }
1285    }
1286
1287    let compact_end = cursor.position();
1288    verify_node_checksum(
1289        &mut cursor,
1290        start,
1291        header.node_size,
1292        compact_end,
1293        "internal node",
1294    )?;
1295
1296    let child_depth = depth - 1;
1297    for (i, record) in node_records.iter().enumerate() {
1298        let child_addr = child_addresses[i];
1299        if !Cursor::is_undefined_offset(child_addr, offset_size)
1300            && child_may_match_link_name_hash(&node_records, i, link_name_hash)
1301        {
1302            let child_nrec = child_nrecords[i];
1303            if child_depth == 0 {
1304                enter_btree_v2_node(visited, child_addr)?;
1305                parse_leaf_node_storage(
1306                    storage,
1307                    child_addr,
1308                    header,
1309                    offset_size,
1310                    length_size,
1311                    ndims,
1312                    chunk_dims,
1313                    chunk_bounds,
1314                    link_name_hash,
1315                    child_nrec,
1316                    heap_id_len,
1317                    records,
1318                )?;
1319            } else {
1320                parse_internal_node_storage(
1321                    storage,
1322                    child_addr,
1323                    header,
1324                    offset_size,
1325                    length_size,
1326                    ndims,
1327                    chunk_dims,
1328                    chunk_bounds,
1329                    link_name_hash,
1330                    child_nrec,
1331                    child_depth,
1332                    heap_id_len,
1333                    visited,
1334                    records,
1335                )?;
1336            }
1337        }
1338
1339        if record_matches_query(record, chunk_dims, chunk_bounds, link_name_hash) {
1340            records.push(record.clone());
1341        }
1342    }
1343
1344    let final_child_index = node_records.len();
1345    let final_child_addr = child_addresses[final_child_index];
1346    if !Cursor::is_undefined_offset(final_child_addr, offset_size)
1347        && child_may_match_link_name_hash(&node_records, final_child_index, link_name_hash)
1348    {
1349        let child_nrec = child_nrecords[final_child_index];
1350        if child_depth == 0 {
1351            enter_btree_v2_node(visited, final_child_addr)?;
1352            parse_leaf_node_storage(
1353                storage,
1354                final_child_addr,
1355                header,
1356                offset_size,
1357                length_size,
1358                ndims,
1359                chunk_dims,
1360                chunk_bounds,
1361                link_name_hash,
1362                child_nrec,
1363                heap_id_len,
1364                records,
1365            )?;
1366        } else {
1367            parse_internal_node_storage(
1368                storage,
1369                final_child_addr,
1370                header,
1371                offset_size,
1372                length_size,
1373                ndims,
1374                chunk_dims,
1375                chunk_bounds,
1376                link_name_hash,
1377                child_nrec,
1378                child_depth,
1379                heap_id_len,
1380                visited,
1381                records,
1382            )?;
1383        }
1384    }
1385
1386    Ok(())
1387}
1388
1389/// Compute the heap ID length from the record size and tree type.
1390///
1391/// For link/attribute B-trees (types 5, 6, 8, 9), the heap ID occupies
1392/// the remaining bytes after the fixed fields. For chunk types (10, 11)
1393/// or unknown types, return 0 (heap_id is not used).
1394fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
1395    let rs = header.record_size as usize;
1396    match header.btree_type {
1397        5 => rs.saturating_sub(4),         // hash(4)
1398        6 => rs.saturating_sub(8),         // order(8)
1399        8 => rs.saturating_sub(4 + 1 + 4), // hash(4) + flags(1) + creation_order(4)
1400        9 => rs.saturating_sub(4),         // order(4)
1401        _ => 0,
1402    }
1403}
1404
1405#[cfg(test)]
1406mod tests {
1407    use super::*;
1408
1409    /// Build a minimal BTHD with the given parameters.
1410    #[allow(clippy::too_many_arguments)]
1411    fn build_header(
1412        btree_type: u8,
1413        node_size: u32,
1414        record_size: u16,
1415        depth: u16,
1416        root_node_address: u64,
1417        num_records_in_root: u16,
1418        total_records: u64,
1419        offset_size: u8,
1420        length_size: u8,
1421    ) -> Vec<u8> {
1422        let mut buf = Vec::new();
1423        buf.extend_from_slice(b"BTHD");
1424        buf.push(0); // version
1425        buf.push(btree_type);
1426        buf.extend_from_slice(&node_size.to_le_bytes());
1427        buf.extend_from_slice(&record_size.to_le_bytes());
1428        buf.extend_from_slice(&depth.to_le_bytes());
1429        buf.push(75); // split percent
1430        buf.push(40); // merge percent
1431        match offset_size {
1432            4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
1433            8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
1434            _ => panic!("unsupported"),
1435        }
1436        buf.extend_from_slice(&num_records_in_root.to_le_bytes());
1437        match length_size {
1438            4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
1439            8 => buf.extend_from_slice(&total_records.to_le_bytes()),
1440            _ => panic!("unsupported"),
1441        }
1442        // Compute and append checksum.
1443        let checksum = jenkins_lookup3(&buf);
1444        buf.extend_from_slice(&checksum.to_le_bytes());
1445        buf
1446    }
1447
1448    #[test]
1449    fn parse_header() {
1450        let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
1451        let mut cursor = Cursor::new(&data);
1452        let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();
1453
1454        assert_eq!(hdr.btree_type, 5);
1455        assert_eq!(hdr.node_size, 4096);
1456        assert_eq!(hdr.record_size, 12);
1457        assert_eq!(hdr.depth, 0);
1458        assert_eq!(hdr.split_percent, 75);
1459        assert_eq!(hdr.merge_percent, 40);
1460        assert_eq!(hdr.root_node_address, 0x1000);
1461        assert_eq!(hdr.num_records_in_root, 3);
1462        assert_eq!(hdr.total_records, 3);
1463    }
1464
1465    #[test]
1466    fn bad_signature() {
1467        let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1468        data[0] = b'X';
1469        let mut cursor = Cursor::new(&data);
1470        assert!(matches!(
1471            BTreeV2Header::parse(&mut cursor, 8, 8),
1472            Err(Error::InvalidBTreeV2Signature { .. })
1473        ));
1474    }
1475
1476    #[test]
1477    fn bad_checksum() {
1478        let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1479        // Corrupt a byte in the middle.
1480        data[6] = 0xFF;
1481        let mut cursor = Cursor::new(&data);
1482        assert!(matches!(
1483            BTreeV2Header::parse(&mut cursor, 8, 8),
1484            Err(Error::ChecksumMismatch { .. })
1485        ));
1486    }
1487
1488    #[test]
1489    fn collect_empty_tree() {
1490        let header = BTreeV2Header {
1491            btree_type: 5,
1492            node_size: 4096,
1493            record_size: 12,
1494            depth: 0,
1495            split_percent: 75,
1496            merge_percent: 40,
1497            root_node_address: u64::MAX,
1498            num_records_in_root: 0,
1499            total_records: 0,
1500        };
1501        let data = vec![0u8; 100];
1502        let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1503        assert!(records.is_empty());
1504    }
1505
1506    #[test]
1507    fn heap_id_len_uses_record_type_sizes() {
1508        // Type 5: record_size - 4
1509        let h5 = BTreeV2Header {
1510            btree_type: 5,
1511            record_size: 12,
1512            node_size: 0,
1513            depth: 0,
1514            split_percent: 0,
1515            merge_percent: 0,
1516            root_node_address: 0,
1517            num_records_in_root: 0,
1518            total_records: 0,
1519        };
1520        assert_eq!(compute_heap_id_len(&h5), 8);
1521
1522        // Type 8: record_size - 9
1523        let h8 = BTreeV2Header {
1524            btree_type: 8,
1525            record_size: 17,
1526            ..h5
1527        };
1528        assert_eq!(compute_heap_id_len(&h8), 8);
1529    }
1530
1531    #[test]
1532    fn max_leaf_records_accounts_for_node_overhead() {
1533        // node_size=4096, record_size=12, overhead=10
1534        // => (4096 - 10) / 12 = 340
1535        assert_eq!(max_leaf_records(4096, 12), 340);
1536    }
1537
1538    #[test]
1539    fn record_count_size_scales_at_integer_boundaries() {
1540        assert_eq!(num_records_size(0), 1);
1541        assert_eq!(num_records_size(255), 1);
1542        assert_eq!(num_records_size(256), 2);
1543        assert_eq!(num_records_size(65535), 2);
1544        assert_eq!(num_records_size(65536), 4);
1545    }
1546
1547    #[test]
1548    fn parse_huge_indirect_record() {
1549        let mut data = Vec::new();
1550        data.extend_from_slice(&0x1234u64.to_le_bytes());
1551        data.extend_from_slice(&99u64.to_le_bytes());
1552        data.extend_from_slice(&7u64.to_le_bytes());
1553        let mut cursor = Cursor::new(&data);
1554
1555        let record = parse_record(&mut cursor, 1, data.len() as u16, 8, 8, None, &[], 0).unwrap();
1556        match record {
1557            BTreeV2Record::HugeIndirectNonFiltered {
1558                address,
1559                length,
1560                object_id,
1561            } => {
1562                assert_eq!(address, 0x1234);
1563                assert_eq!(length, 99);
1564                assert_eq!(object_id, 7);
1565            }
1566            other => panic!("expected huge record, got {:?}", other),
1567        }
1568    }
1569
1570    #[test]
1571    fn parse_shared_heap_record() {
1572        let mut data = Vec::new();
1573        data.push(0);
1574        data.extend_from_slice(&[0, 0, 0]);
1575        data.extend_from_slice(&0xAABB_CCDDu32.to_le_bytes());
1576        data.extend_from_slice(&3u32.to_le_bytes());
1577        data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1578        let mut cursor = Cursor::new(&data);
1579
1580        let record = parse_record(&mut cursor, 7, data.len() as u16, 8, 8, None, &[], 0).unwrap();
1581        match record {
1582            BTreeV2Record::SharedMessageHeap {
1583                hash,
1584                reference_count,
1585                heap_id,
1586            } => {
1587                assert_eq!(hash, 0xAABB_CCDD);
1588                assert_eq!(reference_count, 3);
1589                assert_eq!(heap_id, vec![1, 2, 3, 4, 5, 6, 7, 8]);
1590            }
1591            other => panic!("expected shared heap record, got {:?}", other),
1592        }
1593    }
1594
1595    #[test]
1596    fn parse_record_rejects_known_record_that_exceeds_record_size() {
1597        let mut data = Vec::new();
1598        data.extend_from_slice(&0x1234u64.to_le_bytes());
1599        data.extend_from_slice(&99u64.to_le_bytes());
1600        data.extend_from_slice(&7u64.to_le_bytes());
1601        let mut cursor = Cursor::new(&data);
1602
1603        let err = parse_record(&mut cursor, 1, 16, 8, 8, None, &[], 0).unwrap_err();
1604        assert!(
1605            matches!(err, Error::InvalidData(message) if message.contains("consumed 24 bytes but record size is 16"))
1606        );
1607    }
1608
1609    #[test]
1610    fn parse_chunk_record_rejects_size_shorter_than_address() {
1611        let mut data = Vec::new();
1612        data.extend_from_slice(&0x1234u64.to_le_bytes());
1613        let mut cursor = Cursor::new(&data);
1614
1615        let err = parse_record(&mut cursor, 10, 4, 8, 8, None, &[], 0).unwrap_err();
1616        assert!(
1617            matches!(err, Error::InvalidData(message) if message.contains("shorter than its address"))
1618        );
1619        assert_eq!(cursor.position(), 0);
1620    }
1621
1622    #[test]
1623    fn parse_filtered_chunk_record_rejects_size_shorter_than_fixed_fields() {
1624        let mut data = Vec::new();
1625        data.extend_from_slice(&0x1234u64.to_le_bytes());
1626        data.extend_from_slice(&99u64.to_le_bytes());
1627        data.extend_from_slice(&0xAu32.to_le_bytes());
1628        let mut cursor = Cursor::new(&data);
1629
1630        let err = parse_record(&mut cursor, 11, 16, 8, 8, None, &[], 0).unwrap_err();
1631        assert!(
1632            matches!(err, Error::InvalidData(message) if message.contains("shorter than its fixed fields"))
1633        );
1634        assert_eq!(cursor.position(), 0);
1635    }
1636
1637    #[test]
1638    fn parse_chunk_record_scales_offsets_by_chunk_dimensions() {
1639        let mut data = Vec::new();
1640        data.extend_from_slice(&0x1234u64.to_le_bytes());
1641        data.extend_from_slice(&2u64.to_le_bytes());
1642        data.extend_from_slice(&1u64.to_le_bytes());
1643        let mut cursor = Cursor::new(&data);
1644
1645        let record = parse_record(
1646            &mut cursor,
1647            10,
1648            data.len() as u16,
1649            8,
1650            8,
1651            Some(2),
1652            &[5, 7],
1653            0,
1654        )
1655        .unwrap();
1656        match record {
1657            BTreeV2Record::ChunkedNonFiltered { address, offsets } => {
1658                assert_eq!(address, 0x1234);
1659                assert_eq!(offsets, vec![10, 7]);
1660            }
1661            other => panic!("expected non-filtered chunk record, got {:?}", other),
1662        }
1663    }
1664
1665    #[test]
1666    fn parse_filtered_chunk_record_scales_offsets_by_chunk_dimensions() {
1667        let mut data = Vec::new();
1668        data.extend_from_slice(&0x1234u64.to_le_bytes());
1669        data.extend_from_slice(&99u64.to_le_bytes());
1670        data.extend_from_slice(&0xAu32.to_le_bytes());
1671        data.extend_from_slice(&3u64.to_le_bytes());
1672        data.extend_from_slice(&4u64.to_le_bytes());
1673        let mut cursor = Cursor::new(&data);
1674
1675        let record = parse_record(
1676            &mut cursor,
1677            11,
1678            data.len() as u16,
1679            8,
1680            8,
1681            Some(2),
1682            &[5, 7],
1683            0,
1684        )
1685        .unwrap();
1686        match record {
1687            BTreeV2Record::ChunkedFiltered {
1688                address,
1689                chunk_size,
1690                filter_mask,
1691                offsets,
1692            } => {
1693                assert_eq!(address, 0x1234);
1694                assert_eq!(chunk_size, 99);
1695                assert_eq!(filter_mask, 0xA);
1696                assert_eq!(offsets, vec![15, 28]);
1697            }
1698            other => panic!("expected filtered chunk record, got {:?}", other),
1699        }
1700    }
1701
1702    #[test]
1703    fn parse_leaf_with_type5_records() {
1704        // Build a leaf node with 2 type-5 records (link name hash).
1705        // record_size = 12 (hash=4 + heap_id=8)
1706        let record_size: u16 = 12;
1707        let node_size: u32 = 4096;
1708
1709        let header = BTreeV2Header {
1710            btree_type: 5,
1711            node_size,
1712            record_size,
1713            depth: 0,
1714            split_percent: 75,
1715            merge_percent: 40,
1716            root_node_address: 0, // will point to our leaf
1717            num_records_in_root: 2,
1718            total_records: 2,
1719        };
1720
1721        // Build the leaf node.
1722        let mut leaf = Vec::new();
1723        leaf.extend_from_slice(b"BTLF"); // signature
1724        leaf.push(0); // version
1725        leaf.push(5); // type
1726
1727        // Record 1: hash=0xAABBCCDD, heap_id=[1,2,3,4,5,6,7,8]
1728        leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
1729        leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1730
1731        // Record 2: hash=0x11223344, heap_id=[9,10,11,12,13,14,15,16]
1732        leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
1733        leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1734
1735        // The checksum covers the populated node payload. The node allocation
1736        // may be larger and is padded after the checksum.
1737        let checksum = jenkins_lookup3(&leaf);
1738        leaf.extend_from_slice(&checksum.to_le_bytes());
1739        leaf.resize(node_size as usize, 0);
1740
1741        let mut records = Vec::new();
1742        let mut cursor = Cursor::new(&leaf);
1743        parse_leaf_node(
1744            &mut cursor,
1745            &header,
1746            8,
1747            8,
1748            None,
1749            &[],
1750            None,
1751            None,
1752            2,
1753            8, // heap_id_len
1754            &mut records,
1755        )
1756        .unwrap();
1757
1758        assert_eq!(records.len(), 2);
1759        match &records[0] {
1760            BTreeV2Record::LinkNameHash { hash, heap_id } => {
1761                assert_eq!(*hash, 0xAABBCCDD);
1762                assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
1763            }
1764            _ => panic!("expected LinkNameHash"),
1765        }
1766        match &records[1] {
1767            BTreeV2Record::LinkNameHash { hash, heap_id } => {
1768                assert_eq!(*hash, 0x11223344);
1769                assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1770            }
1771            _ => panic!("expected LinkNameHash"),
1772        }
1773    }
1774
1775    fn build_type5_record(hash: u32, heap_id: [u8; 8]) -> Vec<u8> {
1776        let mut record = Vec::new();
1777        record.extend_from_slice(&hash.to_le_bytes());
1778        record.extend_from_slice(&heap_id);
1779        record
1780    }
1781
1782    fn build_type5_leaf(records: &[(u32, [u8; 8])], node_size: usize) -> Vec<u8> {
1783        let mut leaf = Vec::new();
1784        leaf.extend_from_slice(b"BTLF");
1785        leaf.push(0);
1786        leaf.push(5);
1787        for &(hash, heap_id) in records {
1788            leaf.extend_from_slice(&build_type5_record(hash, heap_id));
1789        }
1790        let checksum = jenkins_lookup3(&leaf);
1791        leaf.extend_from_slice(&checksum.to_le_bytes());
1792        leaf.resize(node_size, 0);
1793        leaf
1794    }
1795
1796    fn build_type5_internal(
1797        records: &[(u32, [u8; 8])],
1798        children: &[(u64, u8)],
1799        node_size: usize,
1800    ) -> Vec<u8> {
1801        assert_eq!(children.len(), records.len() + 1);
1802
1803        let mut node = Vec::new();
1804        node.extend_from_slice(b"BTIN");
1805        node.push(0);
1806        node.push(5);
1807        for &(hash, heap_id) in records {
1808            node.extend_from_slice(&build_type5_record(hash, heap_id));
1809        }
1810        for &(address, child_records) in children {
1811            node.extend_from_slice(&address.to_le_bytes());
1812            node.push(child_records);
1813        }
1814        let checksum = jenkins_lookup3(&node);
1815        node.extend_from_slice(&checksum.to_le_bytes());
1816        node.resize(node_size, 0);
1817        node
1818    }
1819
1820    fn link_hashes(records: &[BTreeV2Record]) -> Vec<u32> {
1821        records
1822            .iter()
1823            .map(|record| match record {
1824                BTreeV2Record::LinkNameHash { hash, .. } => *hash,
1825                other => panic!("expected LinkNameHash, got {other:?}"),
1826            })
1827            .collect()
1828    }
1829
1830    #[test]
1831    fn collect_depth_one_tree_includes_internal_records_in_order() {
1832        // Mirrors HDF5 H5B2__iterate_node: recurse into child[i], visit
1833        // record[i], then recurse into the final child. Internal-node records
1834        // are callback records, not just separators.
1835        let node_size = 128usize;
1836        let left_addr = node_size as u64;
1837        let right_addr = (node_size * 2) as u64;
1838        let header = BTreeV2Header {
1839            btree_type: 5,
1840            node_size: node_size as u32,
1841            record_size: 12,
1842            depth: 1,
1843            split_percent: 75,
1844            merge_percent: 40,
1845            root_node_address: 0,
1846            num_records_in_root: 1,
1847            total_records: 3,
1848        };
1849
1850        let root = build_type5_internal(
1851            &[(20, [20; 8])],
1852            &[(left_addr, 1), (right_addr, 1)],
1853            node_size,
1854        );
1855        let left_leaf = build_type5_leaf(&[(10, [10; 8])], node_size);
1856        let right_leaf = build_type5_leaf(&[(30, [30; 8])], node_size);
1857
1858        let mut data = vec![0; node_size * 3];
1859        data[0..node_size].copy_from_slice(&root);
1860        data[node_size..node_size * 2].copy_from_slice(&left_leaf);
1861        data[node_size * 2..node_size * 3].copy_from_slice(&right_leaf);
1862
1863        let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1864        assert_eq!(link_hashes(&records), vec![10, 20, 30]);
1865
1866        let storage = crate::storage::BytesStorage::new(data);
1867        let records =
1868            collect_btree_v2_records_storage(&storage, &header, 8, 8, None, &[], None).unwrap();
1869        assert_eq!(link_hashes(&records), vec![10, 20, 30]);
1870    }
1871
1872    #[test]
1873    fn collect_rejects_depth_above_traversal_limit() {
1874        let header = BTreeV2Header {
1875            btree_type: 5,
1876            node_size: 128,
1877            record_size: 12,
1878            depth: MAX_BTREE_V2_DEPTH + 1,
1879            split_percent: 75,
1880            merge_percent: 40,
1881            root_node_address: 0,
1882            num_records_in_root: 1,
1883            total_records: 1,
1884        };
1885        let data = vec![0; 128];
1886
1887        let err = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap_err();
1888        assert!(
1889            matches!(err, Error::InvalidData(message) if message.contains("exceeds traversal limit"))
1890        );
1891    }
1892
1893    #[test]
1894    fn collect_rejects_revisited_leaf_node() {
1895        let node_size = 128usize;
1896        let leaf_addr = node_size as u64;
1897        let header = BTreeV2Header {
1898            btree_type: 5,
1899            node_size: node_size as u32,
1900            record_size: 12,
1901            depth: 1,
1902            split_percent: 75,
1903            merge_percent: 40,
1904            root_node_address: 0,
1905            num_records_in_root: 1,
1906            total_records: 3,
1907        };
1908
1909        let root = build_type5_internal(
1910            &[(20, [20; 8])],
1911            &[(leaf_addr, 1), (leaf_addr, 1)],
1912            node_size,
1913        );
1914        let leaf = build_type5_leaf(&[(10, [10; 8])], node_size);
1915
1916        let mut data = vec![0; node_size * 2];
1917        data[0..node_size].copy_from_slice(&root);
1918        data[node_size..node_size * 2].copy_from_slice(&leaf);
1919
1920        let err = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap_err();
1921        assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
1922
1923        let storage = crate::storage::BytesStorage::new(data);
1924        let err =
1925            collect_btree_v2_records_storage(&storage, &header, 8, 8, None, &[], None).unwrap_err();
1926        assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
1927    }
1928
1929    #[test]
1930    fn collect_link_name_hash_records_filters_leaf_records() {
1931        let record_size: u16 = 12;
1932        let node_size: u32 = 4096;
1933        let header = BTreeV2Header {
1934            btree_type: 5,
1935            node_size,
1936            record_size,
1937            depth: 0,
1938            split_percent: 75,
1939            merge_percent: 40,
1940            root_node_address: 0,
1941            num_records_in_root: 2,
1942            total_records: 2,
1943        };
1944
1945        let mut leaf = Vec::new();
1946        leaf.extend_from_slice(b"BTLF");
1947        leaf.push(0);
1948        leaf.push(5);
1949        leaf.extend_from_slice(&0x1111_1111u32.to_le_bytes());
1950        leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1951        leaf.extend_from_slice(&0x2222_2222u32.to_le_bytes());
1952        leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1953        let checksum = jenkins_lookup3(&leaf);
1954        leaf.extend_from_slice(&checksum.to_le_bytes());
1955        leaf.resize(node_size as usize, 0);
1956
1957        let records =
1958            collect_btree_v2_link_name_hash_records(&leaf, &header, 8, 8, 0x2222_2222).unwrap();
1959        assert_eq!(records.len(), 1);
1960        match &records[0] {
1961            BTreeV2Record::LinkNameHash { hash, heap_id } => {
1962                assert_eq!(*hash, 0x2222_2222);
1963                assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1964            }
1965            other => panic!("expected LinkNameHash, got {other:?}"),
1966        }
1967    }
1968
1969    #[test]
1970    fn child_link_hash_filter_prunes_disjoint_hash_ranges() {
1971        let records = vec![
1972            BTreeV2Record::LinkNameHash {
1973                hash: 10,
1974                heap_id: vec![],
1975            },
1976            BTreeV2Record::LinkNameHash {
1977                hash: 20,
1978                heap_id: vec![],
1979            },
1980        ];
1981
1982        assert!(child_may_match_link_name_hash(&records, 0, Some(10)));
1983        assert!(!child_may_match_link_name_hash(&records, 0, Some(15)));
1984        assert!(child_may_match_link_name_hash(&records, 1, Some(15)));
1985        assert!(!child_may_match_link_name_hash(&records, 2, Some(15)));
1986        assert!(child_may_match_link_name_hash(&records, 2, Some(20)));
1987    }
1988}