Skip to main content

hdf5_reader/
btree_v2.rs

1//! HDF5 B-tree Version 2.
2//!
3//! V2 B-trees are used by newer-style groups and datasets for indexed link
4//! storage, attribute storage, and chunked dataset indexing. The header
5//! (`BTHD`) describes the tree parameters. Internal nodes (`BTIN`) and leaf
6//! nodes (`BTLF`) contain the actual records.
7//!
8//! This module provides the header parse, record types, and a traversal
9//! function that collects all records from a tree.
10
11use crate::checksum::jenkins_lookup3;
12use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
16// ---------------------------------------------------------------------------
17// Signatures
18// ---------------------------------------------------------------------------
19
20const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
21const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
22const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
23
24// ---------------------------------------------------------------------------
25// Header
26// ---------------------------------------------------------------------------
27
28/// Parsed B-tree v2 header.
29#[derive(Debug, Clone)]
30pub struct BTreeV2Header {
31    /// B-tree type (determines the record format).
32    pub btree_type: u8,
33    /// Size in bytes of each B-tree node (both internal and leaf).
34    pub node_size: u32,
35    /// Size in bytes of each record.
36    pub record_size: u16,
37    /// Depth of the tree (0 = root is a leaf).
38    pub depth: u16,
39    /// Percent full at which to split a node.
40    pub split_percent: u8,
41    /// Percent full at which to merge a node.
42    pub merge_percent: u8,
43    /// Address of the root node.
44    pub root_node_address: u64,
45    /// Number of records in the root node.
46    pub num_records_in_root: u16,
47    /// Total number of records in the entire tree.
48    pub total_records: u64,
49}
50
51impl BTreeV2Header {
52    /// Parse a B-tree v2 header at the current cursor position.
53    ///
54    /// Format:
55    /// - Signature: `BTHD` (4 bytes)
56    /// - Version: 0 (1 byte)
57    /// - B-tree type (u8)
58    /// - Node size (u32 LE)
59    /// - Record size (u16 LE)
60    /// - Depth (u16 LE)
61    /// - Split percent (u8)
62    /// - Merge percent (u8)
63    /// - Root node address (`offset_size` bytes)
64    /// - Number of records in root node (u16 LE)
65    /// - Total number of records in tree (`length_size` bytes)
66    /// - Checksum (u32 LE)
67    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
68        let start = cursor.position();
69
70        let sig = cursor.read_bytes(4)?;
71        if sig != BTHD_SIGNATURE {
72            return Err(Error::InvalidBTreeV2Signature { context: "header" });
73        }
74
75        let version = cursor.read_u8()?;
76        if version != 0 {
77            return Err(Error::UnsupportedBTreeVersion(version));
78        }
79
80        let btree_type = cursor.read_u8()?;
81        let node_size = cursor.read_u32_le()?;
82        let record_size = cursor.read_u16_le()?;
83        let depth = cursor.read_u16_le()?;
84        let split_percent = cursor.read_u8()?;
85        let merge_percent = cursor.read_u8()?;
86        let root_node_address = cursor.read_offset(offset_size)?;
87        let num_records_in_root = cursor.read_u16_le()?;
88        let total_records = cursor.read_length(length_size)?;
89
90        // Checksum covers everything from signature through total_records.
91        let checksum_end = cursor.position();
92        let stored_checksum = cursor.read_u32_le()?;
93
94        let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
95        if computed != stored_checksum {
96            return Err(Error::ChecksumMismatch {
97                expected: stored_checksum,
98                actual: computed,
99            });
100        }
101
102        Ok(BTreeV2Header {
103            btree_type,
104            node_size,
105            record_size,
106            depth,
107            split_percent,
108            merge_percent,
109            root_node_address,
110            num_records_in_root,
111            total_records,
112        })
113    }
114
115    /// Parse a B-tree v2 header from random-access storage.
116    pub fn parse_at_storage(
117        storage: &dyn Storage,
118        address: u64,
119        offset_size: u8,
120        length_size: u8,
121    ) -> Result<Self> {
122        let header_len = 4
123            + 1
124            + 1
125            + 4
126            + 2
127            + 2
128            + 1
129            + 1
130            + usize::from(offset_size)
131            + 2
132            + usize::from(length_size)
133            + 4;
134        let bytes = storage.read_range(address, header_len)?;
135        let mut cursor = Cursor::new(bytes.as_ref());
136        Self::parse(&mut cursor, offset_size, length_size)
137    }
138}
139
140// ---------------------------------------------------------------------------
141// Records
142// ---------------------------------------------------------------------------
143
144/// A record from a B-tree v2.
145///
146/// The record format depends on the B-tree type field in the header.
147#[derive(Debug, Clone)]
148pub enum BTreeV2Record {
149    /// Type 1: indirectly accessed, non-filtered huge fractal heap object.
150    HugeIndirectNonFiltered {
151        address: u64,
152        length: u64,
153        object_id: u64,
154    },
155    /// Type 2: indirectly accessed, filtered huge fractal heap object.
156    HugeIndirectFiltered {
157        address: u64,
158        filtered_length: u64,
159        filter_mask: u32,
160        memory_length: u64,
161        object_id: u64,
162    },
163    /// Type 3: directly accessed, non-filtered huge fractal heap object.
164    HugeDirectNonFiltered { address: u64, length: u64 },
165    /// Type 4: directly accessed, filtered huge fractal heap object.
166    HugeDirectFiltered {
167        address: u64,
168        filtered_length: u64,
169        filter_mask: u32,
170        memory_length: u64,
171    },
172    /// Type 5: Link name for indexed group (hashed).
173    LinkNameHash { hash: u32, heap_id: Vec<u8> },
174    /// Type 6: Creation order for indexed group.
175    CreationOrder { order: u64, heap_id: Vec<u8> },
176    /// Type 8: Attribute name for indexed group (hashed).
177    AttributeNameHash {
178        hash: u32,
179        flags: u8,
180        creation_order: u32,
181        heap_id: Vec<u8>,
182    },
183    /// Type 9: Attribute creation order.
184    AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
185    /// Type 10: Non-filtered chunked dataset record (v2 chunk index).
186    ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
187    /// Type 11: Filtered chunked dataset record (v2 chunk index).
188    ChunkedFiltered {
189        address: u64,
190        chunk_size: u64,
191        filter_mask: u32,
192        offsets: Vec<u64>,
193    },
194    /// Type 7: shared object-header message stored in the SOHM heap.
195    SharedMessageHeap {
196        hash: u32,
197        reference_count: u32,
198        heap_id: Vec<u8>,
199    },
200    /// Type 7: shared object-header message stored in an object header.
201    SharedMessageObjectHeader {
202        hash: u32,
203        message_type: u16,
204        object_header_index: u16,
205        object_header_address: u64,
206    },
207    /// Unknown/unsupported record type — raw bytes preserved.
208    Unknown { record_type: u8, data: Vec<u8> },
209}
210
211// ---------------------------------------------------------------------------
212// Record parsing
213// ---------------------------------------------------------------------------
214
215/// Parse a single record of the given B-tree type.
216fn parse_record(
217    cursor: &mut Cursor,
218    btree_type: u8,
219    record_size: u16,
220    offset_size: u8,
221    length_size: u8,
222    _ndims: Option<u32>,
223    heap_id_len: usize,
224) -> Result<BTreeV2Record> {
225    let record_start = cursor.position();
226
227    let record = match btree_type {
228        // Type 1: indirectly accessed, non-filtered huge fractal heap object.
229        1 => BTreeV2Record::HugeIndirectNonFiltered {
230            address: cursor.read_offset(offset_size)?,
231            length: cursor.read_length(length_size)?,
232            object_id: cursor.read_length(length_size)?,
233        },
234
235        // Type 2: indirectly accessed, filtered huge fractal heap object.
236        2 => BTreeV2Record::HugeIndirectFiltered {
237            address: cursor.read_offset(offset_size)?,
238            filtered_length: cursor.read_length(length_size)?,
239            filter_mask: cursor.read_u32_le()?,
240            memory_length: cursor.read_length(length_size)?,
241            object_id: cursor.read_length(length_size)?,
242        },
243
244        // Type 3: directly accessed, non-filtered huge fractal heap object.
245        3 => BTreeV2Record::HugeDirectNonFiltered {
246            address: cursor.read_offset(offset_size)?,
247            length: cursor.read_length(length_size)?,
248        },
249
250        // Type 4: directly accessed, filtered huge fractal heap object.
251        4 => BTreeV2Record::HugeDirectFiltered {
252            address: cursor.read_offset(offset_size)?,
253            filtered_length: cursor.read_length(length_size)?,
254            filter_mask: cursor.read_u32_le()?,
255            memory_length: cursor.read_length(length_size)?,
256        },
257
258        // Type 5: link name hash
259        5 => {
260            let hash = cursor.read_u32_le()?;
261            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
262            BTreeV2Record::LinkNameHash { hash, heap_id }
263        }
264
265        // Type 6: creation order
266        6 => {
267            let order = cursor.read_u64_le()?;
268            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
269            BTreeV2Record::CreationOrder { order, heap_id }
270        }
271
272        // Type 7: shared object-header messages.
273        7 => {
274            let location = cursor.read_u8()?;
275            cursor.skip(3)?;
276            let hash = cursor.read_u32_le()?;
277            match location {
278                0 => {
279                    let reference_count = cursor.read_u32_le()?;
280                    let heap_id = cursor.read_bytes(8)?.to_vec();
281                    BTreeV2Record::SharedMessageHeap {
282                        hash,
283                        reference_count,
284                        heap_id,
285                    }
286                }
287                1 => {
288                    let _reserved = cursor.read_u8()?;
289                    let message_type = u16::from(cursor.read_u8()?);
290                    let object_header_index = cursor.read_u16_le()?;
291                    let object_header_address = cursor.read_offset(offset_size)?;
292                    BTreeV2Record::SharedMessageObjectHeader {
293                        hash,
294                        message_type,
295                        object_header_index,
296                        object_header_address,
297                    }
298                }
299                other => {
300                    return Err(Error::InvalidData(format!(
301                        "unknown SOHM B-tree record location: {other}"
302                    )));
303                }
304            }
305        }
306
307        // Type 8: attribute name hash
308        8 => {
309            let hash = cursor.read_u32_le()?;
310            let flags = cursor.read_u8()?;
311            let creation_order = cursor.read_u32_le()?;
312            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
313            BTreeV2Record::AttributeNameHash {
314                hash,
315                flags,
316                creation_order,
317                heap_id,
318            }
319        }
320
321        // Type 9: attribute creation order
322        9 => {
323            let order = cursor.read_u32_le()?;
324            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
325            BTreeV2Record::AttributeCreationOrder { order, heap_id }
326        }
327
328        // Type 10: non-filtered chunk
329        10 => {
330            let address = cursor.read_offset(offset_size)?;
331            // Chunk offsets are encoded as scaled 64-bit values.
332            // The number of offset dimensions is calculated from the record size.
333            // Each offset is 8 bytes in a type-10 record.
334            let offset_bytes_available = record_size as usize - offset_size as usize;
335            let num_offsets = offset_bytes_available / 8;
336            let mut offsets = Vec::with_capacity(num_offsets);
337            for _ in 0..num_offsets {
338                offsets.push(cursor.read_u64_le()?);
339            }
340            BTreeV2Record::ChunkedNonFiltered { address, offsets }
341        }
342
343        // Type 11: filtered chunk
344        11 => {
345            let address = cursor.read_offset(offset_size)?;
346            // nbytes (chunk size on disk) is encoded using length_size bytes.
347            let nbytes_size = length_size as usize;
348            let chunk_size = cursor.read_length(length_size)?;
349            let filter_mask = cursor.read_u32_le()?;
350            let remaining = record_size as usize - offset_size as usize - nbytes_size - 4; // filter_mask
351            let num_offsets = remaining / 8;
352            let mut offsets = Vec::with_capacity(num_offsets);
353            for _ in 0..num_offsets {
354                offsets.push(cursor.read_u64_le()?);
355            }
356            BTreeV2Record::ChunkedFiltered {
357                address,
358                chunk_size,
359                filter_mask,
360                offsets,
361            }
362        }
363
364        // Unknown type — read raw bytes.
365        _ => {
366            let data = cursor.read_bytes(record_size as usize)?.to_vec();
367            return Ok(BTreeV2Record::Unknown {
368                record_type: btree_type,
369                data,
370            });
371        }
372    };
373
374    // Ensure we consumed exactly record_size bytes (skip any remaining).
375    let consumed = (cursor.position() - record_start) as usize;
376    if consumed < record_size as usize {
377        cursor.skip(record_size as usize - consumed)?;
378    }
379
380    Ok(record)
381}
382
383fn record_matches_chunk_bounds(
384    record: &BTreeV2Record,
385    chunk_dims: &[u32],
386    chunk_bounds: Option<(&[u64], &[u64])>,
387) -> bool {
388    let Some((first_chunk, last_chunk)) = chunk_bounds else {
389        return true;
390    };
391
392    let offsets = match record {
393        BTreeV2Record::ChunkedNonFiltered { offsets, .. }
394        | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
395        _ => return true,
396    };
397
398    offsets.iter().enumerate().all(|(dim, offset)| {
399        let chunk_index = *offset / u64::from(chunk_dims[dim]);
400        chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
401    })
402}
403
404// ---------------------------------------------------------------------------
405// Node parsing
406// ---------------------------------------------------------------------------
407
408/// Compute the number of bytes needed to represent `max_records` as an
409/// unsigned integer (used for child-node record counts in internal nodes).
410fn num_records_size(max_records: u64) -> usize {
411    if max_records <= 0xFF {
412        1
413    } else if max_records <= 0xFFFF {
414        2
415    } else if max_records <= 0xFFFF_FFFF {
416        4
417    } else {
418        8
419    }
420}
421
422/// Compute the maximum number of records that fit in a leaf node.
423fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
424    // Leaf node overhead: signature(4) + version(1) + type(1) + checksum(4) = 10
425    let overhead = 10u32;
426    if node_size <= overhead || record_size == 0 {
427        return 0;
428    }
429    ((node_size - overhead) / record_size as u32) as u64
430}
431
432/// Compute the maximum number of records that fit in an internal node.
433/// This depends on the pointer size (offset_size) and the number-of-records
434/// encoding for child nodes, which makes it recursive in principle. We use
435/// an iterative approach.
436fn max_internal_records(
437    node_size: u32,
438    record_size: u16,
439    offset_size: u8,
440    max_child_records: u64,
441) -> u64 {
442    // Internal node overhead: signature(4) + version(1) + type(1) + checksum(4) = 10
443    let overhead = 10u32;
444    if node_size <= overhead || record_size == 0 {
445        return 0;
446    }
447    let available = (node_size - overhead) as u64;
448    // Each entry in an internal node is: record(record_size) + child_pointer(offset_size) + num_records(var)
449    let nrec_size = num_records_size(max_child_records) as u64;
450    let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
451    // There is one more child pointer + num_records than records.
452    // So: n * record_size + (n+1) * (offset_size + nrec_size) <= available
453    // => n * (record_size + offset_size + nrec_size) + offset_size + nrec_size <= available
454    let extra = offset_size as u64 + nrec_size;
455    if available <= extra {
456        return 0;
457    }
458    (available - extra) / entry_size
459}
460
461fn node_checksum_bounds(
462    cursor: &Cursor<'_>,
463    start: u64,
464    node_size: u32,
465    compact_end: u64,
466    context: &'static str,
467) -> Result<usize> {
468    let start = usize::try_from(start)
469        .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} offset is too large")))?;
470    let node_size = usize::try_from(node_size)
471        .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} size is too large")))?;
472    if node_size < 10 {
473        return Err(Error::InvalidData(format!(
474            "B-tree v2 {context} node is too small: {node_size} bytes"
475        )));
476    }
477
478    let checksum_pos = usize::try_from(compact_end)
479        .map_err(|_| Error::InvalidData(format!("B-tree v2 {context} checksum is too large")))?;
480    if checksum_pos < start {
481        return Err(Error::InvalidData(format!(
482            "B-tree v2 {context} payload starts before node"
483        )));
484    }
485    let node_end = start
486        .checked_add(node_size)
487        .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} size overflow")))?;
488
489    if node_end > cursor.data().len() {
490        return Err(Error::UnexpectedEof {
491            offset: start as u64,
492            needed: node_size as u64,
493            available: cursor.data().len().saturating_sub(start) as u64,
494        });
495    }
496    let checksum_end = checksum_pos
497        .checked_add(4)
498        .ok_or_else(|| Error::InvalidData(format!("B-tree v2 {context} checksum overflow")))?;
499    if checksum_end > node_end {
500        return Err(Error::InvalidData(format!(
501            "B-tree v2 {context} contents exceed node size"
502        )));
503    }
504
505    Ok(checksum_pos)
506}
507
508fn verify_node_checksum(
509    cursor: &mut Cursor<'_>,
510    start: u64,
511    node_size: u32,
512    compact_end: u64,
513    context: &'static str,
514) -> Result<()> {
515    let checksum_pos = node_checksum_bounds(cursor, start, node_size, compact_end, context)?;
516    cursor.set_position(checksum_pos as u64);
517    let stored_checksum = cursor.read_u32_le()?;
518    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_pos]);
519    if computed != stored_checksum {
520        return Err(Error::ChecksumMismatch {
521            expected: stored_checksum,
522            actual: computed,
523        });
524    }
525    Ok(())
526}
527
528/// Parse a leaf node and collect its records.
529#[allow(clippy::too_many_arguments)]
530fn parse_leaf_node(
531    cursor: &mut Cursor,
532    header: &BTreeV2Header,
533    offset_size: u8,
534    length_size: u8,
535    ndims: Option<u32>,
536    chunk_dims: &[u32],
537    chunk_bounds: Option<(&[u64], &[u64])>,
538    num_records: u16,
539    heap_id_len: usize,
540    records: &mut Vec<BTreeV2Record>,
541) -> Result<()> {
542    let start = cursor.position();
543
544    let sig = cursor.read_bytes(4)?;
545    if sig != BTLF_SIGNATURE {
546        return Err(Error::InvalidBTreeV2Signature {
547            context: "leaf node",
548        });
549    }
550
551    let version = cursor.read_u8()?;
552    if version != 0 {
553        return Err(Error::UnsupportedBTreeVersion(version));
554    }
555
556    let node_type = cursor.read_u8()?;
557    if node_type != header.btree_type {
558        return Err(Error::InvalidData(format!(
559            "B-tree v2 leaf node type mismatch: header says {}, node says {}",
560            header.btree_type, node_type
561        )));
562    }
563
564    for _ in 0..num_records {
565        let record = parse_record(
566            cursor,
567            header.btree_type,
568            header.record_size,
569            offset_size,
570            length_size,
571            ndims,
572            heap_id_len,
573        )?;
574        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
575            records.push(record);
576        }
577    }
578
579    verify_node_checksum(
580        cursor,
581        start,
582        header.node_size,
583        cursor.position(),
584        "leaf node",
585    )?;
586
587    Ok(())
588}
589
590/// Parse an internal node, collecting child addresses and recursing.
591#[allow(clippy::too_many_arguments)]
592fn parse_internal_node(
593    data: &[u8],
594    address: u64,
595    header: &BTreeV2Header,
596    offset_size: u8,
597    length_size: u8,
598    ndims: Option<u32>,
599    chunk_dims: &[u32],
600    chunk_bounds: Option<(&[u64], &[u64])>,
601    num_records: u16,
602    depth: u16,
603    heap_id_len: usize,
604    records: &mut Vec<BTreeV2Record>,
605) -> Result<()> {
606    let mut cursor = Cursor::new(data);
607    cursor.set_position(address);
608
609    let start = cursor.position();
610
611    let sig = cursor.read_bytes(4)?;
612    if sig != BTIN_SIGNATURE {
613        return Err(Error::InvalidBTreeV2Signature {
614            context: "internal node",
615        });
616    }
617
618    let version = cursor.read_u8()?;
619    if version != 0 {
620        return Err(Error::UnsupportedBTreeVersion(version));
621    }
622
623    let node_type = cursor.read_u8()?;
624    if node_type != header.btree_type {
625        return Err(Error::InvalidData(format!(
626            "B-tree v2 internal node type mismatch: header says {}, node says {}",
627            header.btree_type, node_type
628        )));
629    }
630
631    // Compute max records for children to know the encoding size for
632    // child record counts.
633    let max_child_records = if depth == 1 {
634        max_leaf_records(header.node_size, header.record_size)
635    } else {
636        // For deeper trees, compute iteratively.
637        let leaf_max = max_leaf_records(header.node_size, header.record_size);
638        let mut prev_max = leaf_max;
639        for _ in 1..depth {
640            prev_max =
641                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
642        }
643        prev_max
644    };
645    let nrec_bytes = num_records_size(max_child_records);
646
647    // Read records and child pointers interleaved:
648    // child[0], record[0], child[1], record[1], ..., record[n-1], child[n]
649    // Plus total_records counts for each child.
650    //
651    // Actually per the HDF5 spec the layout is:
652    // record[0], record[1], ..., record[n-1],
653    // child_ptr[0], nrec[0], total[0], child_ptr[1], nrec[1], total[1], ..., child_ptr[n], nrec[n], total[n]
654    //
655    // Records first, then child pointers with their metadata.
656
657    // Read all records.
658    let mut node_records = Vec::with_capacity(num_records as usize);
659    for _ in 0..num_records {
660        let record = parse_record(
661            &mut cursor,
662            header.btree_type,
663            header.record_size,
664            offset_size,
665            length_size,
666            ndims,
667            heap_id_len,
668        )?;
669        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
670            node_records.push(record);
671        }
672    }
673
674    // Read child node pointers (num_records + 1 of them).
675    let num_children = num_records as usize + 1;
676
677    // Whether to include a "total records" field for each child.
678    // This is present when depth > 1.
679    let has_total_records = depth > 1;
680    // total_records encoding size for deeper nodes
681    let total_nrec_bytes = if has_total_records {
682        // Total records in a sub-tree — need enough bytes to hold the
683        // maximum total records. We use length_size as an upper bound.
684        length_size as usize
685    } else {
686        0
687    };
688
689    let mut child_addresses = Vec::with_capacity(num_children);
690    let mut child_nrecords = Vec::with_capacity(num_children);
691
692    for _ in 0..num_children {
693        let child_addr = cursor.read_offset(offset_size)?;
694        child_addresses.push(child_addr);
695        let nrec = cursor.read_uvar(nrec_bytes)?;
696        child_nrecords.push(nrec as u16);
697        if has_total_records {
698            // Skip total records count
699            cursor.read_uvar(total_nrec_bytes)?;
700        }
701    }
702
703    let compact_end = cursor.position();
704    verify_node_checksum(
705        &mut cursor,
706        start,
707        header.node_size,
708        compact_end,
709        "internal node",
710    )?;
711
712    // The records from this internal node are NOT included in the leaf
713    // collection — they are separators. We do NOT add them to results.
714    // Only leaf records are collected.
715    // (Actually, in HDF5 v2 B-trees, records in internal nodes are real
716    // records too, not just separators. We should collect them.)
717    records.extend(node_records);
718
719    // Recurse into children.
720    let child_depth = depth - 1;
721    for (i, &child_addr) in child_addresses.iter().enumerate() {
722        if Cursor::is_undefined_offset(child_addr, offset_size) {
723            continue;
724        }
725        let child_nrec = child_nrecords[i];
726        if child_depth == 0 {
727            // Child is a leaf.
728            let mut child_cursor = Cursor::new(data);
729            child_cursor.set_position(child_addr);
730            parse_leaf_node(
731                &mut child_cursor,
732                header,
733                offset_size,
734                length_size,
735                ndims,
736                chunk_dims,
737                chunk_bounds,
738                child_nrec,
739                heap_id_len,
740                records,
741            )?;
742        } else {
743            // Child is another internal node.
744            parse_internal_node(
745                data,
746                child_addr,
747                header,
748                offset_size,
749                length_size,
750                ndims,
751                chunk_dims,
752                chunk_bounds,
753                child_nrec,
754                child_depth,
755                heap_id_len,
756                records,
757            )?;
758        }
759    }
760
761    Ok(())
762}
763
764// ---------------------------------------------------------------------------
765// Public traversal
766// ---------------------------------------------------------------------------
767
768/// Collect all records from a B-tree v2 by traversing from the root.
769///
770/// `header` must be a previously parsed `BTreeV2Header`. `data` is the full
771/// file buffer. `ndims` is needed for chunk index record types (10 and 11).
772pub fn collect_btree_v2_records(
773    data: &[u8],
774    header: &BTreeV2Header,
775    offset_size: u8,
776    length_size: u8,
777    ndims: Option<u32>,
778    chunk_dims: &[u32],
779    chunk_bounds: Option<(&[u64], &[u64])>,
780) -> Result<Vec<BTreeV2Record>> {
781    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
782        return Ok(Vec::new());
783    }
784
785    if header.total_records == 0 || header.num_records_in_root == 0 {
786        return Ok(Vec::new());
787    }
788
789    // Determine heap_id_len from the record_size and btree_type.
790    let heap_id_len = compute_heap_id_len(header);
791
792    let mut records = Vec::new();
793
794    if header.depth == 0 {
795        // Root is a leaf node.
796        let mut cursor = Cursor::new(data);
797        cursor.set_position(header.root_node_address);
798        parse_leaf_node(
799            &mut cursor,
800            header,
801            offset_size,
802            length_size,
803            ndims,
804            chunk_dims,
805            chunk_bounds,
806            header.num_records_in_root,
807            heap_id_len,
808            &mut records,
809        )?;
810    } else {
811        // Root is an internal node.
812        parse_internal_node(
813            data,
814            header.root_node_address,
815            header,
816            offset_size,
817            length_size,
818            ndims,
819            chunk_dims,
820            chunk_bounds,
821            header.num_records_in_root,
822            header.depth,
823            heap_id_len,
824            &mut records,
825        )?;
826    }
827
828    Ok(records)
829}
830
831/// Collect all records from a B-tree v2 using random-access storage.
832pub fn collect_btree_v2_records_storage(
833    storage: &dyn Storage,
834    header: &BTreeV2Header,
835    offset_size: u8,
836    length_size: u8,
837    ndims: Option<u32>,
838    chunk_dims: &[u32],
839    chunk_bounds: Option<(&[u64], &[u64])>,
840) -> Result<Vec<BTreeV2Record>> {
841    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
842        return Ok(Vec::new());
843    }
844
845    if header.total_records == 0 || header.num_records_in_root == 0 {
846        return Ok(Vec::new());
847    }
848
849    let heap_id_len = compute_heap_id_len(header);
850    let mut records = Vec::new();
851
852    if header.depth == 0 {
853        parse_leaf_node_storage(
854            storage,
855            header.root_node_address,
856            header,
857            offset_size,
858            length_size,
859            ndims,
860            chunk_dims,
861            chunk_bounds,
862            header.num_records_in_root,
863            heap_id_len,
864            &mut records,
865        )?;
866    } else {
867        parse_internal_node_storage(
868            storage,
869            header.root_node_address,
870            header,
871            offset_size,
872            length_size,
873            ndims,
874            chunk_dims,
875            chunk_bounds,
876            header.num_records_in_root,
877            header.depth,
878            heap_id_len,
879            &mut records,
880        )?;
881    }
882
883    Ok(records)
884}
885
886#[allow(clippy::too_many_arguments)]
887fn parse_leaf_node_storage(
888    storage: &dyn Storage,
889    address: u64,
890    header: &BTreeV2Header,
891    offset_size: u8,
892    length_size: u8,
893    ndims: Option<u32>,
894    chunk_dims: &[u32],
895    chunk_bounds: Option<(&[u64], &[u64])>,
896    num_records: u16,
897    heap_id_len: usize,
898    records: &mut Vec<BTreeV2Record>,
899) -> Result<()> {
900    let node_len = usize::try_from(header.node_size).map_err(|_| {
901        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
902    })?;
903    let node_bytes = storage.read_range(address, node_len)?;
904    let mut cursor = Cursor::new(node_bytes.as_ref());
905    parse_leaf_node(
906        &mut cursor,
907        header,
908        offset_size,
909        length_size,
910        ndims,
911        chunk_dims,
912        chunk_bounds,
913        num_records,
914        heap_id_len,
915        records,
916    )
917}
918
919#[allow(clippy::too_many_arguments)]
920fn parse_internal_node_storage(
921    storage: &dyn Storage,
922    address: u64,
923    header: &BTreeV2Header,
924    offset_size: u8,
925    length_size: u8,
926    ndims: Option<u32>,
927    chunk_dims: &[u32],
928    chunk_bounds: Option<(&[u64], &[u64])>,
929    num_records: u16,
930    depth: u16,
931    heap_id_len: usize,
932    records: &mut Vec<BTreeV2Record>,
933) -> Result<()> {
934    let node_len = usize::try_from(header.node_size).map_err(|_| {
935        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
936    })?;
937    let node_bytes = storage.read_range(address, node_len)?;
938    let mut cursor = Cursor::new(node_bytes.as_ref());
939    let start = cursor.position();
940
941    let sig = cursor.read_bytes(4)?;
942    if sig != BTIN_SIGNATURE {
943        return Err(Error::InvalidBTreeV2Signature {
944            context: "internal node",
945        });
946    }
947
948    let version = cursor.read_u8()?;
949    if version != 0 {
950        return Err(Error::UnsupportedBTreeVersion(version));
951    }
952
953    let node_type = cursor.read_u8()?;
954    if node_type != header.btree_type {
955        return Err(Error::InvalidData(format!(
956            "B-tree v2 internal node type mismatch: header says {}, node says {}",
957            header.btree_type, node_type
958        )));
959    }
960
961    let max_child_records = if depth == 1 {
962        max_leaf_records(header.node_size, header.record_size)
963    } else {
964        let leaf_max = max_leaf_records(header.node_size, header.record_size);
965        let mut prev_max = leaf_max;
966        for _ in 1..depth {
967            prev_max =
968                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
969        }
970        prev_max
971    };
972    let nrec_bytes = num_records_size(max_child_records);
973
974    let mut node_records = Vec::with_capacity(num_records as usize);
975    for _ in 0..num_records {
976        let record = parse_record(
977            &mut cursor,
978            header.btree_type,
979            header.record_size,
980            offset_size,
981            length_size,
982            ndims,
983            heap_id_len,
984        )?;
985        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
986            node_records.push(record);
987        }
988    }
989
990    let num_children = usize::from(num_records) + 1;
991    let has_total_records = depth > 1;
992    let total_nrec_bytes = if has_total_records {
993        usize::from(length_size)
994    } else {
995        0
996    };
997
998    let mut child_addresses = Vec::with_capacity(num_children);
999    let mut child_nrecords = Vec::with_capacity(num_children);
1000    for _ in 0..num_children {
1001        child_addresses.push(cursor.read_offset(offset_size)?);
1002        child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
1003        if has_total_records {
1004            cursor.read_uvar(total_nrec_bytes)?;
1005        }
1006    }
1007
1008    let compact_end = cursor.position();
1009    verify_node_checksum(
1010        &mut cursor,
1011        start,
1012        header.node_size,
1013        compact_end,
1014        "internal node",
1015    )?;
1016
1017    records.extend(node_records);
1018
1019    let child_depth = depth - 1;
1020    for (i, &child_addr) in child_addresses.iter().enumerate() {
1021        if Cursor::is_undefined_offset(child_addr, offset_size) {
1022            continue;
1023        }
1024        let child_nrec = child_nrecords[i];
1025        if child_depth == 0 {
1026            parse_leaf_node_storage(
1027                storage,
1028                child_addr,
1029                header,
1030                offset_size,
1031                length_size,
1032                ndims,
1033                chunk_dims,
1034                chunk_bounds,
1035                child_nrec,
1036                heap_id_len,
1037                records,
1038            )?;
1039        } else {
1040            parse_internal_node_storage(
1041                storage,
1042                child_addr,
1043                header,
1044                offset_size,
1045                length_size,
1046                ndims,
1047                chunk_dims,
1048                chunk_bounds,
1049                child_nrec,
1050                child_depth,
1051                heap_id_len,
1052                records,
1053            )?;
1054        }
1055    }
1056
1057    Ok(())
1058}
1059
1060/// Compute the heap ID length from the record size and tree type.
1061///
1062/// For link/attribute B-trees (types 5, 6, 8, 9), the heap ID occupies
1063/// the remaining bytes after the fixed fields. For chunk types (10, 11)
1064/// or unknown types, return 0 (heap_id is not used).
1065fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
1066    let rs = header.record_size as usize;
1067    match header.btree_type {
1068        5 => rs.saturating_sub(4),         // hash(4)
1069        6 => rs.saturating_sub(8),         // order(8)
1070        8 => rs.saturating_sub(4 + 1 + 4), // hash(4) + flags(1) + creation_order(4)
1071        9 => rs.saturating_sub(4),         // order(4)
1072        _ => 0,
1073    }
1074}
1075
1076#[cfg(test)]
1077mod tests {
1078    use super::*;
1079
1080    /// Build a minimal BTHD with the given parameters.
1081    #[allow(clippy::too_many_arguments)]
1082    fn build_header(
1083        btree_type: u8,
1084        node_size: u32,
1085        record_size: u16,
1086        depth: u16,
1087        root_node_address: u64,
1088        num_records_in_root: u16,
1089        total_records: u64,
1090        offset_size: u8,
1091        length_size: u8,
1092    ) -> Vec<u8> {
1093        let mut buf = Vec::new();
1094        buf.extend_from_slice(b"BTHD");
1095        buf.push(0); // version
1096        buf.push(btree_type);
1097        buf.extend_from_slice(&node_size.to_le_bytes());
1098        buf.extend_from_slice(&record_size.to_le_bytes());
1099        buf.extend_from_slice(&depth.to_le_bytes());
1100        buf.push(75); // split percent
1101        buf.push(40); // merge percent
1102        match offset_size {
1103            4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
1104            8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
1105            _ => panic!("unsupported"),
1106        }
1107        buf.extend_from_slice(&num_records_in_root.to_le_bytes());
1108        match length_size {
1109            4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
1110            8 => buf.extend_from_slice(&total_records.to_le_bytes()),
1111            _ => panic!("unsupported"),
1112        }
1113        // Compute and append checksum.
1114        let checksum = jenkins_lookup3(&buf);
1115        buf.extend_from_slice(&checksum.to_le_bytes());
1116        buf
1117    }
1118
1119    #[test]
1120    fn test_parse_header() {
1121        let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
1122        let mut cursor = Cursor::new(&data);
1123        let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();
1124
1125        assert_eq!(hdr.btree_type, 5);
1126        assert_eq!(hdr.node_size, 4096);
1127        assert_eq!(hdr.record_size, 12);
1128        assert_eq!(hdr.depth, 0);
1129        assert_eq!(hdr.split_percent, 75);
1130        assert_eq!(hdr.merge_percent, 40);
1131        assert_eq!(hdr.root_node_address, 0x1000);
1132        assert_eq!(hdr.num_records_in_root, 3);
1133        assert_eq!(hdr.total_records, 3);
1134    }
1135
1136    #[test]
1137    fn test_bad_signature() {
1138        let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1139        data[0] = b'X';
1140        let mut cursor = Cursor::new(&data);
1141        assert!(matches!(
1142            BTreeV2Header::parse(&mut cursor, 8, 8),
1143            Err(Error::InvalidBTreeV2Signature { .. })
1144        ));
1145    }
1146
1147    #[test]
1148    fn test_bad_checksum() {
1149        let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
1150        // Corrupt a byte in the middle.
1151        data[6] = 0xFF;
1152        let mut cursor = Cursor::new(&data);
1153        assert!(matches!(
1154            BTreeV2Header::parse(&mut cursor, 8, 8),
1155            Err(Error::ChecksumMismatch { .. })
1156        ));
1157    }
1158
1159    #[test]
1160    fn test_collect_empty_tree() {
1161        let header = BTreeV2Header {
1162            btree_type: 5,
1163            node_size: 4096,
1164            record_size: 12,
1165            depth: 0,
1166            split_percent: 75,
1167            merge_percent: 40,
1168            root_node_address: u64::MAX,
1169            num_records_in_root: 0,
1170            total_records: 0,
1171        };
1172        let data = vec![0u8; 100];
1173        let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
1174        assert!(records.is_empty());
1175    }
1176
1177    #[test]
1178    fn test_compute_heap_id_len() {
1179        // Type 5: record_size - 4
1180        let h5 = BTreeV2Header {
1181            btree_type: 5,
1182            record_size: 12,
1183            node_size: 0,
1184            depth: 0,
1185            split_percent: 0,
1186            merge_percent: 0,
1187            root_node_address: 0,
1188            num_records_in_root: 0,
1189            total_records: 0,
1190        };
1191        assert_eq!(compute_heap_id_len(&h5), 8);
1192
1193        // Type 8: record_size - 9
1194        let h8 = BTreeV2Header {
1195            btree_type: 8,
1196            record_size: 17,
1197            ..h5
1198        };
1199        assert_eq!(compute_heap_id_len(&h8), 8);
1200    }
1201
1202    #[test]
1203    fn test_max_leaf_records() {
1204        // node_size=4096, record_size=12, overhead=10
1205        // => (4096 - 10) / 12 = 340
1206        assert_eq!(max_leaf_records(4096, 12), 340);
1207    }
1208
1209    #[test]
1210    fn test_num_records_size() {
1211        assert_eq!(num_records_size(0), 1);
1212        assert_eq!(num_records_size(255), 1);
1213        assert_eq!(num_records_size(256), 2);
1214        assert_eq!(num_records_size(65535), 2);
1215        assert_eq!(num_records_size(65536), 4);
1216    }
1217
1218    #[test]
1219    fn test_parse_huge_indirect_record() {
1220        let mut data = Vec::new();
1221        data.extend_from_slice(&0x1234u64.to_le_bytes());
1222        data.extend_from_slice(&99u64.to_le_bytes());
1223        data.extend_from_slice(&7u64.to_le_bytes());
1224        let mut cursor = Cursor::new(&data);
1225
1226        let record = parse_record(&mut cursor, 1, data.len() as u16, 8, 8, None, 0).unwrap();
1227        match record {
1228            BTreeV2Record::HugeIndirectNonFiltered {
1229                address,
1230                length,
1231                object_id,
1232            } => {
1233                assert_eq!(address, 0x1234);
1234                assert_eq!(length, 99);
1235                assert_eq!(object_id, 7);
1236            }
1237            other => panic!("expected huge record, got {:?}", other),
1238        }
1239    }
1240
1241    #[test]
1242    fn test_parse_shared_heap_record() {
1243        let mut data = Vec::new();
1244        data.push(0);
1245        data.extend_from_slice(&[0, 0, 0]);
1246        data.extend_from_slice(&0xAABB_CCDDu32.to_le_bytes());
1247        data.extend_from_slice(&3u32.to_le_bytes());
1248        data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1249        let mut cursor = Cursor::new(&data);
1250
1251        let record = parse_record(&mut cursor, 7, data.len() as u16, 8, 8, None, 0).unwrap();
1252        match record {
1253            BTreeV2Record::SharedMessageHeap {
1254                hash,
1255                reference_count,
1256                heap_id,
1257            } => {
1258                assert_eq!(hash, 0xAABB_CCDD);
1259                assert_eq!(reference_count, 3);
1260                assert_eq!(heap_id, vec![1, 2, 3, 4, 5, 6, 7, 8]);
1261            }
1262            other => panic!("expected shared heap record, got {:?}", other),
1263        }
1264    }
1265
1266    #[test]
1267    fn test_parse_leaf_with_type5_records() {
1268        // Build a leaf node with 2 type-5 records (link name hash).
1269        // record_size = 12 (hash=4 + heap_id=8)
1270        let record_size: u16 = 12;
1271        let node_size: u32 = 4096;
1272
1273        let header = BTreeV2Header {
1274            btree_type: 5,
1275            node_size,
1276            record_size,
1277            depth: 0,
1278            split_percent: 75,
1279            merge_percent: 40,
1280            root_node_address: 0, // will point to our leaf
1281            num_records_in_root: 2,
1282            total_records: 2,
1283        };
1284
1285        // Build the leaf node.
1286        let mut leaf = Vec::new();
1287        leaf.extend_from_slice(b"BTLF"); // signature
1288        leaf.push(0); // version
1289        leaf.push(5); // type
1290
1291        // Record 1: hash=0xAABBCCDD, heap_id=[1,2,3,4,5,6,7,8]
1292        leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
1293        leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
1294
1295        // Record 2: hash=0x11223344, heap_id=[9,10,11,12,13,14,15,16]
1296        leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
1297        leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);
1298
1299        // The checksum covers the populated node payload. The node allocation
1300        // may be larger and is padded after the checksum.
1301        let checksum = jenkins_lookup3(&leaf);
1302        leaf.extend_from_slice(&checksum.to_le_bytes());
1303        leaf.resize(node_size as usize, 0);
1304
1305        let mut records = Vec::new();
1306        let mut cursor = Cursor::new(&leaf);
1307        parse_leaf_node(
1308            &mut cursor,
1309            &header,
1310            8,
1311            8,
1312            None,
1313            &[],
1314            None,
1315            2,
1316            8, // heap_id_len
1317            &mut records,
1318        )
1319        .unwrap();
1320
1321        assert_eq!(records.len(), 2);
1322        match &records[0] {
1323            BTreeV2Record::LinkNameHash { hash, heap_id } => {
1324                assert_eq!(*hash, 0xAABBCCDD);
1325                assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
1326            }
1327            _ => panic!("expected LinkNameHash"),
1328        }
1329        match &records[1] {
1330            BTreeV2Record::LinkNameHash { hash, heap_id } => {
1331                assert_eq!(*hash, 0x11223344);
1332                assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
1333            }
1334            _ => panic!("expected LinkNameHash"),
1335        }
1336    }
1337}