// hdf5_reader/btree_v2.rs
1//! HDF5 B-tree Version 2.
2//!
3//! V2 B-trees are used by newer-style groups and datasets for indexed link
4//! storage, attribute storage, and chunked dataset indexing. The header
5//! (`BTHD`) describes the tree parameters. Internal nodes (`BTIN`) and leaf
6//! nodes (`BTLF`) contain the actual records.
7//!
8//! This module provides the header parse, record types, and a traversal
9//! function that collects all records from a tree.
10
11use crate::checksum::jenkins_lookup3;
12use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
16// ---------------------------------------------------------------------------
17// Signatures
18// ---------------------------------------------------------------------------
19
/// Four-byte signature marking a v2 B-tree header block.
const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
/// Four-byte signature marking a v2 B-tree internal node.
const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
/// Four-byte signature marking a v2 B-tree leaf node.
const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
23
24// ---------------------------------------------------------------------------
25// Header
26// ---------------------------------------------------------------------------
27
/// Parsed B-tree v2 header.
///
/// Produced by [`BTreeV2Header::parse`]; the trailing header checksum is
/// verified during parsing and is not retained here.
#[derive(Debug, Clone)]
pub struct BTreeV2Header {
    /// B-tree type (determines the record format).
    pub btree_type: u8,
    /// Size in bytes of each B-tree node (both internal and leaf).
    pub node_size: u32,
    /// Size in bytes of each record.
    pub record_size: u16,
    /// Depth of the tree (0 = root is a leaf).
    pub depth: u16,
    /// Percent full at which to split a node.
    pub split_percent: u8,
    /// Percent full at which to merge a node.
    pub merge_percent: u8,
    /// Address of the root node (may be the undefined address when the
    /// tree is empty; traversal checks this before dereferencing).
    pub root_node_address: u64,
    /// Number of records in the root node.
    pub num_records_in_root: u16,
    /// Total number of records in the entire tree.
    pub total_records: u64,
}
50
impl BTreeV2Header {
    /// Parse a B-tree v2 header at the current cursor position.
    ///
    /// Format:
    /// - Signature: `BTHD` (4 bytes)
    /// - Version: 0 (1 byte)
    /// - B-tree type (u8)
    /// - Node size (u32 LE)
    /// - Record size (u16 LE)
    /// - Depth (u16 LE)
    /// - Split percent (u8)
    /// - Merge percent (u8)
    /// - Root node address (`offset_size` bytes)
    /// - Number of records in root node (u16 LE)
    /// - Total number of records in tree (`length_size` bytes)
    /// - Checksum (u32 LE)
    ///
    /// # Errors
    ///
    /// Fails when the signature is not `BTHD`, the version byte is not 0,
    /// the trailing Jenkins lookup3 checksum does not match, or the cursor
    /// runs out of bytes mid-field.
    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
        // The checksum covers every byte from the signature up to (but
        // excluding) the checksum field itself, so remember where we began.
        let start = cursor.position();

        let sig = cursor.read_bytes(4)?;
        if sig != BTHD_SIGNATURE {
            return Err(Error::InvalidBTreeV2Signature { context: "header" });
        }

        let version = cursor.read_u8()?;
        if version != 0 {
            return Err(Error::UnsupportedBTreeVersion(version));
        }

        let btree_type = cursor.read_u8()?;
        let node_size = cursor.read_u32_le()?;
        let record_size = cursor.read_u16_le()?;
        let depth = cursor.read_u16_le()?;
        let split_percent = cursor.read_u8()?;
        let merge_percent = cursor.read_u8()?;
        let root_node_address = cursor.read_offset(offset_size)?;
        let num_records_in_root = cursor.read_u16_le()?;
        let total_records = cursor.read_length(length_size)?;

        // Checksum covers everything from signature through total_records.
        // Positions are absolute into `cursor.data()`, so this slice works
        // whether the cursor wraps the whole file or a per-header buffer.
        let checksum_end = cursor.position();
        let stored_checksum = cursor.read_u32_le()?;

        let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
        if computed != stored_checksum {
            return Err(Error::ChecksumMismatch {
                expected: stored_checksum,
                actual: computed,
            });
        }

        Ok(BTreeV2Header {
            btree_type,
            node_size,
            record_size,
            depth,
            split_percent,
            merge_percent,
            root_node_address,
            num_records_in_root,
            total_records,
        })
    }

    /// Parse a B-tree v2 header from random-access storage.
    ///
    /// Computes the exact on-disk header length from the field widths,
    /// reads only that many bytes at `address`, then delegates to
    /// [`BTreeV2Header::parse`].
    ///
    /// # Errors
    ///
    /// Propagates storage read failures and any error from `parse`.
    pub fn parse_at_storage(
        storage: &dyn Storage,
        address: u64,
        offset_size: u8,
        length_size: u8,
    ) -> Result<Self> {
        // Sum of the fixed field widths listed in `parse`'s doc comment:
        // sig(4) + version(1) + type(1) + node_size(4) + record_size(2)
        // + depth(2) + split(1) + merge(1) + root addr + nrec_root(2)
        // + total records + checksum(4).
        let header_len = 4
            + 1
            + 1
            + 4
            + 2
            + 2
            + 1
            + 1
            + usize::from(offset_size)
            + 2
            + usize::from(length_size)
            + 4;
        let bytes = storage.read_range(address, header_len)?;
        let mut cursor = Cursor::new(bytes.as_ref());
        Self::parse(&mut cursor, offset_size, length_size)
    }
}
139
140// ---------------------------------------------------------------------------
141// Records
142// ---------------------------------------------------------------------------
143
/// A record from a B-tree v2.
///
/// The record format depends on the B-tree type field in the header. The
/// `heap_id` payloads are opaque byte strings whose length is derived from
/// the header's record size (see `compute_heap_id_len`).
#[derive(Debug, Clone)]
pub enum BTreeV2Record {
    /// Type 5: Link name for indexed group (hashed).
    LinkNameHash { hash: u32, heap_id: Vec<u8> },
    /// Type 6: Creation order for indexed group.
    CreationOrder { order: u64, heap_id: Vec<u8> },
    /// Type 8: Attribute name for indexed group (hashed).
    AttributeNameHash {
        hash: u32,
        flags: u8,
        creation_order: u32,
        heap_id: Vec<u8>,
    },
    /// Type 9: Attribute creation order.
    AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
    /// Type 10: Non-filtered chunked dataset record (v2 chunk index).
    /// `offsets` holds one 8-byte element offset per dimension.
    ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
    /// Type 11: Filtered chunked dataset record (v2 chunk index).
    /// `chunk_size` is the on-disk (post-filter) size of the chunk.
    ChunkedFiltered {
        address: u64,
        chunk_size: u64,
        filter_mask: u32,
        offsets: Vec<u64>,
    },
    /// Unknown/unsupported record type — raw bytes preserved.
    Unknown { record_type: u8, data: Vec<u8> },
}
174
175// ---------------------------------------------------------------------------
176// Record parsing
177// ---------------------------------------------------------------------------
178
179/// Parse a single record of the given B-tree type.
180fn parse_record(
181    cursor: &mut Cursor,
182    btree_type: u8,
183    record_size: u16,
184    offset_size: u8,
185    length_size: u8,
186    _ndims: Option<u32>,
187    heap_id_len: usize,
188) -> Result<BTreeV2Record> {
189    let record_start = cursor.position();
190
191    let record = match btree_type {
192        // Type 5: link name hash
193        5 => {
194            let hash = cursor.read_u32_le()?;
195            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
196            BTreeV2Record::LinkNameHash { hash, heap_id }
197        }
198
199        // Type 6: creation order
200        6 => {
201            let order = cursor.read_u64_le()?;
202            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
203            BTreeV2Record::CreationOrder { order, heap_id }
204        }
205
206        // Type 8: attribute name hash
207        8 => {
208            let hash = cursor.read_u32_le()?;
209            let flags = cursor.read_u8()?;
210            let creation_order = cursor.read_u32_le()?;
211            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
212            BTreeV2Record::AttributeNameHash {
213                hash,
214                flags,
215                creation_order,
216                heap_id,
217            }
218        }
219
220        // Type 9: attribute creation order
221        9 => {
222            let order = cursor.read_u32_le()?;
223            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
224            BTreeV2Record::AttributeCreationOrder { order, heap_id }
225        }
226
227        // Type 10: non-filtered chunk
228        10 => {
229            let address = cursor.read_offset(offset_size)?;
230            // Chunk offsets are encoded as scaled 64-bit values.
231            // The number of offset dimensions is calculated from the record size.
232            // Each offset is 8 bytes in a type-10 record.
233            let offset_bytes_available = record_size as usize - offset_size as usize;
234            let num_offsets = offset_bytes_available / 8;
235            let mut offsets = Vec::with_capacity(num_offsets);
236            for _ in 0..num_offsets {
237                offsets.push(cursor.read_u64_le()?);
238            }
239            BTreeV2Record::ChunkedNonFiltered { address, offsets }
240        }
241
242        // Type 11: filtered chunk
243        11 => {
244            let address = cursor.read_offset(offset_size)?;
245            // nbytes (chunk size on disk) is encoded using length_size bytes.
246            let nbytes_size = length_size as usize;
247            let chunk_size = cursor.read_length(length_size)?;
248            let filter_mask = cursor.read_u32_le()?;
249            let remaining = record_size as usize - offset_size as usize - nbytes_size - 4; // filter_mask
250            let num_offsets = remaining / 8;
251            let mut offsets = Vec::with_capacity(num_offsets);
252            for _ in 0..num_offsets {
253                offsets.push(cursor.read_u64_le()?);
254            }
255            BTreeV2Record::ChunkedFiltered {
256                address,
257                chunk_size,
258                filter_mask,
259                offsets,
260            }
261        }
262
263        // Unknown type — read raw bytes.
264        _ => {
265            let data = cursor.read_bytes(record_size as usize)?.to_vec();
266            return Ok(BTreeV2Record::Unknown {
267                record_type: btree_type,
268                data,
269            });
270        }
271    };
272
273    // Ensure we consumed exactly record_size bytes (skip any remaining).
274    let consumed = (cursor.position() - record_start) as usize;
275    if consumed < record_size as usize {
276        cursor.skip(record_size as usize - consumed)?;
277    }
278
279    Ok(record)
280}
281
282fn record_matches_chunk_bounds(
283    record: &BTreeV2Record,
284    chunk_dims: &[u32],
285    chunk_bounds: Option<(&[u64], &[u64])>,
286) -> bool {
287    let Some((first_chunk, last_chunk)) = chunk_bounds else {
288        return true;
289    };
290
291    let offsets = match record {
292        BTreeV2Record::ChunkedNonFiltered { offsets, .. }
293        | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
294        _ => return true,
295    };
296
297    offsets.iter().enumerate().all(|(dim, offset)| {
298        let chunk_index = *offset / u64::from(chunk_dims[dim]);
299        chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
300    })
301}
302
303// ---------------------------------------------------------------------------
304// Node parsing
305// ---------------------------------------------------------------------------
306
/// Number of bytes required to encode a record count no larger than
/// `max_records` — the field width used for per-child record counts in
/// internal nodes.
fn num_records_size(max_records: u64) -> usize {
    match max_records {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        0x1_0000..=0xFFFF_FFFF => 4,
        _ => 8,
    }
}
320
/// Maximum number of records a leaf node can hold.
///
/// A leaf carries signature(4) + version(1) + type(1) + checksum(4) = 10
/// bytes of overhead; every remaining byte belongs to records.
fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
    const LEAF_OVERHEAD: u32 = 10;
    if record_size == 0 {
        return 0;
    }
    match node_size.checked_sub(LEAF_OVERHEAD) {
        Some(payload) => u64::from(payload / u32::from(record_size)),
        None => 0,
    }
}
330
/// Maximum number of records an internal node can hold.
///
/// Internal-node overhead is signature(4) + version(1) + type(1) +
/// checksum(4) = 10 bytes. Each of the `n` records is paired with a child
/// pointer, and there is one extra trailing child pointer, so the capacity
/// solves `n * (record_size + ptr) + ptr <= node_size - overhead`, where
/// `ptr` is `offset_size` plus the width of the child record-count field.
fn max_internal_records(
    node_size: u32,
    record_size: u16,
    offset_size: u8,
    max_child_records: u64,
) -> u64 {
    const INTERNAL_OVERHEAD: u32 = 10;
    if node_size <= INTERNAL_OVERHEAD || record_size == 0 {
        return 0;
    }
    let available = u64::from(node_size - INTERNAL_OVERHEAD);
    // Width of the per-child "number of records" field, by the same
    // thresholds as `num_records_size` (inlined to keep this function
    // self-contained).
    let nrec_width: u64 = match max_child_records {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        0x1_0000..=0xFFFF_FFFF => 4,
        _ => 8,
    };
    let pointer_cost = u64::from(offset_size) + nrec_width;
    if available <= pointer_cost {
        return 0;
    }
    (available - pointer_cost) / (u64::from(record_size) + pointer_cost)
}
359
/// Parse a leaf node and collect its records.
///
/// `cursor` must be positioned at the first byte of a `BTLF` block; that
/// position is captured so the trailing Jenkins lookup3 checksum can be
/// verified over signature-through-records. Exactly `num_records` records
/// are parsed; those rejected by `record_matches_chunk_bounds` are
/// discarded, the rest appended to `records`.
///
/// # Errors
///
/// Fails on a wrong signature, a non-zero version byte, a node-type byte
/// that disagrees with the header, a checksum mismatch, or truncated data.
#[allow(clippy::too_many_arguments)]
fn parse_leaf_node(
    cursor: &mut Cursor,
    header: &BTreeV2Header,
    offset_size: u8,
    length_size: u8,
    ndims: Option<u32>,
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
    num_records: u16,
    heap_id_len: usize,
    records: &mut Vec<BTreeV2Record>,
) -> Result<()> {
    // Start of the node; the checksum covers [start, checksum field).
    let start = cursor.position();

    let sig = cursor.read_bytes(4)?;
    if sig != BTLF_SIGNATURE {
        return Err(Error::InvalidBTreeV2Signature {
            context: "leaf node",
        });
    }

    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::UnsupportedBTreeVersion(version));
    }

    // The node stores its own tree type; it must agree with the header.
    let node_type = cursor.read_u8()?;
    if node_type != header.btree_type {
        return Err(Error::InvalidData(format!(
            "B-tree v2 leaf node type mismatch: header says {}, node says {}",
            header.btree_type, node_type
        )));
    }

    for _ in 0..num_records {
        let record = parse_record(
            cursor,
            header.btree_type,
            header.record_size,
            offset_size,
            length_size,
            ndims,
            heap_id_len,
        )?;
        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
            records.push(record);
        }
    }

    // Verify checksum: covers signature through the end of records.
    // Positions are absolute into `cursor.data()`, so this works for both
    // whole-file cursors and per-node buffers.
    let checksum_data_end = cursor.position();
    let stored_checksum = cursor.read_u32_le()?;
    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
    if computed != stored_checksum {
        return Err(Error::ChecksumMismatch {
            expected: stored_checksum,
            actual: computed,
        });
    }

    Ok(())
}
424
/// Parse an internal node, collecting its records and recursing into its
/// children.
///
/// `data` is the full file buffer; `address` is the absolute offset of the
/// `BTIN` block. Records stored in internal nodes are real records (not
/// just separator keys) and are collected alongside leaf records, subject
/// to the same `chunk_bounds` filtering.
///
/// # Errors
///
/// Fails on a wrong signature, version, or node type, on a checksum
/// mismatch, or on truncated data anywhere in the subtree.
#[allow(clippy::too_many_arguments)]
fn parse_internal_node(
    data: &[u8],
    address: u64,
    header: &BTreeV2Header,
    offset_size: u8,
    length_size: u8,
    ndims: Option<u32>,
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
    num_records: u16,
    depth: u16,
    heap_id_len: usize,
    records: &mut Vec<BTreeV2Record>,
) -> Result<()> {
    let mut cursor = Cursor::new(data);
    cursor.set_position(address);

    // Start of the node; the checksum covers [start, checksum field).
    let start = cursor.position();

    let sig = cursor.read_bytes(4)?;
    if sig != BTIN_SIGNATURE {
        return Err(Error::InvalidBTreeV2Signature {
            context: "internal node",
        });
    }

    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::UnsupportedBTreeVersion(version));
    }

    let node_type = cursor.read_u8()?;
    if node_type != header.btree_type {
        return Err(Error::InvalidData(format!(
            "B-tree v2 internal node type mismatch: header says {}, node says {}",
            header.btree_type, node_type
        )));
    }

    // Compute max records for children to know the encoding size for
    // child record counts. At depth 1 the children are leaves; deeper
    // children are internal nodes, whose capacity is computed iteratively
    // bottom-up from the leaf capacity.
    let max_child_records = if depth == 1 {
        max_leaf_records(header.node_size, header.record_size)
    } else {
        // For deeper trees, compute iteratively.
        let leaf_max = max_leaf_records(header.node_size, header.record_size);
        let mut prev_max = leaf_max;
        for _ in 1..depth {
            prev_max =
                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
        }
        prev_max
    };
    let nrec_bytes = num_records_size(max_child_records);

    // Node layout after the 6-byte prefix:
    //   record[0], ..., record[n-1],
    //   child_ptr[0], nrec[0], (total[0]), ..., child_ptr[n], nrec[n], (total[n])
    // i.e. all records first, then n+1 child pointers, each with its record
    // count and — for nodes of depth > 1 — its subtree total.

    // Read all records.
    let mut node_records = Vec::with_capacity(num_records as usize);
    for _ in 0..num_records {
        let record = parse_record(
            &mut cursor,
            header.btree_type,
            header.record_size,
            offset_size,
            length_size,
            ndims,
            heap_id_len,
        )?;
        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
            node_records.push(record);
        }
    }

    // Read child node pointers (num_records + 1 of them).
    let num_children = num_records as usize + 1;

    // Whether to include a "total records" field for each child.
    // This is present when depth > 1.
    let has_total_records = depth > 1;
    // total_records encoding size for deeper nodes.
    // NOTE(review): the HDF5 library derives this width from the maximum
    // possible subtree total; using length_size assumes the writer did the
    // same — confirm against files produced by the reference library.
    let total_nrec_bytes = if has_total_records {
        // Total records in a sub-tree — need enough bytes to hold the
        // maximum total records. We use length_size as an upper bound.
        length_size as usize
    } else {
        0
    };

    let mut child_addresses = Vec::with_capacity(num_children);
    let mut child_nrecords = Vec::with_capacity(num_children);

    for _ in 0..num_children {
        let child_addr = cursor.read_offset(offset_size)?;
        child_addresses.push(child_addr);
        // Assumes the count fits in u16 (the header stores per-node record
        // counts as u16) — a corrupt larger value would be truncated here.
        let nrec = cursor.read_uvar(nrec_bytes)?;
        child_nrecords.push(nrec as u16);
        if has_total_records {
            // Skip total records count — not needed for full traversal.
            cursor.read_uvar(total_nrec_bytes)?;
        }
    }

    // Verify checksum.
    let checksum_data_end = cursor.position();
    let stored_checksum = cursor.read_u32_le()?;
    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
    if computed != stored_checksum {
        return Err(Error::ChecksumMismatch {
            expected: stored_checksum,
            actual: computed,
        });
    }

    // In v2 B-trees the records held in internal nodes are real records
    // (not just separator keys), so they belong in the result set.
    records.extend(node_records);

    // Recurse into children.
    let child_depth = depth - 1;
    for (i, &child_addr) in child_addresses.iter().enumerate() {
        if Cursor::is_undefined_offset(child_addr, offset_size) {
            continue;
        }
        let child_nrec = child_nrecords[i];
        if child_depth == 0 {
            // Child is a leaf.
            let mut child_cursor = Cursor::new(data);
            child_cursor.set_position(child_addr);
            parse_leaf_node(
                &mut child_cursor,
                header,
                offset_size,
                length_size,
                ndims,
                chunk_dims,
                chunk_bounds,
                child_nrec,
                heap_id_len,
                records,
            )?;
        } else {
            // Child is another internal node.
            parse_internal_node(
                data,
                child_addr,
                header,
                offset_size,
                length_size,
                ndims,
                chunk_dims,
                chunk_bounds,
                child_nrec,
                child_depth,
                heap_id_len,
                records,
            )?;
        }
    }

    Ok(())
}
600
601// ---------------------------------------------------------------------------
602// Public traversal
603// ---------------------------------------------------------------------------
604
605/// Collect all records from a B-tree v2 by traversing from the root.
606///
607/// `header` must be a previously parsed `BTreeV2Header`. `data` is the full
608/// file buffer. `ndims` is needed for chunk index record types (10 and 11).
609pub fn collect_btree_v2_records(
610    data: &[u8],
611    header: &BTreeV2Header,
612    offset_size: u8,
613    length_size: u8,
614    ndims: Option<u32>,
615    chunk_dims: &[u32],
616    chunk_bounds: Option<(&[u64], &[u64])>,
617) -> Result<Vec<BTreeV2Record>> {
618    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
619        return Ok(Vec::new());
620    }
621
622    if header.total_records == 0 || header.num_records_in_root == 0 {
623        return Ok(Vec::new());
624    }
625
626    // Determine heap_id_len from the record_size and btree_type.
627    let heap_id_len = compute_heap_id_len(header);
628
629    let mut records = Vec::new();
630
631    if header.depth == 0 {
632        // Root is a leaf node.
633        let mut cursor = Cursor::new(data);
634        cursor.set_position(header.root_node_address);
635        parse_leaf_node(
636            &mut cursor,
637            header,
638            offset_size,
639            length_size,
640            ndims,
641            chunk_dims,
642            chunk_bounds,
643            header.num_records_in_root,
644            heap_id_len,
645            &mut records,
646        )?;
647    } else {
648        // Root is an internal node.
649        parse_internal_node(
650            data,
651            header.root_node_address,
652            header,
653            offset_size,
654            length_size,
655            ndims,
656            chunk_dims,
657            chunk_bounds,
658            header.num_records_in_root,
659            header.depth,
660            heap_id_len,
661            &mut records,
662        )?;
663    }
664
665    Ok(records)
666}
667
668/// Collect all records from a B-tree v2 using random-access storage.
669pub fn collect_btree_v2_records_storage(
670    storage: &dyn Storage,
671    header: &BTreeV2Header,
672    offset_size: u8,
673    length_size: u8,
674    ndims: Option<u32>,
675    chunk_dims: &[u32],
676    chunk_bounds: Option<(&[u64], &[u64])>,
677) -> Result<Vec<BTreeV2Record>> {
678    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
679        return Ok(Vec::new());
680    }
681
682    if header.total_records == 0 || header.num_records_in_root == 0 {
683        return Ok(Vec::new());
684    }
685
686    let heap_id_len = compute_heap_id_len(header);
687    let mut records = Vec::new();
688
689    if header.depth == 0 {
690        parse_leaf_node_storage(
691            storage,
692            header.root_node_address,
693            header,
694            offset_size,
695            length_size,
696            ndims,
697            chunk_dims,
698            chunk_bounds,
699            header.num_records_in_root,
700            heap_id_len,
701            &mut records,
702        )?;
703    } else {
704        parse_internal_node_storage(
705            storage,
706            header.root_node_address,
707            header,
708            offset_size,
709            length_size,
710            ndims,
711            chunk_dims,
712            chunk_bounds,
713            header.num_records_in_root,
714            header.depth,
715            heap_id_len,
716            &mut records,
717        )?;
718    }
719
720    Ok(records)
721}
722
723#[allow(clippy::too_many_arguments)]
724fn parse_leaf_node_storage(
725    storage: &dyn Storage,
726    address: u64,
727    header: &BTreeV2Header,
728    offset_size: u8,
729    length_size: u8,
730    ndims: Option<u32>,
731    chunk_dims: &[u32],
732    chunk_bounds: Option<(&[u64], &[u64])>,
733    num_records: u16,
734    heap_id_len: usize,
735    records: &mut Vec<BTreeV2Record>,
736) -> Result<()> {
737    let _node_len = usize::try_from(header.node_size).map_err(|_| {
738        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
739    })?;
740    let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
741        Error::InvalidData("B-tree v2 node read exceeds platform usize capacity".into())
742    })?;
743    let node_bytes = storage.read_range(address, read_len)?;
744    let mut cursor = Cursor::new(node_bytes.as_ref());
745    parse_leaf_node(
746        &mut cursor,
747        header,
748        offset_size,
749        length_size,
750        ndims,
751        chunk_dims,
752        chunk_bounds,
753        num_records,
754        heap_id_len,
755        records,
756    )
757}
758
759#[allow(clippy::too_many_arguments)]
760fn parse_internal_node_storage(
761    storage: &dyn Storage,
762    address: u64,
763    header: &BTreeV2Header,
764    offset_size: u8,
765    length_size: u8,
766    ndims: Option<u32>,
767    chunk_dims: &[u32],
768    chunk_bounds: Option<(&[u64], &[u64])>,
769    num_records: u16,
770    depth: u16,
771    heap_id_len: usize,
772    records: &mut Vec<BTreeV2Record>,
773) -> Result<()> {
774    let _node_len = usize::try_from(header.node_size).map_err(|_| {
775        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
776    })?;
777    let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
778        Error::InvalidData("B-tree v2 node read exceeds platform usize capacity".into())
779    })?;
780    let node_bytes = storage.read_range(address, read_len)?;
781    let mut cursor = Cursor::new(node_bytes.as_ref());
782    let start = cursor.position();
783
784    let sig = cursor.read_bytes(4)?;
785    if sig != BTIN_SIGNATURE {
786        return Err(Error::InvalidBTreeV2Signature {
787            context: "internal node",
788        });
789    }
790
791    let version = cursor.read_u8()?;
792    if version != 0 {
793        return Err(Error::UnsupportedBTreeVersion(version));
794    }
795
796    let node_type = cursor.read_u8()?;
797    if node_type != header.btree_type {
798        return Err(Error::InvalidData(format!(
799            "B-tree v2 internal node type mismatch: header says {}, node says {}",
800            header.btree_type, node_type
801        )));
802    }
803
804    let max_child_records = if depth == 1 {
805        max_leaf_records(header.node_size, header.record_size)
806    } else {
807        let leaf_max = max_leaf_records(header.node_size, header.record_size);
808        let mut prev_max = leaf_max;
809        for _ in 1..depth {
810            prev_max =
811                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
812        }
813        prev_max
814    };
815    let nrec_bytes = num_records_size(max_child_records);
816
817    let mut node_records = Vec::with_capacity(num_records as usize);
818    for _ in 0..num_records {
819        let record = parse_record(
820            &mut cursor,
821            header.btree_type,
822            header.record_size,
823            offset_size,
824            length_size,
825            ndims,
826            heap_id_len,
827        )?;
828        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
829            node_records.push(record);
830        }
831    }
832
833    let num_children = usize::from(num_records) + 1;
834    let has_total_records = depth > 1;
835    let total_nrec_bytes = if has_total_records {
836        usize::from(length_size)
837    } else {
838        0
839    };
840
841    let mut child_addresses = Vec::with_capacity(num_children);
842    let mut child_nrecords = Vec::with_capacity(num_children);
843    for _ in 0..num_children {
844        child_addresses.push(cursor.read_offset(offset_size)?);
845        child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
846        if has_total_records {
847            cursor.read_uvar(total_nrec_bytes)?;
848        }
849    }
850
851    let checksum_data_end = cursor.position();
852    let stored_checksum = cursor.read_u32_le()?;
853    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
854    if computed != stored_checksum {
855        return Err(Error::ChecksumMismatch {
856            expected: stored_checksum,
857            actual: computed,
858        });
859    }
860
861    records.extend(node_records);
862
863    let child_depth = depth - 1;
864    for (i, &child_addr) in child_addresses.iter().enumerate() {
865        if Cursor::is_undefined_offset(child_addr, offset_size) {
866            continue;
867        }
868        let child_nrec = child_nrecords[i];
869        if child_depth == 0 {
870            parse_leaf_node_storage(
871                storage,
872                child_addr,
873                header,
874                offset_size,
875                length_size,
876                ndims,
877                chunk_dims,
878                chunk_bounds,
879                child_nrec,
880                heap_id_len,
881                records,
882            )?;
883        } else {
884            parse_internal_node_storage(
885                storage,
886                child_addr,
887                header,
888                offset_size,
889                length_size,
890                ndims,
891                chunk_dims,
892                chunk_bounds,
893                child_nrec,
894                child_depth,
895                heap_id_len,
896                records,
897            )?;
898        }
899    }
900
901    Ok(())
902}
903
904/// Compute the heap ID length from the record size and tree type.
905///
906/// For link/attribute B-trees (types 5, 6, 8, 9), the heap ID occupies
907/// the remaining bytes after the fixed fields. For chunk types (10, 11)
908/// or unknown types, return 0 (heap_id is not used).
909fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
910    let rs = header.record_size as usize;
911    match header.btree_type {
912        5 => rs.saturating_sub(4),         // hash(4)
913        6 => rs.saturating_sub(8),         // order(8)
914        8 => rs.saturating_sub(4 + 1 + 4), // hash(4) + flags(1) + creation_order(4)
915        9 => rs.saturating_sub(4),         // order(4)
916        _ => 0,
917    }
918}
919
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize a minimal BTHD header, including the trailing checksum.
    #[allow(clippy::too_many_arguments)]
    fn build_header(
        btree_type: u8,
        node_size: u32,
        record_size: u16,
        depth: u16,
        root_node_address: u64,
        num_records_in_root: u16,
        total_records: u64,
        offset_size: u8,
        length_size: u8,
    ) -> Vec<u8> {
        let mut out = Vec::new();
        out.extend_from_slice(&BTHD_SIGNATURE);
        out.push(0); // version
        out.push(btree_type);
        out.extend_from_slice(&node_size.to_le_bytes());
        out.extend_from_slice(&record_size.to_le_bytes());
        out.extend_from_slice(&depth.to_le_bytes());
        out.push(75); // split percent
        out.push(40); // merge percent
        if offset_size == 8 {
            out.extend_from_slice(&root_node_address.to_le_bytes());
        } else if offset_size == 4 {
            out.extend_from_slice(&(root_node_address as u32).to_le_bytes());
        } else {
            panic!("unsupported");
        }
        out.extend_from_slice(&num_records_in_root.to_le_bytes());
        if length_size == 8 {
            out.extend_from_slice(&total_records.to_le_bytes());
        } else if length_size == 4 {
            out.extend_from_slice(&(total_records as u32).to_le_bytes());
        } else {
            panic!("unsupported");
        }
        // The checksum covers every header byte that precedes it.
        let sum = jenkins_lookup3(&out);
        out.extend_from_slice(&sum.to_le_bytes());
        out
    }

    #[test]
    fn test_parse_header() {
        let bytes = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
        let mut cursor = Cursor::new(&bytes);
        let header = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(header.btree_type, 5);
        assert_eq!(header.node_size, 4096);
        assert_eq!(header.record_size, 12);
        assert_eq!(header.depth, 0);
        assert_eq!(header.split_percent, 75);
        assert_eq!(header.merge_percent, 40);
        assert_eq!(header.root_node_address, 0x1000);
        assert_eq!(header.num_records_in_root, 3);
        assert_eq!(header.total_records, 3);
    }

    #[test]
    fn test_bad_signature() {
        let mut bytes = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
        // Break the first signature byte so the parse rejects it.
        bytes[0] = b'X';
        let mut cursor = Cursor::new(&bytes);
        let result = BTreeV2Header::parse(&mut cursor, 8, 8);
        assert!(matches!(result, Err(Error::InvalidBTreeV2Signature { .. })));
    }

    #[test]
    fn test_bad_checksum() {
        let mut bytes = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
        // Flip a byte inside the checksummed region (node_size field).
        bytes[6] = 0xFF;
        let mut cursor = Cursor::new(&bytes);
        let result = BTreeV2Header::parse(&mut cursor, 8, 8);
        assert!(matches!(result, Err(Error::ChecksumMismatch { .. })));
    }

    #[test]
    fn test_collect_empty_tree() {
        // An undefined root address means the tree holds no records.
        let header = BTreeV2Header {
            btree_type: 5,
            node_size: 4096,
            record_size: 12,
            depth: 0,
            split_percent: 75,
            merge_percent: 40,
            root_node_address: u64::MAX,
            num_records_in_root: 0,
            total_records: 0,
        };
        let backing = vec![0u8; 100];
        let records =
            collect_btree_v2_records(&backing, &header, 8, 8, None, &[], None).unwrap();
        assert!(records.is_empty());
    }

    #[test]
    fn test_compute_heap_id_len() {
        // Type 5: record_size - 4
        let h5 = BTreeV2Header {
            btree_type: 5,
            node_size: 0,
            record_size: 12,
            depth: 0,
            split_percent: 0,
            merge_percent: 0,
            root_node_address: 0,
            num_records_in_root: 0,
            total_records: 0,
        };
        assert_eq!(compute_heap_id_len(&h5), 8);

        // Type 8: record_size - 9
        let mut h8 = h5.clone();
        h8.btree_type = 8;
        h8.record_size = 17;
        assert_eq!(compute_heap_id_len(&h8), 8);
    }

    #[test]
    fn test_max_leaf_records() {
        // node_size=4096, record_size=12, overhead=10
        // => (4096 - 10) / 12 = 340
        assert_eq!(max_leaf_records(4096, 12), 340);
    }

    #[test]
    fn test_num_records_size() {
        // Each boundary value and its expected encoded width in bytes.
        for (count, width) in [(0, 1), (255, 1), (256, 2), (65535, 2), (65536, 4)] {
            assert_eq!(num_records_size(count), width);
        }
    }

    #[test]
    fn test_parse_leaf_with_type5_records() {
        // Build a leaf node with 2 type-5 records (link name hash).
        // record_size = 12 (hash=4 + heap_id=8)
        let record_size: u16 = 12;
        let node_size: u32 = 4096;

        let header = BTreeV2Header {
            btree_type: 5,
            node_size,
            record_size,
            depth: 0,
            split_percent: 75,
            merge_percent: 40,
            root_node_address: 0, // will point to our leaf
            num_records_in_root: 2,
            total_records: 2,
        };

        // Expected (hash, heap_id) pairs, used both to build and to verify.
        let expected: [(u32, [u8; 8]); 2] = [
            (0xAABBCCDD, [1, 2, 3, 4, 5, 6, 7, 8]),
            (0x11223344, [9, 10, 11, 12, 13, 14, 15, 16]),
        ];

        // Serialize the leaf node.
        let mut node = Vec::new();
        node.extend_from_slice(&BTLF_SIGNATURE);
        node.push(0); // version
        node.push(5); // type
        for (hash, heap_id) in &expected {
            node.extend_from_slice(&hash.to_le_bytes());
            node.extend_from_slice(heap_id);
        }

        // Checksum covers everything written so far.
        let sum = jenkins_lookup3(&node);
        node.extend_from_slice(&sum.to_le_bytes());

        // Pad to node_size (not strictly needed for parsing, but realistic).
        node.resize(node_size as usize, 0);

        let mut records = Vec::new();
        let mut cursor = Cursor::new(&node);
        parse_leaf_node(
            &mut cursor,
            &header,
            8,
            8,
            None,
            &[],
            None,
            2,
            8, // heap_id_len
            &mut records,
        )
        .unwrap();

        assert_eq!(records.len(), 2);
        for (record, (want_hash, want_id)) in records.iter().zip(&expected) {
            match record {
                BTreeV2Record::LinkNameHash { hash, heap_id } => {
                    assert_eq!(hash, want_hash);
                    assert_eq!(heap_id, want_id);
                }
                _ => panic!("expected LinkNameHash"),
            }
        }
    }
}