// hdf5_reader/btree_v2.rs
1//! HDF5 B-tree Version 2.
2//!
3//! V2 B-trees are used by newer-style groups and datasets for indexed link
4//! storage, attribute storage, and chunked dataset indexing. The header
5//! (`BTHD`) describes the tree parameters. Internal nodes (`BTIN`) and leaf
6//! nodes (`BTLF`) contain the actual records.
7//!
8//! This module provides the header parse, record types, and a traversal
9//! function that collects all records from a tree.
10
11use crate::checksum::jenkins_lookup3;
12use crate::error::{Error, Result};
13use crate::io::Cursor;
14use crate::storage::Storage;
15
// ---------------------------------------------------------------------------
// Signatures
// ---------------------------------------------------------------------------

/// Signature prefix of a v2 B-tree header block.
const BTHD_SIGNATURE: [u8; 4] = *b"BTHD";
/// Signature prefix of a v2 B-tree internal node.
const BTIN_SIGNATURE: [u8; 4] = *b"BTIN";
/// Signature prefix of a v2 B-tree leaf node.
const BTLF_SIGNATURE: [u8; 4] = *b"BTLF";
23
24// ---------------------------------------------------------------------------
25// Header
26// ---------------------------------------------------------------------------
27
/// Parsed B-tree v2 header (`BTHD` block).
#[derive(Debug, Clone)]
pub struct BTreeV2Header {
    /// B-tree type (determines the record format).
    pub btree_type: u8,
    /// Size in bytes of each B-tree node (both internal and leaf).
    pub node_size: u32,
    /// Size in bytes of each record.
    pub record_size: u16,
    /// Depth of the tree (0 = root is a leaf).
    pub depth: u16,
    /// Percent full at which to split a node.
    pub split_percent: u8,
    /// Percent full at which to merge a node.
    pub merge_percent: u8,
    /// Address of the root node (may be the undefined address when the
    /// tree is empty).
    pub root_node_address: u64,
    /// Number of records in the root node.
    pub num_records_in_root: u16,
    /// Total number of records in the entire tree.
    pub total_records: u64,
}
50
51impl BTreeV2Header {
52    /// Parse a B-tree v2 header at the current cursor position.
53    ///
54    /// Format:
55    /// - Signature: `BTHD` (4 bytes)
56    /// - Version: 0 (1 byte)
57    /// - B-tree type (u8)
58    /// - Node size (u32 LE)
59    /// - Record size (u16 LE)
60    /// - Depth (u16 LE)
61    /// - Split percent (u8)
62    /// - Merge percent (u8)
63    /// - Root node address (`offset_size` bytes)
64    /// - Number of records in root node (u16 LE)
65    /// - Total number of records in tree (`length_size` bytes)
66    /// - Checksum (u32 LE)
67    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
68        let start = cursor.position();
69
70        let sig = cursor.read_bytes(4)?;
71        if sig != BTHD_SIGNATURE {
72            return Err(Error::InvalidBTreeV2Signature { context: "header" });
73        }
74
75        let version = cursor.read_u8()?;
76        if version != 0 {
77            return Err(Error::UnsupportedBTreeVersion(version));
78        }
79
80        let btree_type = cursor.read_u8()?;
81        let node_size = cursor.read_u32_le()?;
82        let record_size = cursor.read_u16_le()?;
83        let depth = cursor.read_u16_le()?;
84        let split_percent = cursor.read_u8()?;
85        let merge_percent = cursor.read_u8()?;
86        let root_node_address = cursor.read_offset(offset_size)?;
87        let num_records_in_root = cursor.read_u16_le()?;
88        let total_records = cursor.read_length(length_size)?;
89
90        // Checksum covers everything from signature through total_records.
91        let checksum_end = cursor.position();
92        let stored_checksum = cursor.read_u32_le()?;
93
94        let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_end as usize]);
95        if computed != stored_checksum {
96            return Err(Error::ChecksumMismatch {
97                expected: stored_checksum,
98                actual: computed,
99            });
100        }
101
102        Ok(BTreeV2Header {
103            btree_type,
104            node_size,
105            record_size,
106            depth,
107            split_percent,
108            merge_percent,
109            root_node_address,
110            num_records_in_root,
111            total_records,
112        })
113    }
114
115    /// Parse a B-tree v2 header from random-access storage.
116    pub fn parse_at_storage(
117        storage: &dyn Storage,
118        address: u64,
119        offset_size: u8,
120        length_size: u8,
121    ) -> Result<Self> {
122        let header_len = 4
123            + 1
124            + 1
125            + 4
126            + 2
127            + 2
128            + 1
129            + 1
130            + usize::from(offset_size)
131            + 2
132            + usize::from(length_size)
133            + 4;
134        let bytes = storage.read_range(address, header_len)?;
135        let mut cursor = Cursor::new(bytes.as_ref());
136        Self::parse(&mut cursor, offset_size, length_size)
137    }
138}
139
140// ---------------------------------------------------------------------------
141// Records
142// ---------------------------------------------------------------------------
143
/// A single record stored in a v2 B-tree.
///
/// The on-disk record layout is selected by the B-tree type byte in the
/// `BTHD` header; each variant corresponds to one type value.
#[derive(Debug, Clone)]
pub enum BTreeV2Record {
    /// Type 1: indirectly accessed, non-filtered huge fractal heap object.
    HugeIndirectNonFiltered {
        address: u64,
        length: u64,
        object_id: u64,
    },
    /// Type 2: indirectly accessed, filtered huge fractal heap object.
    HugeIndirectFiltered {
        address: u64,
        filtered_length: u64,
        filter_mask: u32,
        memory_length: u64,
        object_id: u64,
    },
    /// Type 3: directly accessed, non-filtered huge fractal heap object.
    HugeDirectNonFiltered { address: u64, length: u64 },
    /// Type 4: directly accessed, filtered huge fractal heap object.
    HugeDirectFiltered {
        address: u64,
        filtered_length: u64,
        filter_mask: u32,
        memory_length: u64,
    },
    /// Type 5: link in an indexed group, keyed by name hash.
    LinkNameHash { hash: u32, heap_id: Vec<u8> },
    /// Type 6: link in an indexed group, keyed by creation order.
    CreationOrder { order: u64, heap_id: Vec<u8> },
    /// Type 7 (location 0): shared object-header message stored in the
    /// SOHM fractal heap.
    SharedMessageHeap {
        hash: u32,
        reference_count: u32,
        heap_id: Vec<u8>,
    },
    /// Type 7 (location 1): shared object-header message stored directly
    /// in an object header.
    SharedMessageObjectHeader {
        hash: u32,
        message_type: u16,
        object_header_index: u16,
        object_header_address: u64,
    },
    /// Type 8: attribute, keyed by name hash.
    AttributeNameHash {
        hash: u32,
        flags: u8,
        creation_order: u32,
        heap_id: Vec<u8>,
    },
    /// Type 9: attribute, keyed by creation order.
    AttributeCreationOrder { order: u32, heap_id: Vec<u8> },
    /// Type 10: non-filtered chunk of a chunked dataset (v2 chunk index).
    ChunkedNonFiltered { address: u64, offsets: Vec<u64> },
    /// Type 11: filtered chunk of a chunked dataset (v2 chunk index).
    ChunkedFiltered {
        address: u64,
        chunk_size: u64,
        filter_mask: u32,
        offsets: Vec<u64>,
    },
    /// Any record type this reader does not understand; raw bytes kept.
    Unknown { record_type: u8, data: Vec<u8> },
}
210
211// ---------------------------------------------------------------------------
212// Record parsing
213// ---------------------------------------------------------------------------
214
215/// Parse a single record of the given B-tree type.
216fn parse_record(
217    cursor: &mut Cursor,
218    btree_type: u8,
219    record_size: u16,
220    offset_size: u8,
221    length_size: u8,
222    _ndims: Option<u32>,
223    heap_id_len: usize,
224) -> Result<BTreeV2Record> {
225    let record_start = cursor.position();
226
227    let record = match btree_type {
228        // Type 1: indirectly accessed, non-filtered huge fractal heap object.
229        1 => BTreeV2Record::HugeIndirectNonFiltered {
230            address: cursor.read_offset(offset_size)?,
231            length: cursor.read_length(length_size)?,
232            object_id: cursor.read_length(length_size)?,
233        },
234
235        // Type 2: indirectly accessed, filtered huge fractal heap object.
236        2 => BTreeV2Record::HugeIndirectFiltered {
237            address: cursor.read_offset(offset_size)?,
238            filtered_length: cursor.read_length(length_size)?,
239            filter_mask: cursor.read_u32_le()?,
240            memory_length: cursor.read_length(length_size)?,
241            object_id: cursor.read_length(length_size)?,
242        },
243
244        // Type 3: directly accessed, non-filtered huge fractal heap object.
245        3 => BTreeV2Record::HugeDirectNonFiltered {
246            address: cursor.read_offset(offset_size)?,
247            length: cursor.read_length(length_size)?,
248        },
249
250        // Type 4: directly accessed, filtered huge fractal heap object.
251        4 => BTreeV2Record::HugeDirectFiltered {
252            address: cursor.read_offset(offset_size)?,
253            filtered_length: cursor.read_length(length_size)?,
254            filter_mask: cursor.read_u32_le()?,
255            memory_length: cursor.read_length(length_size)?,
256        },
257
258        // Type 5: link name hash
259        5 => {
260            let hash = cursor.read_u32_le()?;
261            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
262            BTreeV2Record::LinkNameHash { hash, heap_id }
263        }
264
265        // Type 6: creation order
266        6 => {
267            let order = cursor.read_u64_le()?;
268            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
269            BTreeV2Record::CreationOrder { order, heap_id }
270        }
271
272        // Type 7: shared object-header messages.
273        7 => {
274            let location = cursor.read_u8()?;
275            cursor.skip(3)?;
276            let hash = cursor.read_u32_le()?;
277            match location {
278                0 => {
279                    let reference_count = cursor.read_u32_le()?;
280                    let heap_id = cursor.read_bytes(8)?.to_vec();
281                    BTreeV2Record::SharedMessageHeap {
282                        hash,
283                        reference_count,
284                        heap_id,
285                    }
286                }
287                1 => {
288                    let _reserved = cursor.read_u8()?;
289                    let message_type = u16::from(cursor.read_u8()?);
290                    let object_header_index = cursor.read_u16_le()?;
291                    let object_header_address = cursor.read_offset(offset_size)?;
292                    BTreeV2Record::SharedMessageObjectHeader {
293                        hash,
294                        message_type,
295                        object_header_index,
296                        object_header_address,
297                    }
298                }
299                other => {
300                    return Err(Error::InvalidData(format!(
301                        "unknown SOHM B-tree record location: {other}"
302                    )));
303                }
304            }
305        }
306
307        // Type 8: attribute name hash
308        8 => {
309            let hash = cursor.read_u32_le()?;
310            let flags = cursor.read_u8()?;
311            let creation_order = cursor.read_u32_le()?;
312            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
313            BTreeV2Record::AttributeNameHash {
314                hash,
315                flags,
316                creation_order,
317                heap_id,
318            }
319        }
320
321        // Type 9: attribute creation order
322        9 => {
323            let order = cursor.read_u32_le()?;
324            let heap_id = cursor.read_bytes(heap_id_len)?.to_vec();
325            BTreeV2Record::AttributeCreationOrder { order, heap_id }
326        }
327
328        // Type 10: non-filtered chunk
329        10 => {
330            let address = cursor.read_offset(offset_size)?;
331            // Chunk offsets are encoded as scaled 64-bit values.
332            // The number of offset dimensions is calculated from the record size.
333            // Each offset is 8 bytes in a type-10 record.
334            let offset_bytes_available = record_size as usize - offset_size as usize;
335            let num_offsets = offset_bytes_available / 8;
336            let mut offsets = Vec::with_capacity(num_offsets);
337            for _ in 0..num_offsets {
338                offsets.push(cursor.read_u64_le()?);
339            }
340            BTreeV2Record::ChunkedNonFiltered { address, offsets }
341        }
342
343        // Type 11: filtered chunk
344        11 => {
345            let address = cursor.read_offset(offset_size)?;
346            // nbytes (chunk size on disk) is encoded using length_size bytes.
347            let nbytes_size = length_size as usize;
348            let chunk_size = cursor.read_length(length_size)?;
349            let filter_mask = cursor.read_u32_le()?;
350            let remaining = record_size as usize - offset_size as usize - nbytes_size - 4; // filter_mask
351            let num_offsets = remaining / 8;
352            let mut offsets = Vec::with_capacity(num_offsets);
353            for _ in 0..num_offsets {
354                offsets.push(cursor.read_u64_le()?);
355            }
356            BTreeV2Record::ChunkedFiltered {
357                address,
358                chunk_size,
359                filter_mask,
360                offsets,
361            }
362        }
363
364        // Unknown type — read raw bytes.
365        _ => {
366            let data = cursor.read_bytes(record_size as usize)?.to_vec();
367            return Ok(BTreeV2Record::Unknown {
368                record_type: btree_type,
369                data,
370            });
371        }
372    };
373
374    // Ensure we consumed exactly record_size bytes (skip any remaining).
375    let consumed = (cursor.position() - record_start) as usize;
376    if consumed < record_size as usize {
377        cursor.skip(record_size as usize - consumed)?;
378    }
379
380    Ok(record)
381}
382
383fn record_matches_chunk_bounds(
384    record: &BTreeV2Record,
385    chunk_dims: &[u32],
386    chunk_bounds: Option<(&[u64], &[u64])>,
387) -> bool {
388    let Some((first_chunk, last_chunk)) = chunk_bounds else {
389        return true;
390    };
391
392    let offsets = match record {
393        BTreeV2Record::ChunkedNonFiltered { offsets, .. }
394        | BTreeV2Record::ChunkedFiltered { offsets, .. } => offsets,
395        _ => return true,
396    };
397
398    offsets.iter().enumerate().all(|(dim, offset)| {
399        let chunk_index = *offset / u64::from(chunk_dims[dim]);
400        chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
401    })
402}
403
404// ---------------------------------------------------------------------------
405// Node parsing
406// ---------------------------------------------------------------------------
407
/// Width in bytes of the unsigned integer needed to hold `max_records`.
///
/// Internal nodes encode each child's record count in the smallest of 1,
/// 2, 4, or 8 bytes that can represent the maximum possible count.
fn num_records_size(max_records: u64) -> usize {
    match max_records {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        0x1_0000..=0xFFFF_FFFF => 4,
        _ => 8,
    }
}
421
/// Maximum number of records that fit in a single leaf node.
///
/// A leaf carries 10 bytes of fixed framing — signature (4), version (1),
/// type (1), checksum (4) — and the remainder of the node holds records.
/// Returns 0 when the node cannot hold any record (or `record_size` is 0).
fn max_leaf_records(node_size: u32, record_size: u16) -> u64 {
    const LEAF_OVERHEAD: u32 = 10;
    if record_size == 0 {
        return 0;
    }
    let payload = node_size.saturating_sub(LEAF_OVERHEAD);
    u64::from(payload / u32::from(record_size))
}
431
432/// Compute the maximum number of records that fit in an internal node.
433/// This depends on the pointer size (offset_size) and the number-of-records
434/// encoding for child nodes, which makes it recursive in principle. We use
435/// an iterative approach.
436fn max_internal_records(
437    node_size: u32,
438    record_size: u16,
439    offset_size: u8,
440    max_child_records: u64,
441) -> u64 {
442    // Internal node overhead: signature(4) + version(1) + type(1) + checksum(4) = 10
443    let overhead = 10u32;
444    if node_size <= overhead || record_size == 0 {
445        return 0;
446    }
447    let available = (node_size - overhead) as u64;
448    // Each entry in an internal node is: record(record_size) + child_pointer(offset_size) + num_records(var)
449    let nrec_size = num_records_size(max_child_records) as u64;
450    let entry_size = record_size as u64 + offset_size as u64 + nrec_size;
451    // There is one more child pointer + num_records than records.
452    // So: n * record_size + (n+1) * (offset_size + nrec_size) <= available
453    // => n * (record_size + offset_size + nrec_size) + offset_size + nrec_size <= available
454    let extra = offset_size as u64 + nrec_size;
455    if available <= extra {
456        return 0;
457    }
458    (available - extra) / entry_size
459}
460
/// Parse a leaf node (`BTLF`) and append its records to `records`.
///
/// `cursor` must be positioned at the first byte of the node. After the
/// signature/version/type prefix, `num_records` records are parsed with
/// `parse_record`; records failing the optional chunk-bound filter are
/// dropped. Finally the node's Jenkins lookup3 checksum (covering the
/// signature through the last record) is verified.
#[allow(clippy::too_many_arguments)]
fn parse_leaf_node(
    cursor: &mut Cursor,
    header: &BTreeV2Header,
    offset_size: u8,
    length_size: u8,
    ndims: Option<u32>,
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
    num_records: u16,
    heap_id_len: usize,
    records: &mut Vec<BTreeV2Record>,
) -> Result<()> {
    // First checksummed byte (the signature).
    let start = cursor.position();

    let sig = cursor.read_bytes(4)?;
    if sig != BTLF_SIGNATURE {
        return Err(Error::InvalidBTreeV2Signature {
            context: "leaf node",
        });
    }

    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::UnsupportedBTreeVersion(version));
    }

    // Each node repeats the tree type; a mismatch indicates corruption or a
    // pointer into the wrong structure.
    let node_type = cursor.read_u8()?;
    if node_type != header.btree_type {
        return Err(Error::InvalidData(format!(
            "B-tree v2 leaf node type mismatch: header says {}, node says {}",
            header.btree_type, node_type
        )));
    }

    for _ in 0..num_records {
        let record = parse_record(
            cursor,
            header.btree_type,
            header.record_size,
            offset_size,
            length_size,
            ndims,
            heap_id_len,
        )?;
        // Chunk-bound filtering only affects chunk records (types 10/11);
        // every other record kind always passes.
        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
            records.push(record);
        }
    }

    // Verify checksum: covers signature through the end of records.
    let checksum_data_end = cursor.position();
    let stored_checksum = cursor.read_u32_le()?;
    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
    if computed != stored_checksum {
        return Err(Error::ChecksumMismatch {
            expected: stored_checksum,
            actual: computed,
        });
    }

    Ok(())
}
525
/// Parse an internal node (`BTIN`): collect its records, then recurse into
/// every child node.
///
/// Layout after the signature/version/type prefix: `num_records` records,
/// then `num_records + 1` child entries, each holding a child address, a
/// child record count, and (when `depth > 1`) a total-record count for the
/// child's whole subtree. A Jenkins lookup3 checksum closes the node.
///
/// In a v2 B-tree the records held by internal nodes are real records, not
/// mere separator keys, so they are collected alongside the leaf records.
#[allow(clippy::too_many_arguments)]
fn parse_internal_node(
    data: &[u8],
    address: u64,
    header: &BTreeV2Header,
    offset_size: u8,
    length_size: u8,
    ndims: Option<u32>,
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
    num_records: u16,
    depth: u16,
    heap_id_len: usize,
    records: &mut Vec<BTreeV2Record>,
) -> Result<()> {
    let mut cursor = Cursor::new(data);
    cursor.set_position(address);

    // First checksummed byte (the signature).
    let start = cursor.position();

    let sig = cursor.read_bytes(4)?;
    if sig != BTIN_SIGNATURE {
        return Err(Error::InvalidBTreeV2Signature {
            context: "internal node",
        });
    }

    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::UnsupportedBTreeVersion(version));
    }

    let node_type = cursor.read_u8()?;
    if node_type != header.btree_type {
        return Err(Error::InvalidData(format!(
            "B-tree v2 internal node type mismatch: header says {}, node says {}",
            header.btree_type, node_type
        )));
    }

    // Child record counts are stored in the smallest integer width that can
    // hold the maximum records a child one level down could contain, so
    // compute that maximum first (iterating down the levels for deep trees).
    let max_child_records = if depth == 1 {
        max_leaf_records(header.node_size, header.record_size)
    } else {
        // For deeper trees, compute iteratively.
        let leaf_max = max_leaf_records(header.node_size, header.record_size);
        let mut prev_max = leaf_max;
        for _ in 1..depth {
            prev_max =
                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
        }
        prev_max
    };
    let nrec_bytes = num_records_size(max_child_records);

    // Node body layout — records first, then the child entries:
    //   record[0], ..., record[n-1],
    //   child_ptr[0], nrec[0], [total[0],] ..., child_ptr[n], nrec[n], [total[n]]
    let mut node_records = Vec::with_capacity(num_records as usize);
    for _ in 0..num_records {
        let record = parse_record(
            &mut cursor,
            header.btree_type,
            header.record_size,
            offset_size,
            length_size,
            ndims,
            heap_id_len,
        )?;
        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
            node_records.push(record);
        }
    }

    // Read child node pointers (num_records + 1 of them).
    let num_children = num_records as usize + 1;

    // Nodes at depth > 1 additionally store a total-record count for each
    // child's entire subtree.
    let has_total_records = depth > 1;
    // NOTE(review): the subtree-total width is taken to be length_size here;
    // confirm against the HDF5 specification, which sizes this field from
    // the maximum possible subtree record count.
    let total_nrec_bytes = if has_total_records {
        length_size as usize
    } else {
        0
    };

    let mut child_addresses = Vec::with_capacity(num_children);
    let mut child_nrecords = Vec::with_capacity(num_children);

    for _ in 0..num_children {
        let child_addr = cursor.read_offset(offset_size)?;
        child_addresses.push(child_addr);
        // Truncating cast: a single node's record count is assumed to fit
        // in u16, matching `num_records_in_root` in the header.
        let nrec = cursor.read_uvar(nrec_bytes)?;
        child_nrecords.push(nrec as u16);
        if has_total_records {
            // The subtree total is not needed for a full traversal; skip it.
            cursor.read_uvar(total_nrec_bytes)?;
        }
    }

    // Verify checksum (signature through the last child entry).
    let checksum_data_end = cursor.position();
    let stored_checksum = cursor.read_u32_le()?;
    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
    if computed != stored_checksum {
        return Err(Error::ChecksumMismatch {
            expected: stored_checksum,
            actual: computed,
        });
    }

    // Internal-node records are real records in a v2 B-tree; keep them.
    records.extend(node_records);

    // Recurse into children (depth-first, left to right). Undefined child
    // addresses are skipped.
    let child_depth = depth - 1;
    for (i, &child_addr) in child_addresses.iter().enumerate() {
        if Cursor::is_undefined_offset(child_addr, offset_size) {
            continue;
        }
        let child_nrec = child_nrecords[i];
        if child_depth == 0 {
            // Child is a leaf.
            let mut child_cursor = Cursor::new(data);
            child_cursor.set_position(child_addr);
            parse_leaf_node(
                &mut child_cursor,
                header,
                offset_size,
                length_size,
                ndims,
                chunk_dims,
                chunk_bounds,
                child_nrec,
                heap_id_len,
                records,
            )?;
        } else {
            // Child is another internal node.
            parse_internal_node(
                data,
                child_addr,
                header,
                offset_size,
                length_size,
                ndims,
                chunk_dims,
                chunk_bounds,
                child_nrec,
                child_depth,
                heap_id_len,
                records,
            )?;
        }
    }

    Ok(())
}
701
702// ---------------------------------------------------------------------------
703// Public traversal
704// ---------------------------------------------------------------------------
705
706/// Collect all records from a B-tree v2 by traversing from the root.
707///
708/// `header` must be a previously parsed `BTreeV2Header`. `data` is the full
709/// file buffer. `ndims` is needed for chunk index record types (10 and 11).
710pub fn collect_btree_v2_records(
711    data: &[u8],
712    header: &BTreeV2Header,
713    offset_size: u8,
714    length_size: u8,
715    ndims: Option<u32>,
716    chunk_dims: &[u32],
717    chunk_bounds: Option<(&[u64], &[u64])>,
718) -> Result<Vec<BTreeV2Record>> {
719    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
720        return Ok(Vec::new());
721    }
722
723    if header.total_records == 0 || header.num_records_in_root == 0 {
724        return Ok(Vec::new());
725    }
726
727    // Determine heap_id_len from the record_size and btree_type.
728    let heap_id_len = compute_heap_id_len(header);
729
730    let mut records = Vec::new();
731
732    if header.depth == 0 {
733        // Root is a leaf node.
734        let mut cursor = Cursor::new(data);
735        cursor.set_position(header.root_node_address);
736        parse_leaf_node(
737            &mut cursor,
738            header,
739            offset_size,
740            length_size,
741            ndims,
742            chunk_dims,
743            chunk_bounds,
744            header.num_records_in_root,
745            heap_id_len,
746            &mut records,
747        )?;
748    } else {
749        // Root is an internal node.
750        parse_internal_node(
751            data,
752            header.root_node_address,
753            header,
754            offset_size,
755            length_size,
756            ndims,
757            chunk_dims,
758            chunk_bounds,
759            header.num_records_in_root,
760            header.depth,
761            heap_id_len,
762            &mut records,
763        )?;
764    }
765
766    Ok(records)
767}
768
769/// Collect all records from a B-tree v2 using random-access storage.
770pub fn collect_btree_v2_records_storage(
771    storage: &dyn Storage,
772    header: &BTreeV2Header,
773    offset_size: u8,
774    length_size: u8,
775    ndims: Option<u32>,
776    chunk_dims: &[u32],
777    chunk_bounds: Option<(&[u64], &[u64])>,
778) -> Result<Vec<BTreeV2Record>> {
779    if Cursor::is_undefined_offset(header.root_node_address, offset_size) {
780        return Ok(Vec::new());
781    }
782
783    if header.total_records == 0 || header.num_records_in_root == 0 {
784        return Ok(Vec::new());
785    }
786
787    let heap_id_len = compute_heap_id_len(header);
788    let mut records = Vec::new();
789
790    if header.depth == 0 {
791        parse_leaf_node_storage(
792            storage,
793            header.root_node_address,
794            header,
795            offset_size,
796            length_size,
797            ndims,
798            chunk_dims,
799            chunk_bounds,
800            header.num_records_in_root,
801            heap_id_len,
802            &mut records,
803        )?;
804    } else {
805        parse_internal_node_storage(
806            storage,
807            header.root_node_address,
808            header,
809            offset_size,
810            length_size,
811            ndims,
812            chunk_dims,
813            chunk_bounds,
814            header.num_records_in_root,
815            header.depth,
816            heap_id_len,
817            &mut records,
818        )?;
819    }
820
821    Ok(records)
822}
823
824#[allow(clippy::too_many_arguments)]
825fn parse_leaf_node_storage(
826    storage: &dyn Storage,
827    address: u64,
828    header: &BTreeV2Header,
829    offset_size: u8,
830    length_size: u8,
831    ndims: Option<u32>,
832    chunk_dims: &[u32],
833    chunk_bounds: Option<(&[u64], &[u64])>,
834    num_records: u16,
835    heap_id_len: usize,
836    records: &mut Vec<BTreeV2Record>,
837) -> Result<()> {
838    let _node_len = usize::try_from(header.node_size).map_err(|_| {
839        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
840    })?;
841    let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
842        Error::InvalidData("B-tree v2 node read exceeds platform usize capacity".into())
843    })?;
844    let node_bytes = storage.read_range(address, read_len)?;
845    let mut cursor = Cursor::new(node_bytes.as_ref());
846    parse_leaf_node(
847        &mut cursor,
848        header,
849        offset_size,
850        length_size,
851        ndims,
852        chunk_dims,
853        chunk_bounds,
854        num_records,
855        heap_id_len,
856        records,
857    )
858}
859
/// Parse a `BTIN` internal node read from `storage` at `address`, collect its
/// matching records into `records`, and recurse into all defined children.
///
/// * `header` — the tree's `BTHD` parameters (type, node/record sizes).
/// * `offset_size` / `length_size` — file-wide sizes of offsets and lengths.
/// * `ndims` / `chunk_dims` / `chunk_bounds` — chunk-index context; records
///   failing `record_matches_chunk_bounds` are filtered out.
/// * `num_records` — record count for THIS node (from the parent or header).
/// * `depth` — this node's depth (> 0; a child at depth 0 is a leaf).
///
/// Errors on bad signature, unsupported version, type mismatch with the
/// header, or checksum mismatch. Records are only committed to `records`
/// after the node's checksum has been verified.
#[allow(clippy::too_many_arguments)]
fn parse_internal_node_storage(
    storage: &dyn Storage,
    address: u64,
    header: &BTreeV2Header,
    offset_size: u8,
    length_size: u8,
    ndims: Option<u32>,
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
    num_records: u16,
    depth: u16,
    heap_id_len: usize,
    records: &mut Vec<BTreeV2Record>,
) -> Result<()> {
    // Validate that node_size fits in usize even though the read below is
    // sized from the storage tail, not node_size.
    let _node_len = usize::try_from(header.node_size).map_err(|_| {
        Error::InvalidData("B-tree v2 node size exceeds platform usize capacity".into())
    })?;
    // NOTE(review): reads from `address` to end-of-storage rather than just
    // `node_size` bytes; presumably the cursor only consumes node contents —
    // confirm this is intentional (matches the leaf-node path above).
    let read_len = usize::try_from(storage.len().saturating_sub(address)).map_err(|_| {
        Error::InvalidData("B-tree v2 node read exceeds platform usize capacity".into())
    })?;
    let node_bytes = storage.read_range(address, read_len)?;
    let mut cursor = Cursor::new(node_bytes.as_ref());
    // Checksum coverage starts here (beginning of the node image).
    let start = cursor.position();

    let sig = cursor.read_bytes(4)?;
    if sig != BTIN_SIGNATURE {
        return Err(Error::InvalidBTreeV2Signature {
            context: "internal node",
        });
    }

    let version = cursor.read_u8()?;
    if version != 0 {
        return Err(Error::UnsupportedBTreeVersion(version));
    }

    // The node's type byte must agree with the tree type from the header.
    let node_type = cursor.read_u8()?;
    if node_type != header.btree_type {
        return Err(Error::InvalidData(format!(
            "B-tree v2 internal node type mismatch: header says {}, node says {}",
            header.btree_type, node_type
        )));
    }

    // Width of each child's record-count field depends on the maximum number
    // of records any child subtree can hold, which grows with depth: a child
    // at depth d can hold at most max_internal_records(...) applied (d-1)
    // times on top of the leaf maximum.
    let max_child_records = if depth == 1 {
        max_leaf_records(header.node_size, header.record_size)
    } else {
        let leaf_max = max_leaf_records(header.node_size, header.record_size);
        let mut prev_max = leaf_max;
        for _ in 1..depth {
            prev_max =
                max_internal_records(header.node_size, header.record_size, offset_size, prev_max);
        }
        prev_max
    };
    let nrec_bytes = num_records_size(max_child_records);

    // Parse this node's own records, filtering by chunk bounds, but buffer
    // them locally: they are only appended after the checksum verifies.
    let mut node_records = Vec::with_capacity(num_records as usize);
    for _ in 0..num_records {
        let record = parse_record(
            &mut cursor,
            header.btree_type,
            header.record_size,
            offset_size,
            length_size,
            ndims,
            heap_id_len,
        )?;
        if record_matches_chunk_bounds(&record, chunk_dims, chunk_bounds) {
            node_records.push(record);
        }
    }

    // An internal node with N records has N+1 child pointers. Only nodes at
    // depth > 1 additionally store a total-record count per child.
    let num_children = usize::from(num_records) + 1;
    let has_total_records = depth > 1;
    let total_nrec_bytes = if has_total_records {
        usize::from(length_size)
    } else {
        0
    };

    let mut child_addresses = Vec::with_capacity(num_children);
    let mut child_nrecords = Vec::with_capacity(num_children);
    for _ in 0..num_children {
        child_addresses.push(cursor.read_offset(offset_size)?);
        // NOTE(review): `as u16` silently truncates if nrec_bytes > 2 and the
        // stored count exceeds u16::MAX — confirm counts always fit u16.
        child_nrecords.push(cursor.read_uvar(nrec_bytes)? as u16);
        if has_total_records {
            // Total record count of the child's subtree; not needed here.
            cursor.read_uvar(total_nrec_bytes)?;
        }
    }

    // Verify the Jenkins lookup3 checksum over everything before it.
    let checksum_data_end = cursor.position();
    let stored_checksum = cursor.read_u32_le()?;
    let computed = jenkins_lookup3(&cursor.data()[start as usize..checksum_data_end as usize]);
    if computed != stored_checksum {
        return Err(Error::ChecksumMismatch {
            expected: stored_checksum,
            actual: computed,
        });
    }

    // Checksum OK — commit this node's records.
    records.extend(node_records);

    // Recurse: children at depth 0 are leaves, otherwise internal nodes.
    let child_depth = depth - 1;
    for (i, &child_addr) in child_addresses.iter().enumerate() {
        // Undefined (all-ones) addresses mean "no child here".
        if Cursor::is_undefined_offset(child_addr, offset_size) {
            continue;
        }
        let child_nrec = child_nrecords[i];
        if child_depth == 0 {
            parse_leaf_node_storage(
                storage,
                child_addr,
                header,
                offset_size,
                length_size,
                ndims,
                chunk_dims,
                chunk_bounds,
                child_nrec,
                heap_id_len,
                records,
            )?;
        } else {
            parse_internal_node_storage(
                storage,
                child_addr,
                header,
                offset_size,
                length_size,
                ndims,
                chunk_dims,
                chunk_bounds,
                child_nrec,
                child_depth,
                heap_id_len,
                records,
            )?;
        }
    }

    Ok(())
}
1004
1005/// Compute the heap ID length from the record size and tree type.
1006///
1007/// For link/attribute B-trees (types 5, 6, 8, 9), the heap ID occupies
1008/// the remaining bytes after the fixed fields. For chunk types (10, 11)
1009/// or unknown types, return 0 (heap_id is not used).
1010fn compute_heap_id_len(header: &BTreeV2Header) -> usize {
1011    let rs = header.record_size as usize;
1012    match header.btree_type {
1013        5 => rs.saturating_sub(4),         // hash(4)
1014        6 => rs.saturating_sub(8),         // order(8)
1015        8 => rs.saturating_sub(4 + 1 + 4), // hash(4) + flags(1) + creation_order(4)
1016        9 => rs.saturating_sub(4),         // order(4)
1017        _ => 0,
1018    }
1019}
1020
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal BTHD with the given parameters.
    ///
    /// The byte layout mirrors `BTreeV2Header::parse`: signature, version,
    /// type, node size, record size, depth, split/merge percents, root
    /// address (offset_size wide), root record count, total records
    /// (length_size wide), then a trailing Jenkins lookup3 checksum over
    /// everything preceding it. Only offset/length sizes 4 and 8 are
    /// supported; others panic (test-only helper).
    #[allow(clippy::too_many_arguments)]
    fn build_header(
        btree_type: u8,
        node_size: u32,
        record_size: u16,
        depth: u16,
        root_node_address: u64,
        num_records_in_root: u16,
        total_records: u64,
        offset_size: u8,
        length_size: u8,
    ) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(b"BTHD");
        buf.push(0); // version
        buf.push(btree_type);
        buf.extend_from_slice(&node_size.to_le_bytes());
        buf.extend_from_slice(&record_size.to_le_bytes());
        buf.extend_from_slice(&depth.to_le_bytes());
        buf.push(75); // split percent
        buf.push(40); // merge percent
        // Root node address is offset_size bytes wide.
        match offset_size {
            4 => buf.extend_from_slice(&(root_node_address as u32).to_le_bytes()),
            8 => buf.extend_from_slice(&root_node_address.to_le_bytes()),
            _ => panic!("unsupported"),
        }
        buf.extend_from_slice(&num_records_in_root.to_le_bytes());
        // Total record count is length_size bytes wide.
        match length_size {
            4 => buf.extend_from_slice(&(total_records as u32).to_le_bytes()),
            8 => buf.extend_from_slice(&total_records.to_le_bytes()),
            _ => panic!("unsupported"),
        }
        // Compute and append checksum.
        let checksum = jenkins_lookup3(&buf);
        buf.extend_from_slice(&checksum.to_le_bytes());
        buf
    }

    /// A well-formed header round-trips all fields through `parse`.
    #[test]
    fn test_parse_header() {
        let data = build_header(5, 4096, 12, 0, 0x1000, 3, 3, 8, 8);
        let mut cursor = Cursor::new(&data);
        let hdr = BTreeV2Header::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(hdr.btree_type, 5);
        assert_eq!(hdr.node_size, 4096);
        assert_eq!(hdr.record_size, 12);
        assert_eq!(hdr.depth, 0);
        assert_eq!(hdr.split_percent, 75);
        assert_eq!(hdr.merge_percent, 40);
        assert_eq!(hdr.root_node_address, 0x1000);
        assert_eq!(hdr.num_records_in_root, 3);
        assert_eq!(hdr.total_records, 3);
    }

    /// Corrupting the signature's first byte must be reported as a
    /// signature error, not a checksum error.
    #[test]
    fn test_bad_signature() {
        let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
        data[0] = b'X';
        let mut cursor = Cursor::new(&data);
        assert!(matches!(
            BTreeV2Header::parse(&mut cursor, 8, 8),
            Err(Error::InvalidBTreeV2Signature { .. })
        ));
    }

    /// Corrupting a non-signature byte (byte 6 is inside node_size) must
    /// trip the checksum verification.
    #[test]
    fn test_bad_checksum() {
        let mut data = build_header(5, 4096, 12, 0, 0x1000, 0, 0, 8, 8);
        // Corrupt a byte in the middle.
        data[6] = 0xFF;
        let mut cursor = Cursor::new(&data);
        assert!(matches!(
            BTreeV2Header::parse(&mut cursor, 8, 8),
            Err(Error::ChecksumMismatch { .. })
        ));
    }

    /// An empty tree (undefined root address, zero records) yields no
    /// records and does not try to read any node.
    #[test]
    fn test_collect_empty_tree() {
        let header = BTreeV2Header {
            btree_type: 5,
            node_size: 4096,
            record_size: 12,
            depth: 0,
            split_percent: 75,
            merge_percent: 40,
            root_node_address: u64::MAX,
            num_records_in_root: 0,
            total_records: 0,
        };
        let data = vec![0u8; 100];
        let records = collect_btree_v2_records(&data, &header, 8, 8, None, &[], None).unwrap();
        assert!(records.is_empty());
    }

    /// Heap ID length is record_size minus the type's fixed-field width.
    #[test]
    fn test_compute_heap_id_len() {
        // Type 5: record_size - 4
        let h5 = BTreeV2Header {
            btree_type: 5,
            record_size: 12,
            node_size: 0,
            depth: 0,
            split_percent: 0,
            merge_percent: 0,
            root_node_address: 0,
            num_records_in_root: 0,
            total_records: 0,
        };
        assert_eq!(compute_heap_id_len(&h5), 8);

        // Type 8: record_size - 9
        let h8 = BTreeV2Header {
            btree_type: 8,
            record_size: 17,
            ..h5
        };
        assert_eq!(compute_heap_id_len(&h8), 8);
    }

    /// Leaf capacity: usable bytes (node_size minus fixed overhead) divided
    /// by record size.
    #[test]
    fn test_max_leaf_records() {
        // node_size=4096, record_size=12, overhead=10
        // => (4096 - 10) / 12 = 340
        assert_eq!(max_leaf_records(4096, 12), 340);
    }

    /// Record-count field width steps up at each power-of-256 boundary.
    #[test]
    fn test_num_records_size() {
        assert_eq!(num_records_size(0), 1);
        assert_eq!(num_records_size(255), 1);
        assert_eq!(num_records_size(256), 2);
        assert_eq!(num_records_size(65535), 2);
        assert_eq!(num_records_size(65536), 4);
    }

    /// Type-1 record (huge, indirect, non-filtered): address, length,
    /// object id — three little-endian u64s.
    #[test]
    fn test_parse_huge_indirect_record() {
        let mut data = Vec::new();
        data.extend_from_slice(&0x1234u64.to_le_bytes());
        data.extend_from_slice(&99u64.to_le_bytes());
        data.extend_from_slice(&7u64.to_le_bytes());
        let mut cursor = Cursor::new(&data);

        let record = parse_record(&mut cursor, 1, data.len() as u16, 8, 8, None, 0).unwrap();
        match record {
            BTreeV2Record::HugeIndirectNonFiltered {
                address,
                length,
                object_id,
            } => {
                assert_eq!(address, 0x1234);
                assert_eq!(length, 99);
                assert_eq!(object_id, 7);
            }
            other => panic!("expected huge record, got {:?}", other),
        }
    }

    /// Type-7 record (shared message in heap): location byte + padding,
    /// hash, reference count, then the fractal heap ID bytes.
    #[test]
    fn test_parse_shared_heap_record() {
        let mut data = Vec::new();
        data.push(0);
        data.extend_from_slice(&[0, 0, 0]);
        data.extend_from_slice(&0xAABB_CCDDu32.to_le_bytes());
        data.extend_from_slice(&3u32.to_le_bytes());
        data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
        let mut cursor = Cursor::new(&data);

        let record = parse_record(&mut cursor, 7, data.len() as u16, 8, 8, None, 0).unwrap();
        match record {
            BTreeV2Record::SharedMessageHeap {
                hash,
                reference_count,
                heap_id,
            } => {
                assert_eq!(hash, 0xAABB_CCDD);
                assert_eq!(reference_count, 3);
                assert_eq!(heap_id, vec![1, 2, 3, 4, 5, 6, 7, 8]);
            }
            other => panic!("expected shared heap record, got {:?}", other),
        }
    }

    /// End-to-end leaf parse: hand-built BTLF with two type-5 records and a
    /// valid trailing checksum yields both records in order.
    #[test]
    fn test_parse_leaf_with_type5_records() {
        // Build a leaf node with 2 type-5 records (link name hash).
        // record_size = 12 (hash=4 + heap_id=8)
        let record_size: u16 = 12;
        let node_size: u32 = 4096;

        let header = BTreeV2Header {
            btree_type: 5,
            node_size,
            record_size,
            depth: 0,
            split_percent: 75,
            merge_percent: 40,
            root_node_address: 0, // will point to our leaf
            num_records_in_root: 2,
            total_records: 2,
        };

        // Build the leaf node.
        let mut leaf = Vec::new();
        leaf.extend_from_slice(b"BTLF"); // signature
        leaf.push(0); // version
        leaf.push(5); // type

        // Record 1: hash=0xAABBCCDD, heap_id=[1,2,3,4,5,6,7,8]
        leaf.extend_from_slice(&0xAABBCCDDu32.to_le_bytes());
        leaf.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);

        // Record 2: hash=0x11223344, heap_id=[9,10,11,12,13,14,15,16]
        leaf.extend_from_slice(&0x11223344u32.to_le_bytes());
        leaf.extend_from_slice(&[9, 10, 11, 12, 13, 14, 15, 16]);

        // Checksum covers everything so far.
        let checksum = jenkins_lookup3(&leaf);
        leaf.extend_from_slice(&checksum.to_le_bytes());

        // Pad to node_size (not strictly needed for parsing, but realistic).
        leaf.resize(node_size as usize, 0);

        let mut records = Vec::new();
        let mut cursor = Cursor::new(&leaf);
        parse_leaf_node(
            &mut cursor,
            &header,
            8,
            8,
            None,
            &[],
            None,
            2,
            8, // heap_id_len
            &mut records,
        )
        .unwrap();

        assert_eq!(records.len(), 2);
        match &records[0] {
            BTreeV2Record::LinkNameHash { hash, heap_id } => {
                assert_eq!(*hash, 0xAABBCCDD);
                assert_eq!(heap_id, &[1, 2, 3, 4, 5, 6, 7, 8]);
            }
            _ => panic!("expected LinkNameHash"),
        }
        match &records[1] {
            BTreeV2Record::LinkNameHash { hash, heap_id } => {
                assert_eq!(*hash, 0x11223344);
                assert_eq!(heap_id, &[9, 10, 11, 12, 13, 14, 15, 16]);
            }
            _ => panic!("expected LinkNameHash"),
        }
    }
}