Skip to main content

hdf5_reader/
btree_v1.rs

1//! HDF5 B-link Tree Version 1.
2//!
3//! V1 B-trees are used by old-style (v0/v1) groups for link name lookup
4//! (type 0) and by chunked datasets for raw data chunk indexing (type 1).
5//!
6//! The tree is a B-link tree: each node carries left and right sibling
7//! pointers. Keys bracket children — a node with N children has N+1 keys.
8//! Leaf nodes (level 0) point to either symbol table nodes (SNODs) for
9//! type 0 or raw data chunk addresses for type 1. Internal nodes point to
10//! child B-tree nodes.
11
12use std::collections::HashSet;
13
14use crate::error::{Error, Result};
15use crate::io::Cursor;
16use crate::storage::Storage;
17
18/// Signature bytes for a v1 B-tree node: ASCII `TREE`.
19const BTREE_V1_SIGNATURE: [u8; 4] = *b"TREE";
20const MAX_BTREE_V1_RECURSION_DEPTH: usize = 256;
21
22/// A key within a v1 B-tree node.
23#[derive(Debug, Clone)]
24pub enum BTreeV1Key {
25    /// Type 0 (group) key: offset into the local heap for the link name.
26    Group { local_heap_offset: u64 },
27    /// Type 1 (raw data chunk) key: chunk size, filter mask, and per-dimension
28    /// offsets (including one extra for the dataset element offset).
29    RawData {
30        chunk_size: u32,
31        filter_mask: u32,
32        offsets: Vec<u64>,
33    },
34}
35
36/// A parsed v1 B-tree node.
37#[derive(Debug, Clone)]
38pub struct BTreeV1Node {
39    /// Node type: 0 = group, 1 = raw data chunk.
40    pub node_type: u8,
41    /// Node level: 0 = leaf, > 0 = internal.
42    pub level: u8,
43    /// Number of entries (children) actually in use.
44    pub entries_used: u16,
45    /// Address of the left sibling node (undefined if none).
46    pub left_sibling: u64,
47    /// Address of the right sibling node (undefined if none).
48    pub right_sibling: u64,
49    /// Keys bracketing the children. Length is `entries_used + 1`.
50    pub keys: Vec<BTreeV1Key>,
51    /// Child addresses. Length is `entries_used`.
52    pub children: Vec<u64>,
53}
54
55impl BTreeV1Node {
56    /// Parse a v1 B-tree node at the current cursor position.
57    ///
58    /// `ndims` is required for type-1 (raw data chunk) nodes — it is the
59    /// number of dimensions of the dataset's dataspace. For type-0 (group)
60    /// nodes pass `None`.
61    ///
62    /// Format:
63    /// - Signature: `TREE` (4 bytes)
64    /// - Node type (u8): 0 = group, 1 = raw data
65    /// - Node level (u8)
66    /// - Entries used (u16 LE)
67    /// - Left sibling address (`offset_size` bytes)
68    /// - Right sibling address (`offset_size` bytes)
69    /// - Then interleaved keys and child pointers:
70    ///   key[0], child[0], key[1], child[1], ..., key[K-1], child[K-1], key[K]
71    ///   where K = entries_used.
72    pub fn parse(
73        cursor: &mut Cursor,
74        offset_size: u8,
75        length_size: u8,
76        ndims: Option<u32>,
77    ) -> Result<Self> {
78        let sig = cursor.read_bytes(4)?;
79        if sig != BTREE_V1_SIGNATURE {
80            return Err(Error::InvalidBTreeSignature);
81        }
82
83        let node_type = cursor.read_u8()?;
84        let level = cursor.read_u8()?;
85        let entries_used = cursor.read_u16_le()?;
86        let left_sibling = cursor.read_offset(offset_size)?;
87        let right_sibling = cursor.read_offset(offset_size)?;
88
89        let num_keys = entries_used as usize + 1;
90        let num_children = entries_used as usize;
91
92        let mut keys = Vec::with_capacity(num_keys);
93        let mut children = Vec::with_capacity(num_children);
94
95        // Read interleaved keys and children:
96        // key[0], child[0], key[1], child[1], ..., key[K-1], child[K-1], key[K]
97        for i in 0..num_keys {
98            let key = match node_type {
99                0 => parse_group_key(cursor, length_size)?,
100                1 => parse_raw_data_key(cursor, ndims)?,
101                _ => {
102                    return Err(Error::InvalidData(format!(
103                        "unknown v1 B-tree node type: {node_type}"
104                    )));
105                }
106            };
107            keys.push(key);
108
109            // Read child address after every key except the last.
110            if i < num_children {
111                let child_addr = cursor.read_offset(offset_size)?;
112                children.push(child_addr);
113            }
114        }
115
116        Ok(BTreeV1Node {
117            node_type,
118            level,
119            entries_used,
120            left_sibling,
121            right_sibling,
122            keys,
123            children,
124        })
125    }
126}
127
128/// Parse a type-0 (group) key: just a length-sized offset into the local heap.
129fn parse_group_key(cursor: &mut Cursor, length_size: u8) -> Result<BTreeV1Key> {
130    let local_heap_offset = cursor.read_length(length_size)?;
131    Ok(BTreeV1Key::Group { local_heap_offset })
132}
133
134/// Parse a type-1 (raw data chunk) key.
135///
136/// Format:
137/// - chunk_size (u32 LE) — size of the chunk in bytes after filters
138/// - filter_mask (u32 LE) — bit mask of filters to skip
139/// - (ndims + 1) chunk-offset values per dimension (plus the trailing element
140///   offset, typically 0)
141///
142/// Each chunk-offset value is **always 8 bytes**, independent of the
143/// superblock's "size of offsets" field. The HDF5 format spec fixes these key
144/// dimension offsets at 64 bits (see the "Version 1 B-trees" / raw-data chunk
145/// key layout); reading them as `size_of_offsets` bytes corrupts the cursor
146/// position on 32-bit-superblock files (`size_of_offsets = 4`), yielding a
147/// garbage chunk address and a failed chunk decode.
148fn parse_raw_data_key(cursor: &mut Cursor, ndims: Option<u32>) -> Result<BTreeV1Key> {
149    let nd = ndims.ok_or_else(|| {
150        Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
151    })?;
152
153    let chunk_size = cursor.read_u32_le()?;
154    let filter_mask = cursor.read_u32_le()?;
155
156    let num_offsets = nd as usize + 1;
157    let mut offsets = Vec::with_capacity(num_offsets);
158    for _ in 0..num_offsets {
159        // Fixed 64-bit dimension offset per the HDF5 spec — NOT size_of_offsets.
160        offsets.push(cursor.read_u64_le()?);
161    }
162
163    Ok(BTreeV1Key::RawData {
164        chunk_size,
165        filter_mask,
166        offsets,
167    })
168}
169
170/// Walk a v1 B-tree and collect all (key, child_address) pairs from leaf nodes.
171///
172/// For leaf nodes (level 0), this returns one entry per child paired with the
173/// key that precedes it (key[i] for child[i]). For internal nodes, this
174/// recurses into each child node.
175///
176/// `data` must be the full file data (or at least the region containing the
177/// B-tree nodes). `root_address` is the file offset of the root node.
178pub fn collect_btree_v1_leaves(
179    data: &[u8],
180    root_address: u64,
181    offset_size: u8,
182    length_size: u8,
183    ndims: Option<u32>,
184    chunk_dims: &[u32],
185    chunk_bounds: Option<(&[u64], &[u64])>,
186) -> Result<Vec<(BTreeV1Key, u64)>> {
187    let mut results = Vec::new();
188    let mut visited = HashSet::new();
189    collect_recursive(
190        BTreeV1CollectContext {
191            data,
192            offset_size,
193            length_size,
194            ndims,
195            chunk_dims,
196            chunk_bounds,
197        },
198        root_address,
199        MAX_BTREE_V1_RECURSION_DEPTH,
200        &mut visited,
201        &mut results,
202    )?;
203    Ok(results)
204}
205
206/// Walk a v1 B-tree from random-access storage and collect leaf entries.
207pub fn collect_btree_v1_leaves_storage(
208    storage: &dyn Storage,
209    root_address: u64,
210    offset_size: u8,
211    length_size: u8,
212    ndims: Option<u32>,
213    chunk_dims: &[u32],
214    chunk_bounds: Option<(&[u64], &[u64])>,
215) -> Result<Vec<(BTreeV1Key, u64)>> {
216    let mut results = Vec::new();
217    let mut visited = HashSet::new();
218    collect_recursive_storage(
219        BTreeV1CollectContextStorage {
220            storage,
221            offset_size,
222            length_size,
223            ndims,
224            chunk_dims,
225            chunk_bounds,
226        },
227        root_address,
228        MAX_BTREE_V1_RECURSION_DEPTH,
229        &mut visited,
230        &mut results,
231    )?;
232    Ok(results)
233}
234
235#[derive(Clone, Copy)]
236struct BTreeV1CollectContext<'a> {
237    data: &'a [u8],
238    offset_size: u8,
239    length_size: u8,
240    ndims: Option<u32>,
241    chunk_dims: &'a [u32],
242    chunk_bounds: Option<(&'a [u64], &'a [u64])>,
243}
244
245#[derive(Clone, Copy)]
246struct BTreeV1CollectContextStorage<'a> {
247    storage: &'a dyn Storage,
248    offset_size: u8,
249    length_size: u8,
250    ndims: Option<u32>,
251    chunk_dims: &'a [u32],
252    chunk_bounds: Option<(&'a [u64], &'a [u64])>,
253}
254
255/// Recursive helper for tree traversal.
256fn collect_recursive(
257    context: BTreeV1CollectContext<'_>,
258    address: u64,
259    depth_remaining: usize,
260    visited: &mut HashSet<u64>,
261    results: &mut Vec<(BTreeV1Key, u64)>,
262) -> Result<()> {
263    if Cursor::is_undefined_offset(address, context.offset_size) {
264        return Ok(());
265    }
266    if depth_remaining == 0 {
267        return Err(Error::InvalidData(
268            "B-tree v1 traversal exceeded recursion limit".into(),
269        ));
270    }
271    if !visited.insert(address) {
272        return Err(Error::InvalidData(format!(
273            "B-tree v1 traversal revisits node at offset {address:#x}"
274        )));
275    }
276
277    if address as usize >= context.data.len() {
278        return Err(Error::OffsetOutOfBounds(address));
279    }
280
281    let mut cursor = Cursor::new(context.data);
282    cursor.set_position(address);
283
284    let node = BTreeV1Node::parse(
285        &mut cursor,
286        context.offset_size,
287        context.length_size,
288        context.ndims,
289    )?;
290
291    if node.level == 0 {
292        // Leaf node — collect (key[i], child[i]) pairs.
293        for (i, child_addr) in node.children.iter().enumerate() {
294            let include = match &node.keys[i] {
295                BTreeV1Key::RawData { offsets, .. } => {
296                    context.chunk_bounds.map_or(true, |(first, last)| {
297                        offsets
298                            .iter()
299                            .zip(context.chunk_dims.iter())
300                            .enumerate()
301                            .all(|(dim, (offset, chunk_dim))| {
302                                let chunk_index = *offset / u64::from(*chunk_dim);
303                                chunk_index >= first[dim] && chunk_index <= last[dim]
304                            })
305                    })
306                }
307                _ => true,
308            };
309
310            if include {
311                results.push((node.keys[i].clone(), *child_addr));
312            }
313        }
314    } else {
315        // Internal node — recurse into each child.
316        for child_addr in &node.children {
317            collect_recursive(context, *child_addr, depth_remaining - 1, visited, results)?;
318        }
319    }
320
321    Ok(())
322}
323
324fn collect_recursive_storage(
325    context: BTreeV1CollectContextStorage<'_>,
326    address: u64,
327    depth_remaining: usize,
328    visited: &mut HashSet<u64>,
329    results: &mut Vec<(BTreeV1Key, u64)>,
330) -> Result<()> {
331    if Cursor::is_undefined_offset(address, context.offset_size) {
332        return Ok(());
333    }
334    if depth_remaining == 0 {
335        return Err(Error::InvalidData(
336            "B-tree v1 traversal exceeded recursion limit".into(),
337        ));
338    }
339    if !visited.insert(address) {
340        return Err(Error::InvalidData(format!(
341            "B-tree v1 traversal revisits node at offset {address:#x}"
342        )));
343    }
344
345    let header_len = 4 + 1 + 1 + 2 + 2 * usize::from(context.offset_size);
346    let header = context.storage.read_range(address, header_len)?;
347    let mut header_cursor = Cursor::new(header.as_ref());
348    let sig = header_cursor.read_bytes(4)?;
349    if sig != BTREE_V1_SIGNATURE {
350        return Err(Error::InvalidBTreeSignature);
351    }
352
353    let node_type = header_cursor.read_u8()?;
354    let _level = header_cursor.read_u8()?;
355    let entries_used = header_cursor.read_u16_le()?;
356    let key_size = match node_type {
357        0 => usize::from(context.length_size),
358        1 => {
359            let ndims = context.ndims.ok_or_else(|| {
360                Error::InvalidData("ndims required for raw data chunk B-tree keys".into())
361            })?;
362            // chunk_size (4) + filter_mask (4), then (ndims + 1) dimension
363            // offsets at a fixed 8 bytes each — NOT size_of_offsets (HDF5 spec).
364            8 + (usize::try_from(ndims).map_err(|_| {
365                Error::InvalidData("B-tree v1 ndims exceeds platform usize capacity".into())
366            })? + 1)
367                * 8
368        }
369        _ => {
370            return Err(Error::InvalidData(format!(
371                "unknown v1 B-tree node type: {node_type}"
372            )));
373        }
374    };
375    let node_len = header_len
376        + (usize::from(entries_used) + 1) * key_size
377        + usize::from(entries_used) * usize::from(context.offset_size);
378    let node_bytes = context.storage.read_range(address, node_len)?;
379    let mut cursor = Cursor::new(node_bytes.as_ref());
380    let node = BTreeV1Node::parse(
381        &mut cursor,
382        context.offset_size,
383        context.length_size,
384        context.ndims,
385    )?;
386
387    if node.level == 0 {
388        for (i, child_addr) in node.children.iter().enumerate() {
389            let include = match &node.keys[i] {
390                BTreeV1Key::RawData { offsets, .. } => {
391                    context.chunk_bounds.map_or(true, |(first, last)| {
392                        offsets
393                            .iter()
394                            .zip(context.chunk_dims.iter())
395                            .enumerate()
396                            .all(|(dim, (offset, chunk_dim))| {
397                                let chunk_index = *offset / u64::from(*chunk_dim);
398                                chunk_index >= first[dim] && chunk_index <= last[dim]
399                            })
400                    })
401                }
402                _ => true,
403            };
404
405            if include {
406                results.push((node.keys[i].clone(), *child_addr));
407            }
408        }
409    } else {
410        for child_addr in &node.children {
411            collect_recursive_storage(context, *child_addr, depth_remaining - 1, visited, results)?;
412        }
413    }
414
415    Ok(())
416}
417
418#[cfg(test)]
419mod tests {
420    use super::*;
421
422    /// Build a v1 B-tree node (type 0, group) from the given parameters.
423    #[allow(clippy::too_many_arguments)]
424    fn build_group_node(
425        level: u8,
426        entries_used: u16,
427        left_sibling: u64,
428        right_sibling: u64,
429        keys: &[u64],     // local heap offsets
430        children: &[u64], // child addresses
431        offset_size: u8,
432        length_size: u8,
433    ) -> Vec<u8> {
434        assert_eq!(keys.len(), entries_used as usize + 1);
435        assert_eq!(children.len(), entries_used as usize);
436
437        let mut buf = Vec::new();
438        buf.extend_from_slice(b"TREE");
439        buf.push(0); // type 0 = group
440        buf.push(level);
441        buf.extend_from_slice(&entries_used.to_le_bytes());
442        push_offset(&mut buf, left_sibling, offset_size);
443        push_offset(&mut buf, right_sibling, offset_size);
444
445        for i in 0..keys.len() {
446            push_length(&mut buf, keys[i], length_size);
447            if i < children.len() {
448                push_offset(&mut buf, children[i], offset_size);
449            }
450        }
451
452        buf
453    }
454
455    /// Build a v1 B-tree node (type 1, raw data) from the given parameters.
456    fn build_rawdata_node(
457        level: u8,
458        entries_used: u16,
459        left_sibling: u64,
460        right_sibling: u64,
461        keys: &[(u32, u32, Vec<u64>)], // (chunk_size, filter_mask, offsets)
462        children: &[u64],
463        offset_size: u8,
464    ) -> Vec<u8> {
465        assert_eq!(keys.len(), entries_used as usize + 1);
466        assert_eq!(children.len(), entries_used as usize);
467
468        let mut buf = Vec::new();
469        buf.extend_from_slice(b"TREE");
470        buf.push(1); // type 1 = raw data
471        buf.push(level);
472        buf.extend_from_slice(&entries_used.to_le_bytes());
473        push_offset(&mut buf, left_sibling, offset_size);
474        push_offset(&mut buf, right_sibling, offset_size);
475
476        for i in 0..keys.len() {
477            let (cs, fm, ref offs) = keys[i];
478            buf.extend_from_slice(&cs.to_le_bytes());
479            buf.extend_from_slice(&fm.to_le_bytes());
480            for &o in offs {
481                // Chunk-key dimension offsets are ALWAYS 8 bytes, independent of
482                // the superblock's size_of_offsets (HDF5 format spec). Child
483                // pointers below remain offset_size-byte file addresses.
484                buf.extend_from_slice(&o.to_le_bytes());
485            }
486            if i < children.len() {
487                push_offset(&mut buf, children[i], offset_size);
488            }
489        }
490
491        buf
492    }
493
494    fn push_offset(buf: &mut Vec<u8>, val: u64, size: u8) {
495        match size {
496            4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
497            8 => buf.extend_from_slice(&val.to_le_bytes()),
498            _ => panic!("unsupported offset size in test"),
499        }
500    }
501
502    fn push_length(buf: &mut Vec<u8>, val: u64, size: u8) {
503        match size {
504            4 => buf.extend_from_slice(&(val as u32).to_le_bytes()),
505            8 => buf.extend_from_slice(&val.to_le_bytes()),
506            _ => panic!("unsupported length size in test"),
507        }
508    }
509
510    #[test]
511    fn parse_group_leaf_node() {
512        let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
513        let data = build_group_node(
514            0,                 // leaf
515            2,                 // 2 entries
516            undef8,            // no left sibling
517            undef8,            // no right sibling
518            &[0, 8, 16],       // 3 keys (offsets into local heap)
519            &[0x1000, 0x2000], // 2 children (SNOD addresses)
520            8,
521            8,
522        );
523
524        let mut cursor = Cursor::new(&data);
525        let node = BTreeV1Node::parse(&mut cursor, 8, 8, None).unwrap();
526
527        assert_eq!(node.node_type, 0);
528        assert_eq!(node.level, 0);
529        assert_eq!(node.entries_used, 2);
530        assert!(Cursor::is_undefined_offset(node.left_sibling, 8));
531        assert!(Cursor::is_undefined_offset(node.right_sibling, 8));
532        assert_eq!(node.keys.len(), 3);
533        assert_eq!(node.children.len(), 2);
534
535        match &node.keys[0] {
536            BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
537            _ => panic!("expected Group key"),
538        }
539        match &node.keys[1] {
540            BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 8),
541            _ => panic!("expected Group key"),
542        }
543        assert_eq!(node.children[0], 0x1000);
544        assert_eq!(node.children[1], 0x2000);
545    }
546
547    #[test]
548    fn parse_rawdata_leaf_node() {
549        let undef8 = 0xFFFF_FFFF_FFFF_FFFFu64;
550        // 2D dataset, ndims=2, so keys have 3 offsets each (ndims+1)
551        let data = build_rawdata_node(
552            0, // leaf
553            1, // 1 entry
554            undef8,
555            undef8,
556            &[
557                (1024, 0, vec![0, 0, 0]),   // key[0]: chunk at origin
558                (1024, 0, vec![10, 20, 0]), // key[1]: next chunk boundary
559            ],
560            &[0x5000], // 1 child = chunk data address
561            8,
562        );
563
564        let mut cursor = Cursor::new(&data);
565        let node = BTreeV1Node::parse(&mut cursor, 8, 8, Some(2)).unwrap();
566
567        assert_eq!(node.node_type, 1);
568        assert_eq!(node.level, 0);
569        assert_eq!(node.entries_used, 1);
570        assert_eq!(node.keys.len(), 2);
571        assert_eq!(node.children.len(), 1);
572
573        match &node.keys[0] {
574            BTreeV1Key::RawData {
575                chunk_size,
576                filter_mask,
577                offsets,
578            } => {
579                assert_eq!(*chunk_size, 1024);
580                assert_eq!(*filter_mask, 0);
581                assert_eq!(offsets, &[0, 0, 0]);
582            }
583            _ => panic!("expected RawData key"),
584        }
585
586        assert_eq!(node.children[0], 0x5000);
587    }
588
589    /// Regression test for 32-bit-superblock files (`size_of_offsets = 4`):
590    /// chunk-key dimension offsets are fixed 8-byte values, so the child chunk
591    /// address (a 4-byte file offset here) must be read at the correct cursor
592    /// position. Reading the dimension offsets as 4 bytes (the old bug)
593    /// misaligns the cursor and yields a garbage child address.
594    #[test]
595    fn parse_rawdata_leaf_4byte() {
596        let undef4 = 0xFFFF_FFFFu64;
597        let data = build_rawdata_node(
598            0,
599            1,
600            undef4,
601            undef4,
602            &[
603                (512, 0x03, vec![0, 0]), // ndims=1, key has 2 offsets (each 8 bytes)
604                (512, 0, vec![100, 0]),
605            ],
606            &[0x3000],
607            4,
608        );
609
610        let mut cursor = Cursor::new(&data);
611        let node = BTreeV1Node::parse(&mut cursor, 4, 4, Some(1)).unwrap();
612
613        assert_eq!(node.node_type, 1);
614        match &node.keys[0] {
615            BTreeV1Key::RawData {
616                chunk_size,
617                filter_mask,
618                offsets,
619            } => {
620                assert_eq!(*chunk_size, 512);
621                assert_eq!(*filter_mask, 0x03);
622                assert_eq!(offsets, &[0, 0]);
623            }
624            _ => panic!("expected RawData key"),
625        }
626        // The load-bearing assertion: the child chunk address is located only
627        // when the 8-byte dimension offsets are parsed at full width.
628        assert_eq!(node.children[0], 0x3000);
629    }
630
631    #[test]
632    fn bad_signature() {
633        let mut data = build_group_node(0, 0, u64::MAX, u64::MAX, &[0], &[], 8, 8);
634        data[0] = b'X'; // corrupt
635        let mut cursor = Cursor::new(&data);
636        assert!(matches!(
637            BTreeV1Node::parse(&mut cursor, 8, 8, None),
638            Err(Error::InvalidBTreeSignature)
639        ));
640    }
641
642    #[test]
643    fn collect_leaves_single_leaf() {
644        let undef8 = u64::MAX;
645        // Put the leaf node at offset 0 in our fake file data.
646        let node_data = build_group_node(0, 2, undef8, undef8, &[0, 5, 10], &[0x100, 0x200], 8, 8);
647
648        let results = collect_btree_v1_leaves(&node_data, 0, 8, 8, None, &[], None).unwrap();
649
650        assert_eq!(results.len(), 2);
651        match &results[0].0 {
652            BTreeV1Key::Group { local_heap_offset } => assert_eq!(*local_heap_offset, 0),
653            _ => panic!("expected Group key"),
654        }
655        assert_eq!(results[0].1, 0x100);
656        assert_eq!(results[1].1, 0x200);
657    }
658
659    #[test]
660    fn collect_leaves_two_level_tree() {
661        let undef8 = u64::MAX;
662
663        // Build a two-level tree:
664        //   Root (level 1) at offset 0, with 2 children
665        //     Leaf A at offset 1000
666        //     Leaf B at offset 2000
667
668        let leaf_a = build_group_node(0, 1, undef8, undef8, &[0, 5], &[0xA00], 8, 8);
669
670        let leaf_b = build_group_node(0, 1, undef8, undef8, &[10, 15], &[0xB00], 8, 8);
671
672        let root = build_group_node(
673            1,
674            2,
675            undef8,
676            undef8,
677            &[0, 5, 15],   // 3 keys for 2 children
678            &[1000, 2000], // children at offsets 1000 and 2000
679            8,
680            8,
681        );
682
683        // Assemble into a single buffer: root at 0, leaf_a at 1000, leaf_b at 2000.
684        let total_size = 3000 + leaf_b.len();
685        let mut file_data = vec![0u8; total_size];
686        file_data[..root.len()].copy_from_slice(&root);
687        file_data[1000..1000 + leaf_a.len()].copy_from_slice(&leaf_a);
688        file_data[2000..2000 + leaf_b.len()].copy_from_slice(&leaf_b);
689
690        let results = collect_btree_v1_leaves(&file_data, 0, 8, 8, None, &[], None).unwrap();
691
692        assert_eq!(results.len(), 2);
693        assert_eq!(results[0].1, 0xA00);
694        assert_eq!(results[1].1, 0xB00);
695    }
696
697    #[test]
698    fn collect_rejects_revisited_node() {
699        let undef8 = u64::MAX;
700        let root = build_group_node(
701            1,
702            1,
703            undef8,
704            undef8,
705            &[0, 8],
706            &[0], // corrupt: child points back to the root node
707            8,
708            8,
709        );
710
711        let err = collect_btree_v1_leaves(&root, 0, 8, 8, None, &[], None).unwrap_err();
712        assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
713
714        let storage = crate::storage::BytesStorage::new(root);
715        let err = collect_btree_v1_leaves_storage(&storage, 0, 8, 8, None, &[], None).unwrap_err();
716        assert!(matches!(err, Error::InvalidData(message) if message.contains("revisits node")));
717    }
718
719    #[test]
720    fn collect_undefined_root() {
721        // Undefined root address should produce empty results.
722        let data = vec![0u8; 100];
723        let results = collect_btree_v1_leaves(&data, u64::MAX, 8, 8, None, &[], None).unwrap();
724        assert!(results.is_empty());
725    }
726}