bao_tree/
iter.rs

1//! Iterators over BaoTree nodes
2//!
3//! Range iterators take a reference to the ranges, and therefore require a lifetime parameter.
4//! They can be used without lifetime parameters using self referencing structs.
5use std::fmt::{self, Debug};
6
7use self_cell::self_cell;
8use smallvec::SmallVec;
9
10use crate::{split, BaoTree, BlockSize, ChunkNum, ChunkRanges, ChunkRangesRef, TreeNode};
11
12/// Extended node info.
13///
14/// Some of the information is redundant, but it is convenient to have it all in one place.
15///
16/// Usually this is used within an iterator, so we hope that the compiler will optimize away
17/// the redundant information.
18#[derive(Debug, PartialEq, Eq)]
19#[cfg(test)]
20pub struct NodeInfo<'a> {
21    /// the node
22    pub node: TreeNode,
23    /// the node is the root node (needs special handling when computing hash)
24    pub is_root: bool,
25    /// ranges of the node and it's two children
26    pub ranges: &'a ChunkRangesRef,
27    /// left child intersection with the query range
28    pub l_ranges: &'a ChunkRangesRef,
29    /// right child intersection with the query range
30    pub r_ranges: &'a ChunkRangesRef,
31    /// the node is fully included in the query range
32    pub full: bool,
33    /// the node is a leaf for the purpose of this query
34    pub query_leaf: bool,
35    /// true if this node is the last leaf, and it is <= half full
36    pub is_half_leaf: bool,
37}
38
39/// Iterator over all nodes in a BaoTree in pre-order that overlap with a given chunk range.
40///
41/// This is mostly used internally
42#[derive(Debug)]
43#[cfg(test)]
44pub struct PreOrderPartialIterRef<'a> {
45    /// the tree we want to traverse
46    tree: BaoTree,
47    /// the minimum level to always emit, even if the node is fully within the query range
48    min_level: u8,
49    /// stack of nodes to visit, together with the ranges that are relevant for the node
50    ///
51    /// The node is shifted by the block size, so these are not normal nodes!
52    stack: SmallVec<[(TreeNode, &'a ChunkRangesRef); 8]>,
53    /// number of valid nodes, needed in shifted.right_descendant
54    ///
55    /// This is also shifted by the block size!
56    shifted_filled_size: TreeNode,
57    /// The root node, shifted by the block size, needed for the is_root check
58    shifted_root: TreeNode,
59}
60
61#[cfg(test)]
62impl<'a> PreOrderPartialIterRef<'a> {
63    /// Create a new iterator over the tree.
64    pub fn new(tree: BaoTree, ranges: &'a ChunkRangesRef, min_level: u8) -> Self {
65        let mut stack = SmallVec::new();
66        let (shifted_root, shifted_filled_size) = tree.shifted();
67        stack.push((shifted_root, ranges));
68        Self {
69            tree,
70            min_level,
71            stack,
72            shifted_filled_size,
73            shifted_root,
74        }
75    }
76
77    /// Get a reference to the tree.
78    pub fn tree(&self) -> &BaoTree {
79        &self.tree
80    }
81}
82
83#[cfg(test)]
84impl<'a> Iterator for PreOrderPartialIterRef<'a> {
85    type Item = NodeInfo<'a>;
86
87    fn next(&mut self) -> Option<Self::Item> {
88        let tree = &self.tree;
89        loop {
90            let (shifted, ranges) = self.stack.pop()?;
91            if ranges.is_empty() {
92                continue;
93            }
94            let node = shifted.subtract_block_size(tree.block_size.0);
95            let is_half_leaf = !tree.is_persisted(node);
96            let (l_ranges, r_ranges) = if !is_half_leaf {
97                // split the ranges into left and right
98                split(ranges, node)
99            } else {
100                (ranges, ranges)
101            };
102            // check if the node is fully included
103            let full = ranges.is_all();
104            // we can't recurse if the node is a leaf
105            // we don't want to recurse if the node is full and below the minimum level
106            let query_leaf = shifted.is_leaf() || (full && node.level() < self.min_level as u32);
107            // recursion is just pushing the children onto the stack
108            if !query_leaf {
109                let l = shifted.left_child().unwrap();
110                let r = shifted.right_descendant(self.shifted_filled_size).unwrap();
111                // push right first so we pop left first
112                self.stack.push((r, r_ranges));
113                self.stack.push((l, l_ranges));
114            }
115            // the first node is the root, so just set the flag to false afterwards
116            let is_root = shifted == self.shifted_root;
117            // emit the node in any case
118            break Some(NodeInfo {
119                node,
120                ranges,
121                l_ranges,
122                r_ranges,
123                full,
124                query_leaf,
125                is_root,
126                is_half_leaf,
127            });
128        }
129    }
130}
131
132/// Iterator over all nodes in a tree in post-order.
133///
134/// If you want to iterate only down to some level, you need to shift the tree
135/// before.
136#[derive(Debug)]
137pub struct PostOrderNodeIter {
138    /// the overall number of nodes in the tree
139    len: TreeNode,
140    /// the current node, None if we are done
141    curr: TreeNode,
142    /// where we came from, used to determine the next node
143    prev: Prev,
144}
145
146impl PostOrderNodeIter {
147    /// Create a new iterator given a root node and a len
148    pub fn new(root: TreeNode, len: TreeNode) -> Self {
149        Self {
150            curr: root,
151            len,
152            prev: Prev::Parent,
153        }
154    }
155
156    fn go_up(&mut self, curr: TreeNode) {
157        let prev = curr;
158        (self.curr, self.prev) = if let Some(parent) = curr.restricted_parent(self.len) {
159            (
160                parent,
161                if prev < parent {
162                    Prev::Left
163                } else {
164                    Prev::Right
165                },
166            )
167        } else {
168            (curr, Prev::Done)
169        };
170    }
171}
172
173impl Iterator for PostOrderNodeIter {
174    type Item = TreeNode;
175
176    fn next(&mut self) -> Option<Self::Item> {
177        loop {
178            let curr = self.curr;
179            match self.prev {
180                Prev::Parent => {
181                    if let Some(child) = curr.left_child() {
182                        // go left first when coming from above, don't emit curr
183                        self.curr = child;
184                        self.prev = Prev::Parent;
185                    } else {
186                        // we are a left or right leaf, go up and emit curr
187                        self.go_up(curr);
188                        break Some(curr);
189                    }
190                }
191                Prev::Left => {
192                    // no need to check is_leaf, since we come from a left child
193                    // go right when coming from left, don't emit curr
194                    self.curr = curr.right_descendant(self.len).unwrap();
195                    self.prev = Prev::Parent;
196                }
197                Prev::Right => {
198                    // go up in any case, do emit curr
199                    self.go_up(curr);
200                    break Some(curr);
201                }
202                Prev::Done => {
203                    break None;
204                }
205            }
206        }
207    }
208}
209
210/// Iterator over all nodes in a BaoTree in pre-order.
211#[derive(Debug)]
212pub struct PreOrderNodeIter {
213    /// the overall number of nodes in the tree
214    len: TreeNode,
215    /// the current node, None if we are done
216    curr: TreeNode,
217    /// where we came from, used to determine the next node
218    prev: Prev,
219}
220
221impl PreOrderNodeIter {
222    /// Create a new iterator given a root node and a len
223    pub fn new(root: TreeNode, len: TreeNode) -> Self {
224        Self {
225            curr: root,
226            len,
227            prev: Prev::Parent,
228        }
229    }
230
231    fn go_up(&mut self, curr: TreeNode) {
232        let prev = curr;
233        (self.curr, self.prev) = if let Some(parent) = curr.restricted_parent(self.len) {
234            (
235                parent,
236                if prev < parent {
237                    Prev::Left
238                } else {
239                    Prev::Right
240                },
241            )
242        } else {
243            (curr, Prev::Done)
244        };
245    }
246}
247
248impl Iterator for PreOrderNodeIter {
249    type Item = TreeNode;
250
251    fn next(&mut self) -> Option<Self::Item> {
252        loop {
253            let curr = self.curr;
254            match self.prev {
255                Prev::Parent => {
256                    if let Some(child) = curr.left_child() {
257                        // go left first when coming from above
258                        self.curr = child;
259                        self.prev = Prev::Parent;
260                    } else {
261                        // we are a left or right leaf, go up
262                        self.go_up(curr);
263                    }
264                    // emit curr before children (pre-order)
265                    break Some(curr);
266                }
267                Prev::Left => {
268                    // no need to check is_leaf, since we come from a left child
269                    // go right when coming from left, don't emit curr
270                    self.curr = curr.right_descendant(self.len).unwrap();
271                    self.prev = Prev::Parent;
272                }
273                Prev::Right => {
274                    // go up in any case
275                    self.go_up(curr);
276                }
277                Prev::Done => {
278                    break None;
279                }
280            }
281        }
282    }
283}
284
285#[derive(Debug)]
286enum Prev {
287    Parent,
288    Left,
289    Right,
290    Done,
291}
292
293#[derive(Debug, Clone, Copy, PartialEq, Eq)]
294/// A chunk describeds what to read or write next
295///
296/// In some cases you want additional information about what part of the chunk matches the query.
297/// That is what the `R` type parameter is for. By default it is `()`.
298pub enum BaoChunk<R = ()> {
299    /// expect a 64 byte parent node.
300    ///
301    /// To validate, use parent_cv using the is_root value
302    Parent {
303        /// The tree node, useful for error reporting
304        node: TreeNode,
305        /// This is the root, to be passed to parent_cv
306        is_root: bool,
307        /// Push the left hash to the stack, since it will be needed later
308        left: bool,
309        /// Push the right hash to the stack, since it will be needed later
310        right: bool,
311        /// Additional information about what part of the chunk matches the query
312        ranges: R,
313    },
314    /// expect data of size `size`
315    ///
316    /// To validate, use hash_block using the is_root and start_chunk values
317    Leaf {
318        /// Start chunk, to be passed to hash_block
319        start_chunk: ChunkNum,
320        /// Size of the data to expect. Will be chunk_group_bytes for all but the last block.
321        size: usize,
322        /// This is the root, to be passed to hash_block
323        is_root: bool,
324        /// Additional information about what part of the chunk matches the query
325        ranges: R,
326    },
327}
328
329impl<T> BaoChunk<T> {
330    #[cfg(test)]
331    pub fn to_debug_string(&self, max_level: usize) -> String {
332        match self {
333            BaoChunk::Parent { node, is_root, .. } => {
334                let n = max_level.saturating_sub(node.level() as usize + 1);
335                let prefix = " ".repeat(n);
336                let start_chunk = node.chunk_range().start;
337                format!(
338                    "{}{},{},{}",
339                    prefix,
340                    start_chunk.to_bytes(),
341                    node.level(),
342                    is_root
343                )
344            }
345            BaoChunk::Leaf {
346                start_chunk,
347                size,
348                is_root,
349                ..
350            } => {
351                let prefix = " ".repeat(max_level);
352                format!("{}{},{},{}", prefix, start_chunk.to_bytes(), size, is_root)
353            }
354        }
355    }
356
357    /// Set the ranges to the unit value
358    pub fn without_ranges(&self) -> BaoChunk {
359        match self {
360            Self::Parent {
361                node,
362                is_root,
363                left,
364                right,
365                ..
366            } => BaoChunk::Parent {
367                node: *node,
368                is_root: *is_root,
369                left: *left,
370                right: *right,
371                ranges: (),
372            },
373            Self::Leaf {
374                start_chunk,
375                size,
376                is_root,
377                ..
378            } => BaoChunk::Leaf {
379                start_chunk: *start_chunk,
380                size: *size,
381                is_root: *is_root,
382                ranges: (),
383            },
384        }
385    }
386}
387
388/// Iterator over all chunks in a BaoTree in post-order.
389#[derive(Debug)]
390pub struct PostOrderChunkIter {
391    tree: BaoTree,
392    inner: PostOrderNodeIter,
393    // stack with 2 elements, since we can only have 2 items in flight
394    stack: SmallVec<[BaoChunk; 2]>,
395    shifted_root: TreeNode,
396}
397
398impl PostOrderChunkIter {
399    /// Create a new iterator over the tree.
400    pub fn new(tree: BaoTree) -> Self {
401        let (shifted_root, shifted_len) = tree.shifted();
402        let inner = PostOrderNodeIter::new(shifted_root, shifted_len);
403        Self {
404            tree,
405            inner,
406            stack: Default::default(),
407            shifted_root,
408        }
409    }
410}
411
412impl Iterator for PostOrderChunkIter {
413    type Item = BaoChunk;
414
415    fn next(&mut self) -> Option<Self::Item> {
416        loop {
417            if let Some(item) = self.stack.pop() {
418                return Some(item);
419            }
420            let shifted = self.inner.next()?;
421            // the is_root check needs to be done before shifting the node
422            let is_root = shifted == self.shifted_root;
423            let node = shifted.subtract_block_size(self.tree.block_size.0);
424            if shifted.is_leaf() {
425                let tree = &self.tree;
426                let (s, m, e) = tree.leaf_byte_ranges3(node);
427                let l_start_chunk = node.chunk_range().start;
428                let r_start_chunk = l_start_chunk + tree.chunk_group_chunks();
429                let is_half_leaf = m == e;
430                // for the half leaf we emit just the leaf
431                // for all other leaves we emit the parent and two leaves
432                if !is_half_leaf {
433                    self.stack.push(BaoChunk::Parent {
434                        node,
435                        is_root,
436                        left: true,
437                        right: true,
438                        ranges: (),
439                    });
440                    self.stack.push(BaoChunk::Leaf {
441                        is_root: false,
442                        start_chunk: r_start_chunk,
443                        size: (e - m).try_into().unwrap(),
444                        ranges: (),
445                    });
446                };
447                break Some(BaoChunk::Leaf {
448                    is_root: is_root && is_half_leaf,
449                    start_chunk: l_start_chunk,
450                    size: (m - s).try_into().unwrap(),
451                    ranges: (),
452                });
453            } else {
454                self.stack.push(BaoChunk::Parent {
455                    node,
456                    is_root,
457                    left: true,
458                    right: true,
459                    ranges: (),
460                });
461            }
462        }
463    }
464}
465
466impl BaoChunk {
467    /// Return the size of the chunk in bytes.
468    pub fn size(&self) -> usize {
469        match self {
470            Self::Parent { .. } => 64,
471            Self::Leaf { size, .. } => *size,
472        }
473    }
474}
475
476impl<T: Default> Default for BaoChunk<T> {
477    fn default() -> Self {
478        Self::Leaf {
479            is_root: true,
480            size: 0,
481            start_chunk: ChunkNum(0),
482            ranges: T::default(),
483        }
484    }
485}
486
487/// Iterator over all nodes in a BaoTree in pre-order that overlap with a given chunk range.
488///
489/// This is mostly used internally
490#[derive(Debug)]
491pub struct PreOrderPartialChunkIterRef<'a> {
492    /// the tree we want to traverse
493    tree: BaoTree,
494    /// the minimum level to always emit, even if the node is fully within the query range
495    min_full_level: u8,
496    /// stack of nodes to visit, together with the ranges that are relevant for the node
497    ///
498    /// The node is shifted by the block size, so these are not normal nodes!
499    stack: SmallVec<[(TreeNode, &'a ChunkRangesRef); 8]>,
500    /// number of valid nodes, needed in shifted.right_descendant
501    ///
502    /// This is also shifted by the block size!
503    shifted_filled_size: TreeNode,
504    /// The root node, shifted by the block size, needed for the is_root check
505    shifted_root: TreeNode,
506    /// chunk buffer. This will only ever contain leaves, and will never be more than 2 elements
507    buffer: SmallVec<[BaoChunk<&'a ChunkRangesRef>; 2]>,
508}
509
510impl<'a> PreOrderPartialChunkIterRef<'a> {
511    /// Create a new iterator over the tree.
512    pub fn new(tree: BaoTree, ranges: &'a ChunkRangesRef, min_full_level: u8) -> Self {
513        let mut stack = SmallVec::new();
514        let (shifted_root, shifted_filled_size) = tree.shifted();
515        stack.push((shifted_root, ranges));
516        Self {
517            tree,
518            min_full_level,
519            stack,
520            shifted_filled_size,
521            shifted_root,
522            buffer: SmallVec::new(),
523        }
524    }
525
526    /// Get a reference to the tree.
527    pub fn tree(&self) -> &BaoTree {
528        &self.tree
529    }
530
531    /// Get the minimum level to always emit, even if the node is fully within the query range
532    pub fn min_full_level(&self) -> u8 {
533        self.min_full_level
534    }
535}
536
537impl<'a> Iterator for PreOrderPartialChunkIterRef<'a> {
538    type Item = BaoChunk<&'a ChunkRangesRef>;
539
540    fn next(&mut self) -> Option<Self::Item> {
541        if let Some(item) = self.buffer.pop() {
542            return Some(item);
543        }
544        let tree = &self.tree;
545        let (shifted, ranges) = self.stack.pop()?;
546        debug_assert!(!ranges.is_empty());
547        let node = shifted.subtract_block_size(tree.block_size.0);
548        // we don't want to recurse if the node is full and below the minimum level
549        let ranges_is_all = ranges.is_all();
550        let below_min_full_level = node.level() < self.min_full_level as u32;
551        let query_leaf = ranges_is_all && below_min_full_level;
552        // check if the node is the root by comparing the shifted node to the shifted root
553        let is_root = shifted == self.shifted_root;
554        let chunk_range = node.chunk_range();
555        let byte_range = tree.byte_range(node);
556        let size = (byte_range.end - byte_range.start).try_into().unwrap();
557        // There are three cases.
558        if query_leaf {
559            // The node is a query leaf, meaning that we stop descending because the
560            // node is fully within the query range and it's level is below min_full_level.
561            // This can be fully disabled by setting min_full_level to 0.
562            //
563            // In this case we just emit the range of the node, and don't recurse.
564            Some(BaoChunk::Leaf {
565                start_chunk: chunk_range.start,
566                size,
567                is_root,
568                ranges,
569            })
570        } else if !shifted.is_leaf() {
571            // The node is either not fully within the query range, or it's level is above
572            // min_full_level. In this case we need to recurse.
573            let (l_ranges, r_ranges) = split(ranges, node);
574            // emit right child first, so it gets yielded last
575            if !r_ranges.is_empty() {
576                let r = shifted.right_descendant(self.shifted_filled_size).unwrap();
577                self.stack.push((r, r_ranges));
578            }
579            // emit left child second, so it gets yielded first
580            if !l_ranges.is_empty() {
581                let l = shifted.left_child().unwrap();
582                self.stack.push((l, l_ranges));
583            }
584            // immediately emit the parent
585            Some(BaoChunk::Parent {
586                node,
587                left: !l_ranges.is_empty(),
588                right: !r_ranges.is_empty(),
589                is_root,
590                ranges,
591            })
592        } else {
593            // The node is a real leaf.
594            //
595            // If it is a normal leaf and we got this far, we need to split it into 2 ranges.
596            // E.g. the first leaf of a tree covers the range 0..2048, but we want two 1024
597            // byte BLAKE3 chunks.
598            //
599            // There is a special case for the last leaf, if its right range is not within the
600            // tree. In this case we don't need to split it, and can just emit it as is.
601            let mid_chunk = node.mid();
602            let mid = mid_chunk.to_bytes();
603            if mid >= tree.size {
604                // this is the last leaf node, and only it's left part is in the range
605                // we can just emit it without splitting
606                Some(BaoChunk::Leaf {
607                    start_chunk: chunk_range.start,
608                    size,
609                    is_root,
610                    ranges,
611                })
612            } else {
613                let (l_ranges, r_ranges) = split(ranges, node);
614                // emit right range first, so it gets yielded last
615                if !r_ranges.is_empty() {
616                    self.buffer.push(BaoChunk::Leaf {
617                        start_chunk: mid_chunk,
618                        size: (byte_range.end - mid).try_into().unwrap(),
619                        is_root: false,
620                        ranges: r_ranges,
621                    });
622                }
623                // emit left range second, so it gets yielded first
624                if !l_ranges.is_empty() {
625                    self.buffer.push(BaoChunk::Leaf {
626                        start_chunk: chunk_range.start,
627                        size: (mid - byte_range.start).try_into().unwrap(),
628                        is_root: false,
629                        ranges: l_ranges,
630                    });
631                }
632                // immediately emit the parent
633                Some(BaoChunk::Parent {
634                    node,
635                    left: !l_ranges.is_empty(),
636                    right: !r_ranges.is_empty(),
637                    is_root,
638                    ranges,
639                })
640            }
641        }
642    }
643}
644
645/// An iterator that produces chunks in pre order.
646///
647/// This wraps a `PreOrderPartialIterRef` and iterates over the chunk groups
648/// all the way down to individual chunks if needed.
649#[derive(Debug)]
650pub struct ResponseIterRef<'a> {
651    inner: PreOrderPartialChunkIterRef<'a>,
652}
653
654impl<'a> ResponseIterRef<'a> {
655    /// Create a new iterator over the tree.
656    pub fn new(tree: BaoTree, ranges: &'a ChunkRangesRef) -> Self {
657        let tree1 = BaoTree::new(tree.size, BlockSize::ZERO);
658        Self {
659            inner: PreOrderPartialChunkIterRef::new(tree1, ranges, tree.block_size.0),
660        }
661    }
662
663    /// Return the underlying tree.
664    pub fn tree(&self) -> BaoTree {
665        // the inner iterator uses a tree with block size 0, so we need to return the original tree
666        BaoTree::new(
667            self.inner.tree().size,
668            BlockSize(self.inner.min_full_level()),
669        )
670    }
671}
672
673impl<'a> Iterator for ResponseIterRef<'a> {
674    type Item = BaoChunk;
675
676    fn next(&mut self) -> Option<Self::Item> {
677        Some(self.inner.next()?.without_ranges())
678    }
679}
680
681self_cell! {
682    pub(crate) struct ResponseIterInner {
683        owner: ChunkRanges,
684        #[not_covariant]
685        dependent: ResponseIterRef,
686    }
687}
688
689impl ResponseIterInner {
690    fn next(&mut self) -> Option<BaoChunk> {
691        self.with_dependent_mut(|_, iter| iter.next())
692    }
693
694    fn tree(&self) -> BaoTree {
695        self.with_dependent(|_, iter| iter.tree())
696    }
697}
698
699/// The owned version of `ResponseIterRef`.
700pub struct ResponseIter(ResponseIterInner);
701
702impl fmt::Debug for ResponseIter {
703    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
704        f.debug_struct("ResponseIter").finish_non_exhaustive()
705    }
706}
707
708impl ResponseIter {
709    /// Create a new iterator over the tree.
710    pub fn new(tree: BaoTree, ranges: ChunkRanges) -> Self {
711        Self(ResponseIterInner::new(ranges, |ranges| {
712            ResponseIterRef::new(tree, ranges)
713        }))
714    }
715
716    /// The tree this iterator is iterating over.
717    pub fn tree(&self) -> BaoTree {
718        self.0.tree()
719    }
720}
721
722impl Iterator for ResponseIter {
723    type Item = BaoChunk;
724
725    fn next(&mut self) -> Option<Self::Item> {
726        self.0.next()
727    }
728}